summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_btree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs/xfs_btree.c')
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2613
1 files changed, 1659 insertions, 954 deletions
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index bbdae2b4559f..dbe9df8c3300 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -11,94 +11,152 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
#include "xfs_btree.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_alloc.h"
#include "xfs_log.h"
-
-/*
- * Cursor allocation zone.
- */
-kmem_zone_t *xfs_btree_cur_zone;
+#include "xfs_btree_staging.h"
+#include "xfs_ag.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_health.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_rmap.h"
+#include "xfs_quota.h"
+#include "xfs_metafile.h"
+#include "xfs_rtrefcount_btree.h"
/*
* Btree magic numbers.
*/
-static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
- { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
- XFS_FIBT_MAGIC, 0 },
- { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
- XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
- XFS_REFC_CRC_MAGIC }
-};
-
uint32_t
xfs_btree_magic(
- int crc,
- xfs_btnum_t btnum)
+ struct xfs_mount *mp,
+ const struct xfs_btree_ops *ops)
{
- uint32_t magic = xfs_magics[crc][btnum];
+ int idx = xfs_has_crc(mp) ? 1 : 0;
+ __be32 magic = ops->buf_ops->magic[idx];
/* Ensure we asked for crc for crc-only magics. */
ASSERT(magic != 0);
- return magic;
+ return be32_to_cpu(magic);
}
/*
- * Check a long btree block header. Return the address of the failing check,
- * or NULL if everything is ok.
+ * These sibling pointer checks are optimised for null sibling pointers. This
+ * happens a lot, and we don't need to byte swap at runtime if the sibling
+ * pointer is NULL.
+ *
+ * These are explicitly marked at inline because the cost of calling them as
+ * functions instead of inlining them is about 36 bytes extra code per call site
+ * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
+ * two sibling check functions reduces the compiled code size by over 300
+ * bytes.
*/
-xfs_failaddr_t
-__xfs_btree_check_lblock(
+static inline xfs_failaddr_t
+xfs_btree_check_fsblock_siblings(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsb,
+ __be64 dsibling)
+{
+ xfs_fsblock_t sibling;
+
+ if (dsibling == cpu_to_be64(NULLFSBLOCK))
+ return NULL;
+
+ sibling = be64_to_cpu(dsibling);
+ if (sibling == fsb)
+ return __this_address;
+ if (!xfs_verify_fsbno(mp, sibling))
+ return __this_address;
+ return NULL;
+}
+
+static inline xfs_failaddr_t
+xfs_btree_check_memblock_siblings(
+ struct xfs_buftarg *btp,
+ xfbno_t bno,
+ __be64 dsibling)
+{
+ xfbno_t sibling;
+
+ if (dsibling == cpu_to_be64(NULLFSBLOCK))
+ return NULL;
+
+ sibling = be64_to_cpu(dsibling);
+ if (sibling == bno)
+ return __this_address;
+ if (!xmbuf_verify_daddr(btp, xfbno_to_daddr(sibling)))
+ return __this_address;
+ return NULL;
+}
+
+static inline xfs_failaddr_t
+xfs_btree_check_agblock_siblings(
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
+ __be32 dsibling)
+{
+ xfs_agblock_t sibling;
+
+ if (dsibling == cpu_to_be32(NULLAGBLOCK))
+ return NULL;
+
+ sibling = be32_to_cpu(dsibling);
+ if (sibling == agbno)
+ return __this_address;
+ if (!xfs_verify_agbno(pag, sibling))
+ return __this_address;
+ return NULL;
+}
+
+static xfs_failaddr_t
+__xfs_btree_check_lblock_hdr(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
int level,
struct xfs_buf *bp)
{
struct xfs_mount *mp = cur->bc_mp;
- xfs_btnum_t btnum = cur->bc_btnum;
- int crc = xfs_sb_version_hascrc(&mp->m_sb);
- if (crc) {
+ if (xfs_has_crc(mp)) {
if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
return __this_address;
if (block->bb_u.l.bb_blkno !=
- cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+ cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
return __this_address;
if (block->bb_u.l.bb_pad != cpu_to_be32(0))
return __this_address;
}
- if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
+ if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
return __this_address;
if (be16_to_cpu(block->bb_level) != level)
return __this_address;
if (be16_to_cpu(block->bb_numrecs) >
cur->bc_ops->get_maxrecs(cur, level))
return __this_address;
- if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
- !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
- level + 1))
- return __this_address;
- if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
- !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
- level + 1))
- return __this_address;
return NULL;
}
-/* Check a long btree block header. */
-static int
-xfs_btree_check_lblock(
+/*
+ * Check a long btree block header. Return the address of the failing check,
+ * or NULL if everything is ok.
+ */
+static xfs_failaddr_t
+__xfs_btree_check_fsblock(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
int level,
@@ -106,80 +164,131 @@ xfs_btree_check_lblock(
{
struct xfs_mount *mp = cur->bc_mp;
xfs_failaddr_t fa;
+ xfs_fsblock_t fsb;
- fa = __xfs_btree_check_lblock(cur, block, level, bp);
- if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
- XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
- if (bp)
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- return -EFSCORRUPTED;
+ fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp);
+ if (fa)
+ return fa;
+
+ /*
+ * For inode-rooted btrees, the root block sits in the inode fork. In
+ * that case bp is NULL, and the block must not have any siblings.
+ */
+ if (!bp) {
+ if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK))
+ return __this_address;
+ if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK))
+ return __this_address;
+ return NULL;
}
- return 0;
+
+ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
+ fa = xfs_btree_check_fsblock_siblings(mp, fsb,
+ block->bb_u.l.bb_leftsib);
+ if (!fa)
+ fa = xfs_btree_check_fsblock_siblings(mp, fsb,
+ block->bb_u.l.bb_rightsib);
+ return fa;
+}
+
+/*
+ * Check an in-memory btree block header. Return the address of the failing
+ * check, or NULL if everything is ok.
+ */
+static xfs_failaddr_t
+__xfs_btree_check_memblock(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ int level,
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target;
+ xfs_failaddr_t fa;
+ xfbno_t bno;
+
+ fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp);
+ if (fa)
+ return fa;
+
+ bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp));
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_leftsib);
+ if (!fa)
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_rightsib);
+ return fa;
}
/*
* Check a short btree block header. Return the address of the failing check,
* or NULL if everything is ok.
*/
-xfs_failaddr_t
-__xfs_btree_check_sblock(
+static xfs_failaddr_t
+__xfs_btree_check_agblock(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
int level,
struct xfs_buf *bp)
{
struct xfs_mount *mp = cur->bc_mp;
- xfs_btnum_t btnum = cur->bc_btnum;
- int crc = xfs_sb_version_hascrc(&mp->m_sb);
+ struct xfs_perag *pag = to_perag(cur->bc_group);
+ xfs_failaddr_t fa;
+ xfs_agblock_t agbno;
- if (crc) {
+ if (xfs_has_crc(mp)) {
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
return __this_address;
- if (block->bb_u.s.bb_blkno !=
- cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
return __this_address;
}
- if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
+ if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
return __this_address;
if (be16_to_cpu(block->bb_level) != level)
return __this_address;
if (be16_to_cpu(block->bb_numrecs) >
cur->bc_ops->get_maxrecs(cur, level))
return __this_address;
- if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
- !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
- level + 1))
- return __this_address;
- if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
- !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
- level + 1))
- return __this_address;
- return NULL;
+ agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
+ fa = xfs_btree_check_agblock_siblings(pag, agbno,
+ block->bb_u.s.bb_leftsib);
+ if (!fa)
+ fa = xfs_btree_check_agblock_siblings(pag, agbno,
+ block->bb_u.s.bb_rightsib);
+ return fa;
}
-/* Check a short btree block header. */
-STATIC int
-xfs_btree_check_sblock(
+/*
+ * Internal btree block check.
+ *
+ * Return NULL if the block is ok or the address of the failed check otherwise.
+ */
+xfs_failaddr_t
+__xfs_btree_check_block(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
int level,
struct xfs_buf *bp)
{
- struct xfs_mount *mp = cur->bc_mp;
- xfs_failaddr_t fa;
-
- fa = __xfs_btree_check_sblock(cur, block, level, bp);
- if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
- XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
- if (bp)
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- return -EFSCORRUPTED;
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ return __xfs_btree_check_memblock(cur, block, level, bp);
+ case XFS_BTREE_TYPE_AG:
+ return __xfs_btree_check_agblock(cur, block, level, bp);
+ case XFS_BTREE_TYPE_INODE:
+ return __xfs_btree_check_fsblock(cur, block, level, bp);
+ default:
+ ASSERT(0);
+ return __this_address;
}
- return 0;
+}
+
+static inline unsigned int xfs_btree_block_errtag(struct xfs_btree_cur *cur)
+{
+ if (cur->bc_ops->ptr_len == XFS_BTREE_SHORT_PTR_LEN)
+ return XFS_ERRTAG_BTREE_CHECK_SBLOCK;
+ return XFS_ERRTAG_BTREE_CHECK_LBLOCK;
}
/*
@@ -192,34 +301,49 @@ xfs_btree_check_block(
int level, /* level of the btree block */
struct xfs_buf *bp) /* buffer containing block, if any */
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return xfs_btree_check_lblock(cur, block, level, bp);
- else
- return xfs_btree_check_sblock(cur, block, level, bp);
-}
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_failaddr_t fa;
-/* Check that this long pointer is valid and points within the fs. */
-bool
-xfs_btree_check_lptr(
- struct xfs_btree_cur *cur,
- xfs_fsblock_t fsbno,
- int level)
-{
- if (level <= 0)
- return false;
- return xfs_verify_fsbno(cur->bc_mp, fsbno);
+ fa = __xfs_btree_check_block(cur, block, level, bp);
+ if (XFS_IS_CORRUPT(mp, fa != NULL) ||
+ XFS_TEST_ERROR(mp, xfs_btree_block_errtag(cur))) {
+ if (bp)
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
-/* Check that this short pointer is valid and points within the AG. */
-bool
-xfs_btree_check_sptr(
- struct xfs_btree_cur *cur,
- xfs_agblock_t agbno,
- int level)
+int
+__xfs_btree_check_ptr(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ int index,
+ int level)
{
if (level <= 0)
- return false;
- return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno);
+ return -EFSCORRUPTED;
+
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ if (!xfbtree_verify_bno(cur->bc_mem.xfbtree,
+ be64_to_cpu((&ptr->l)[index])))
+ return -EFSCORRUPTED;
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ if (!xfs_verify_fsbno(cur->bc_mp,
+ be64_to_cpu((&ptr->l)[index])))
+ return -EFSCORRUPTED;
+ break;
+ case XFS_BTREE_TYPE_AG:
+ if (!xfs_verify_agbno(to_perag(cur->bc_group),
+ be32_to_cpu((&ptr->s)[index])))
+ return -EFSCORRUPTED;
+ break;
+ }
+
+ return 0;
}
/*
@@ -228,31 +352,40 @@ xfs_btree_check_sptr(
*/
static int
xfs_btree_check_ptr(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr *ptr,
- int index,
- int level)
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ int index,
+ int level)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
- level))
- return 0;
- xfs_err(cur->bc_mp,
-"Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
- cur->bc_private.b.ip->i_ino,
- cur->bc_private.b.whichfork, cur->bc_btnum,
+ int error;
+
+ error = __xfs_btree_check_ptr(cur, ptr, index, level);
+ if (error) {
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ xfs_err(cur->bc_mp,
+"In-memory: Corrupt %sbt flags 0x%x pointer at level %d index %d fa %pS.",
+ cur->bc_ops->name, cur->bc_flags, level, index,
+ __this_address);
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ xfs_err(cur->bc_mp,
+"Inode %llu fork %d: Corrupt %sbt pointer at level %d index %d.",
+ cur->bc_ino.ip->i_ino,
+ cur->bc_ino.whichfork, cur->bc_ops->name,
level, index);
- } else {
- if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
- level))
- return 0;
- xfs_err(cur->bc_mp,
-"AG %u: Corrupt btree %d pointer at level %d index %d.",
- cur->bc_private.a.agno, cur->bc_btnum,
+ break;
+ case XFS_BTREE_TYPE_AG:
+ xfs_err(cur->bc_mp,
+"AG %u: Corrupt %sbt pointer at level %d index %d.",
+ cur->bc_group->xg_gno, cur->bc_ops->name,
level, index);
+ break;
+ }
+ xfs_btree_mark_sick(cur);
}
- return -EFSCORRUPTED;
+ return error;
}
#ifdef DEBUG
@@ -270,13 +403,13 @@ xfs_btree_check_ptr(
* it to disk.
*/
void
-xfs_btree_lblock_calc_crc(
+xfs_btree_fsblock_calc_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_buf_log_item *bip = bp->b_log_item;
- if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ if (!xfs_has_crc(bp->b_mount))
return;
if (bip)
block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -284,13 +417,13 @@ xfs_btree_lblock_calc_crc(
}
bool
-xfs_btree_lblock_verify_crc(
+xfs_btree_fsblock_verify_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (xfs_has_crc(mp)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
@@ -308,13 +441,13 @@ xfs_btree_lblock_verify_crc(
* it to disk.
*/
void
-xfs_btree_sblock_calc_crc(
+xfs_btree_agblock_calc_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_buf_log_item *bip = bp->b_log_item;
- if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ if (!xfs_has_crc(bp->b_mount))
return;
if (bip)
block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -322,13 +455,13 @@ xfs_btree_sblock_calc_crc(
}
bool
-xfs_btree_sblock_verify_crc(
+xfs_btree_agblock_verify_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (xfs_has_crc(mp)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
@@ -344,6 +477,17 @@ xfs_btree_free_block(
{
int error;
+ trace_xfs_btree_free_block(cur, bp);
+
+ /*
+ * Don't allow block freeing for a staging cursor, because staging
+ * cursors do not support regular btree modifications.
+ */
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
error = cur->bc_ops->free_block(cur, bp);
if (!error) {
xfs_trans_binval(cur->bc_tp, bp);
@@ -357,57 +501,83 @@ xfs_btree_free_block(
*/
void
xfs_btree_del_cursor(
- xfs_btree_cur_t *cur, /* btree cursor */
- int error) /* del because of error */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int error) /* del because of error */
{
- int i; /* btree level */
+ int i; /* btree level */
/*
- * Clear the buffer pointers, and release the buffers.
- * If we're doing this in the face of an error, we
- * need to make sure to inspect all of the entries
- * in the bc_bufs array for buffers to be unlocked.
- * This is because some of the btree code works from
- * level n down to 0, and if we get an error along
- * the way we won't have initialized all the entries
- * down to 0.
+ * Clear the buffer pointers and release the buffers. If we're doing
+ * this because of an error, inspect all of the entries in the bc_bufs
+ * array for buffers to be unlocked. This is because some of the btree
+ * code works from level n down to 0, and if we get an error along the
+ * way we won't have initialized all the entries down to 0.
*/
for (i = 0; i < cur->bc_nlevels; i++) {
- if (cur->bc_bufs[i])
- xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
+ if (cur->bc_levels[i].bp)
+ xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
else if (!error)
break;
}
+
/*
- * Can't free a bmap cursor without having dealt with the
- * allocated indirect blocks' accounting.
- */
- ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
- cur->bc_private.b.allocated == 0);
- /*
- * Free the cursor.
+ * If we are doing a BMBT update, the number of unaccounted blocks
+ * allocated during this cursor life time should be zero. If it's not
+ * zero, then we should be shut down or on our way to shutdown due to
+ * cancelling a dirty transaction on error.
*/
- kmem_zone_free(xfs_btree_cur_zone, cur);
+ ASSERT(!xfs_btree_is_bmap(cur->bc_ops) || cur->bc_bmap.allocated == 0 ||
+ xfs_is_shutdown(cur->bc_mp) || error != 0);
+
+ if (cur->bc_group)
+ xfs_group_put(cur->bc_group);
+ kmem_cache_free(cur->bc_cache, cur);
+}
+
+/* Return the buffer target for this btree's buffer. */
+static inline struct xfs_buftarg *
+xfs_btree_buftarg(
+ struct xfs_btree_cur *cur)
+{
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM)
+ return cur->bc_mem.xfbtree->target;
+ return cur->bc_mp->m_ddev_targp;
+}
+
+/* Return the block size (in units of 512b sectors) for this btree. */
+static inline unsigned int
+xfs_btree_bbsize(
+ struct xfs_btree_cur *cur)
+{
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM)
+ return XFBNO_BBSIZE;
+ return cur->bc_mp->m_bsize;
}
/*
* Duplicate the btree cursor.
* Allocate a new one, copy the record, re-get the buffers.
*/
-int /* error */
+int /* error */
xfs_btree_dup_cursor(
- xfs_btree_cur_t *cur, /* input cursor */
- xfs_btree_cur_t **ncur) /* output cursor */
+ struct xfs_btree_cur *cur, /* input cursor */
+ struct xfs_btree_cur **ncur) /* output cursor */
{
- xfs_buf_t *bp; /* btree block's buffer pointer */
- int error; /* error return value */
- int i; /* level number of btree block */
- xfs_mount_t *mp; /* mount structure for filesystem */
- xfs_btree_cur_t *new; /* new cursor value */
- xfs_trans_t *tp; /* transaction pointer, can be NULL */
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_trans *tp = cur->bc_tp;
+ struct xfs_buf *bp;
+ struct xfs_btree_cur *new;
+ int error;
+ int i;
- tp = cur->bc_tp;
- mp = cur->bc_mp;
+ /*
+ * Don't allow staging cursors to be duplicated because they're supposed
+ * to be kept private to a single thread.
+ */
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
/*
* Allocate a new cursor like the old one.
@@ -423,21 +593,24 @@ xfs_btree_dup_cursor(
* For each level current, re-get the buffer and copy the ptr value.
*/
for (i = 0; i < new->bc_nlevels; i++) {
- new->bc_ptrs[i] = cur->bc_ptrs[i];
- new->bc_ra[i] = cur->bc_ra[i];
- bp = cur->bc_bufs[i];
+ new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
+ new->bc_levels[i].ra = cur->bc_levels[i].ra;
+ bp = cur->bc_levels[i].bp;
if (bp) {
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_BUF_ADDR(bp), mp->m_bsize,
- 0, &bp,
- cur->bc_ops->buf_ops);
+ error = xfs_trans_read_buf(mp, tp,
+ xfs_btree_buftarg(cur),
+ xfs_buf_daddr(bp),
+ xfs_btree_bbsize(cur), 0, &bp,
+ cur->bc_ops->buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_btree_mark_sick(new);
if (error) {
xfs_btree_del_cursor(new, error);
*ncur = NULL;
return error;
}
}
- new->bc_bufs[i] = bp;
+ new->bc_levels[i].bp = bp;
}
*ncur = new;
return 0;
@@ -472,7 +645,7 @@ xfs_btree_dup_cursor(
* record, key or pointer (xfs_btree_*_addr). Note that all addressing
* inside the btree block is done using indices starting at one, not zero!
*
- * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
+ * If XFS_BTGEO_OVERLAPPING is set, then this btree supports keys containing
* overlapping intervals. In such a tree, records are still sorted lowest to
* highest and indexed by the smallest key value that refers to the record.
* However, nodes are different: each pointer has two associated keys -- one
@@ -522,26 +695,17 @@ xfs_btree_dup_cursor(
*/
static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
+ if (xfs_has_crc(cur->bc_mp))
return XFS_BTREE_LBLOCK_CRC_LEN;
return XFS_BTREE_LBLOCK_LEN;
}
- if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ if (xfs_has_crc(cur->bc_mp))
return XFS_BTREE_SBLOCK_CRC_LEN;
return XFS_BTREE_SBLOCK_LEN;
}
/*
- * Return size of btree block pointers for this btree instance.
- */
-static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
-{
- return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
- sizeof(__be64) : sizeof(__be32);
-}
-
-/*
* Calculate offset of the n-th record in a btree block.
*/
STATIC size_t
@@ -588,7 +752,7 @@ xfs_btree_ptr_offset(
{
return xfs_btree_block_len(cur) +
cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
- (n - 1) * xfs_btree_ptr_len(cur);
+ (n - 1) * cur->bc_ops->ptr_len;
}
/*
@@ -647,6 +811,17 @@ xfs_btree_ptr_addr(
((char *)block + xfs_btree_ptr_offset(cur, n, level));
}
+struct xfs_ifork *
+xfs_btree_ifork_ptr(
+ struct xfs_btree_cur *cur)
+{
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
+
+ if (cur->bc_flags & XFS_BTREE_STAGING)
+ return cur->bc_ino.ifake->if_fork;
+ return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
+}
+
/*
* Get the root block which is stored in the inode.
*
@@ -657,9 +832,8 @@ STATIC struct xfs_btree_block *
xfs_btree_get_iroot(
struct xfs_btree_cur *cur)
{
- struct xfs_ifork *ifp;
+ struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
- ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
return (struct xfs_btree_block *)ifp->if_broot;
}
@@ -673,84 +847,26 @@ xfs_btree_get_block(
int level, /* level in btree */
struct xfs_buf **bpp) /* buffer containing the block */
{
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (level == cur->bc_nlevels - 1)) {
+ if (xfs_btree_at_iroot(cur, level)) {
*bpp = NULL;
return xfs_btree_get_iroot(cur);
}
- *bpp = cur->bc_bufs[level];
+ *bpp = cur->bc_levels[level].bp;
return XFS_BUF_TO_BLOCK(*bpp);
}
/*
- * Get a buffer for the block, return it with no data read.
- * Long-form addressing.
- */
-xfs_buf_t * /* buffer for fsbno */
-xfs_btree_get_bufl(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_fsblock_t fsbno, /* file system block number */
- uint lock) /* lock flags for get_buf */
-{
- xfs_daddr_t d; /* real disk block address */
-
- ASSERT(fsbno != NULLFSBLOCK);
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-}
-
-/*
- * Get a buffer for the block, return it with no data read.
- * Short-form addressing.
- */
-xfs_buf_t * /* buffer for agno/agbno */
-xfs_btree_get_bufs(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* allocation group block number */
- uint lock) /* lock flags for get_buf */
-{
- xfs_daddr_t d; /* real disk block address */
-
- ASSERT(agno != NULLAGNUMBER);
- ASSERT(agbno != NULLAGBLOCK);
- d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-}
-
-/*
- * Check for the cursor referring to the last block at the given level.
- */
-int /* 1=is last block, 0=not last block */
-xfs_btree_islastblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level) /* level to check */
-{
- struct xfs_btree_block *block; /* generic btree block pointer */
- xfs_buf_t *bp; /* buffer containing block */
-
- block = xfs_btree_get_block(cur, level, &bp);
- xfs_btree_check_block(cur, block, level, bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
- else
- return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
-}
-
-/*
* Change the cursor to point to the first record at the given level.
* Other levels are unaffected.
*/
STATIC int /* success=1, failure=0 */
xfs_btree_firstrec(
- xfs_btree_cur_t *cur, /* btree cursor */
+ struct xfs_btree_cur *cur, /* btree cursor */
int level) /* level to change */
{
struct xfs_btree_block *block; /* generic btree block pointer */
- xfs_buf_t *bp; /* buffer containing block */
+ struct xfs_buf *bp; /* buffer containing block */
/*
* Get the block pointer for this level.
@@ -766,7 +882,7 @@ xfs_btree_firstrec(
/*
* Set the ptr value to 1, that's the first record/key.
*/
- cur->bc_ptrs[level] = 1;
+ cur->bc_levels[level].ptr = 1;
return 1;
}
@@ -776,11 +892,11 @@ xfs_btree_firstrec(
*/
STATIC int /* success=1, failure=0 */
xfs_btree_lastrec(
- xfs_btree_cur_t *cur, /* btree cursor */
+ struct xfs_btree_cur *cur, /* btree cursor */
int level) /* level to change */
{
struct xfs_btree_block *block; /* generic btree block pointer */
- xfs_buf_t *bp; /* buffer containing block */
+ struct xfs_buf *bp; /* buffer containing block */
/*
* Get the block pointer for this level.
@@ -796,7 +912,7 @@ xfs_btree_lastrec(
/*
* Set the ptr value to numrecs, that's the last record/key.
*/
- cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
+ cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
return 1;
}
@@ -806,20 +922,20 @@ xfs_btree_lastrec(
*/
void
xfs_btree_offsets(
- int64_t fields, /* bitmask of fields */
+ uint32_t fields, /* bitmask of fields */
const short *offsets, /* table of field offsets */
int nbits, /* number of bits to inspect */
int *first, /* output: first byte offset */
int *last) /* output: last byte offset */
{
int i; /* current bit number */
- int64_t imask; /* mask for current bit number */
+ uint32_t imask; /* mask for current bit number */
ASSERT(fields != 0);
/*
* Find the lowest bit, so the first byte offset.
*/
- for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
+ for (i = 0, imask = 1u; ; i++, imask <<= 1) {
if (imask & fields) {
*first = offsets[i];
break;
@@ -828,7 +944,7 @@ xfs_btree_offsets(
/*
* Find the highest bit, so the last byte offset.
*/
- for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
+ for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
if (imask & fields) {
*last = offsets[i + 1] - 1;
break;
@@ -836,96 +952,52 @@ xfs_btree_offsets(
}
}
-/*
- * Get a buffer for the block, return it read in.
- * Long-form addressing.
- */
-int
-xfs_btree_read_bufl(
- struct xfs_mount *mp, /* file system mount point */
- struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t fsbno, /* file system block number */
- uint lock, /* lock flags for read_buf */
- struct xfs_buf **bpp, /* buffer for fsbno */
- int refval, /* ref count value for buffer */
- const struct xfs_buf_ops *ops)
-{
- struct xfs_buf *bp; /* return value */
- xfs_daddr_t d; /* real disk block address */
- int error;
-
- if (!xfs_verify_fsbno(mp, fsbno))
- return -EFSCORRUPTED;
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
- mp->m_bsize, lock, &bp, ops);
- if (error)
- return error;
- if (bp)
- xfs_buf_set_ref(bp, refval);
- *bpp = bp;
- return 0;
-}
-
-/*
- * Read-ahead the block, don't wait for it, don't return a buffer.
- * Long-form addressing.
- */
-/* ARGSUSED */
-void
-xfs_btree_reada_bufl(
- struct xfs_mount *mp, /* file system mount point */
- xfs_fsblock_t fsbno, /* file system block number */
- xfs_extlen_t count, /* count of filesystem blocks */
- const struct xfs_buf_ops *ops)
+STATIC int
+xfs_btree_readahead_fsblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
{
- xfs_daddr_t d;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+ int rval = 0;
- ASSERT(fsbno != NULLFSBLOCK);
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
-}
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
+ xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, left),
+ mp->m_bsize, cur->bc_ops->buf_ops);
+ rval++;
+ }
-/*
- * Read-ahead the block, don't wait for it, don't return a buffer.
- * Short-form addressing.
- */
-/* ARGSUSED */
-void
-xfs_btree_reada_bufs(
- struct xfs_mount *mp, /* file system mount point */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* allocation group block number */
- xfs_extlen_t count, /* count of filesystem blocks */
- const struct xfs_buf_ops *ops)
-{
- xfs_daddr_t d;
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
+ xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, right),
+ mp->m_bsize, cur->bc_ops->buf_ops);
+ rval++;
+ }
- ASSERT(agno != NULLAGNUMBER);
- ASSERT(agbno != NULLAGBLOCK);
- d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
+ return rval;
}
STATIC int
-xfs_btree_readahead_lblock(
+xfs_btree_readahead_memblock(
struct xfs_btree_cur *cur,
int lr,
struct xfs_btree_block *block)
{
+ struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target;
+ xfbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
int rval = 0;
- xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
- xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
- xfs_btree_reada_bufl(cur->bc_mp, left, 1,
- cur->bc_ops->buf_ops);
+ xfs_buf_readahead(btp, xfbno_to_daddr(left), XFBNO_BBSIZE,
+ cur->bc_ops->buf_ops);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
- xfs_btree_reada_bufl(cur->bc_mp, right, 1,
- cur->bc_ops->buf_ops);
+ xfs_buf_readahead(btp, xfbno_to_daddr(right), XFBNO_BBSIZE,
+ cur->bc_ops->buf_ops);
rval++;
}
@@ -933,25 +1005,28 @@ xfs_btree_readahead_lblock(
}
STATIC int
-xfs_btree_readahead_sblock(
+xfs_btree_readahead_agblock(
struct xfs_btree_cur *cur,
int lr,
- struct xfs_btree_block *block)
+ struct xfs_btree_block *block)
{
- int rval = 0;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
-
+ int rval = 0;
if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- left, 1, cur->bc_ops->buf_ops);
+ xfs_buf_readahead(mp->m_ddev_targp,
+ xfs_agbno_to_daddr(pag, left), mp->m_bsize,
+ cur->bc_ops->buf_ops);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
- right, 1, cur->bc_ops->buf_ops);
+ xfs_buf_readahead(mp->m_ddev_targp,
+ xfs_agbno_to_daddr(pag, right), mp->m_bsize,
+ cur->bc_ops->buf_ops);
rval++;
}
@@ -974,44 +1049,52 @@ xfs_btree_readahead(
* No readahead needed if we are at the root level and the
* btree root is stored in the inode.
*/
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (lev == cur->bc_nlevels - 1))
+ if (xfs_btree_at_iroot(cur, lev))
return 0;
- if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
+ if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
return 0;
- cur->bc_ra[lev] |= lr;
- block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
+ cur->bc_levels[lev].ra |= lr;
+ block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return xfs_btree_readahead_lblock(cur, lr, block);
- return xfs_btree_readahead_sblock(cur, lr, block);
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
+ return xfs_btree_readahead_agblock(cur, lr, block);
+ case XFS_BTREE_TYPE_INODE:
+ return xfs_btree_readahead_fsblock(cur, lr, block);
+ case XFS_BTREE_TYPE_MEM:
+ return xfs_btree_readahead_memblock(cur, lr, block);
+ default:
+ ASSERT(0);
+ return 0;
+ }
}
STATIC int
xfs_btree_ptr_to_daddr(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr *ptr,
- xfs_daddr_t *daddr)
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ xfs_daddr_t *daddr)
{
- xfs_fsblock_t fsbno;
- xfs_agblock_t agbno;
int error;
error = xfs_btree_check_ptr(cur, ptr, 0, 1);
if (error)
return error;
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- fsbno = be64_to_cpu(ptr->l);
- *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
- } else {
- agbno = be32_to_cpu(ptr->s);
- *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
- agbno);
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
+ *daddr = xfs_agbno_to_daddr(to_perag(cur->bc_group),
+ be32_to_cpu(ptr->s));
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ *daddr = xfbno_to_daddr(be64_to_cpu(ptr->l));
+ break;
}
-
return 0;
}
@@ -1031,8 +1114,9 @@ xfs_btree_readahead_ptr(
if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
return;
- xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr,
- cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+ xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
+ xfs_btree_bbsize(cur) * count,
+ cur->bc_ops->buf_ops);
}
/*
@@ -1041,53 +1125,64 @@ xfs_btree_readahead_ptr(
*/
STATIC void
xfs_btree_setbuf(
- xfs_btree_cur_t *cur, /* btree cursor */
+ struct xfs_btree_cur *cur, /* btree cursor */
int lev, /* level in btree */
- xfs_buf_t *bp) /* new buffer to set */
+ struct xfs_buf *bp) /* new buffer to set */
{
struct xfs_btree_block *b; /* btree block */
- if (cur->bc_bufs[lev])
- xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
- cur->bc_bufs[lev] = bp;
- cur->bc_ra[lev] = 0;
+ if (cur->bc_levels[lev].bp)
+ xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
+ cur->bc_levels[lev].bp = bp;
+ cur->bc_levels[lev].ra = 0;
b = XFS_BUF_TO_BLOCK(bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
- cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
+ cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
- cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
+ cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
} else {
if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
- cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
+ cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
- cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
+ cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
}
}
bool
xfs_btree_ptr_is_null(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr *ptr)
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
return ptr->l == cpu_to_be64(NULLFSBLOCK);
else
return ptr->s == cpu_to_be32(NULLAGBLOCK);
}
-STATIC void
+void
xfs_btree_set_ptr_null(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
ptr->l = cpu_to_be64(NULLFSBLOCK);
else
ptr->s = cpu_to_be32(NULLAGBLOCK);
}
+static inline bool
+xfs_btree_ptrs_equal(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr1,
+ union xfs_btree_ptr *ptr2)
+{
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
+ return ptr1->l == ptr2->l;
+ return ptr1->s == ptr2->s;
+}
+
/*
* Get/set/init sibling pointers
*/
@@ -1100,7 +1195,7 @@ xfs_btree_get_sibling(
{
ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
if (lr == XFS_BB_RIGHTSIB)
ptr->l = block->bb_u.l.bb_rightsib;
else
@@ -1113,16 +1208,16 @@ xfs_btree_get_sibling(
}
}
-STATIC void
+void
xfs_btree_set_sibling(
- struct xfs_btree_cur *cur,
- struct xfs_btree_block *block,
- union xfs_btree_ptr *ptr,
- int lr)
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ const union xfs_btree_ptr *ptr,
+ int lr)
{
ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
if (lr == XFS_BB_RIGHTSIB)
block->bb_u.l.bb_rightsib = ptr->l;
else
@@ -1135,25 +1230,24 @@ xfs_btree_set_sibling(
}
}
-void
-xfs_btree_init_block_int(
+static void
+__xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_btree_block *buf,
+ const struct xfs_btree_ops *ops,
xfs_daddr_t blkno,
- xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
- __u64 owner,
- unsigned int flags)
+ __u64 owner)
{
- int crc = xfs_sb_version_hascrc(&mp->m_sb);
- __u32 magic = xfs_btree_magic(crc, btnum);
+ bool crc = xfs_has_crc(mp);
+ __u32 magic = xfs_btree_magic(mp, ops);
buf->bb_magic = cpu_to_be32(magic);
buf->bb_level = cpu_to_be16(level);
buf->bb_numrecs = cpu_to_be16(numrecs);
- if (flags & XFS_BTREE_LONG_PTRS) {
+ if (ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
if (crc) {
@@ -1164,14 +1258,12 @@ xfs_btree_init_block_int(
buf->bb_u.l.bb_lsn = 0;
}
} else {
- /* owner is a 32 bit value on short blocks */
- __u32 __owner = (__u32)owner;
-
buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
if (crc) {
buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
- buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
+ /* owner is a 32 bit value on short blocks */
+ buf->bb_u.s.bb_owner = cpu_to_be32((__u32)owner);
uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
buf->bb_u.s.bb_lsn = 0;
}
@@ -1180,65 +1272,57 @@ xfs_btree_init_block_int(
void
xfs_btree_init_block(
- struct xfs_mount *mp,
- struct xfs_buf *bp,
- xfs_btnum_t btnum,
- __u16 level,
- __u16 numrecs,
- __u64 owner,
- unsigned int flags)
+ struct xfs_mount *mp,
+ struct xfs_btree_block *block,
+ const struct xfs_btree_ops *ops,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner)
{
- xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
- btnum, level, numrecs, owner, flags);
+ __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
+ numrecs, owner);
}
-STATIC void
-xfs_btree_init_block_cur(
- struct xfs_btree_cur *cur,
- struct xfs_buf *bp,
- int level,
- int numrecs)
+void
+xfs_btree_init_buf(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ const struct xfs_btree_ops *ops,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner)
{
- __u64 owner;
-
- /*
- * we can pull the owner from the cursor right now as the different
- * owners align directly with the pointer size of the btree. This may
- * change in future, but is safe for current users of the generic btree
- * code.
- */
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- owner = cur->bc_private.b.ip->i_ino;
- else
- owner = cur->bc_private.a.agno;
-
- xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
- cur->bc_btnum, level, numrecs,
- owner, cur->bc_flags);
+ __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
+ xfs_buf_daddr(bp), level, numrecs, owner);
+ bp->b_ops = ops->buf_ops;
}
-/*
- * Return true if ptr is the last record in the btree and
- * we need to track updates to this record. The decision
- * will be further refined in the update_lastrec method.
- */
-STATIC int
-xfs_btree_is_lastrec(
- struct xfs_btree_cur *cur,
- struct xfs_btree_block *block,
- int level)
+static inline __u64
+xfs_btree_owner(
+ struct xfs_btree_cur *cur)
{
- union xfs_btree_ptr ptr;
-
- if (level > 0)
- return 0;
- if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ return cur->bc_mem.xfbtree->owner;
+ case XFS_BTREE_TYPE_INODE:
+ return cur->bc_ino.ip->i_ino;
+ case XFS_BTREE_TYPE_AG:
+ return cur->bc_group->xg_gno;
+ default:
+ ASSERT(0);
return 0;
+ }
+}
- xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
- if (!xfs_btree_ptr_is_null(cur, &ptr))
- return 0;
- return 1;
+void
+xfs_btree_init_block_cur(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp,
+ int level,
+ int numrecs)
+{
+ xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs,
+ xfs_btree_owner(cur));
}
STATIC void
@@ -1247,66 +1331,46 @@ xfs_btree_buf_to_ptr(
struct xfs_buf *bp,
union xfs_btree_ptr *ptr)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
- XFS_BUF_ADDR(bp)));
- else {
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
- XFS_BUF_ADDR(bp)));
+ xfs_buf_daddr(bp)));
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
+ xfs_buf_daddr(bp)));
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ ptr->l = cpu_to_be64(xfs_daddr_to_xfbno(xfs_buf_daddr(bp)));
+ break;
}
}
-STATIC void
+static inline void
xfs_btree_set_refs(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
{
- switch (cur->bc_btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
- break;
- case XFS_BTNUM_INO:
- case XFS_BTNUM_FINO:
- xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
- break;
- case XFS_BTNUM_BMAP:
- xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
- break;
- case XFS_BTNUM_RMAP:
- xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
- break;
- case XFS_BTNUM_REFC:
- xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
- break;
- default:
- ASSERT(0);
- }
+ xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
}
-STATIC int
+int
xfs_btree_get_buf_block(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr *ptr,
- int flags,
- struct xfs_btree_block **block,
- struct xfs_buf **bpp)
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ struct xfs_btree_block **block,
+ struct xfs_buf **bpp)
{
- struct xfs_mount *mp = cur->bc_mp;
- xfs_daddr_t d;
- int error;
-
- /* need to sort out how callers deal with failures first */
- ASSERT(!(flags & XBF_TRYLOCK));
+ xfs_daddr_t d;
+ int error;
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
- *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
- mp->m_bsize, flags);
-
- if (!*bpp)
- return -ENOMEM;
+ error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
+ xfs_btree_bbsize(cur), 0, bpp);
+ if (error)
+ return error;
(*bpp)->b_ops = cur->bc_ops->buf_ops;
*block = XFS_BUF_TO_BLOCK(*bpp);
@@ -1317,13 +1381,13 @@ xfs_btree_get_buf_block(
* Read in the buffer at the given ptr and return the buffer and
* the block pointer within the buffer.
*/
-STATIC int
+int
xfs_btree_read_buf_block(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr *ptr,
- int flags,
- struct xfs_btree_block **block,
- struct xfs_buf **bpp)
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ int flags,
+ struct xfs_btree_block **block,
+ struct xfs_buf **bpp)
{
struct xfs_mount *mp = cur->bc_mp;
xfs_daddr_t d;
@@ -1335,9 +1399,11 @@ xfs_btree_read_buf_block(
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
- error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
- mp->m_bsize, flags, bpp,
- cur->bc_ops->buf_ops);
+ error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
+ xfs_btree_bbsize(cur), flags, bpp,
+ cur->bc_ops->buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_btree_mark_sick(cur);
if (error)
return error;
@@ -1349,12 +1415,12 @@ xfs_btree_read_buf_block(
/*
* Copy keys from one btree block to another.
*/
-STATIC void
+void
xfs_btree_copy_keys(
- struct xfs_btree_cur *cur,
- union xfs_btree_key *dst_key,
- union xfs_btree_key *src_key,
- int numkeys)
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *dst_key,
+ const union xfs_btree_key *src_key,
+ int numkeys)
{
ASSERT(numkeys >= 0);
memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
@@ -1377,15 +1443,15 @@ xfs_btree_copy_recs(
/*
* Copy block pointers from one btree block to another.
*/
-STATIC void
+void
xfs_btree_copy_ptrs(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *dst_ptr,
- union xfs_btree_ptr *src_ptr,
+ const union xfs_btree_ptr *src_ptr,
int numptrs)
{
ASSERT(numptrs >= 0);
- memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
+ memcpy(dst_ptr, src_ptr, numptrs * cur->bc_ops->ptr_len);
}
/*
@@ -1441,8 +1507,8 @@ xfs_btree_shift_ptrs(
ASSERT(numptrs >= 0);
ASSERT(dir == 1 || dir == -1);
- dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
- memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
+ dst_ptr = (char *)ptr + (dir * cur->bc_ops->ptr_len);
+ memmove(dst_ptr, ptr, numptrs * cur->bc_ops->ptr_len);
}
/*
@@ -1462,8 +1528,8 @@ xfs_btree_log_keys(
xfs_btree_key_offset(cur, first),
xfs_btree_key_offset(cur, last + 1) - 1);
} else {
- xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
- xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
+ xfs_ilog_fbroot(cur->bc_ino.whichfork));
}
}
@@ -1477,12 +1543,16 @@ xfs_btree_log_recs(
int first,
int last)
{
+ if (!bp) {
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
+ xfs_ilog_fbroot(cur->bc_ino.whichfork));
+ return;
+ }
xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_rec_offset(cur, first),
xfs_btree_rec_offset(cur, last + 1) - 1);
-
}
/*
@@ -1505,8 +1575,8 @@ xfs_btree_log_ptrs(
xfs_btree_ptr_offset(cur, first, level),
xfs_btree_ptr_offset(cur, last + 1, level) - 1);
} else {
- xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
- xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
+ xfs_ilog_fbroot(cur->bc_ino.whichfork));
}
}
@@ -1518,7 +1588,7 @@ void
xfs_btree_log_block(
struct xfs_btree_cur *cur, /* btree cursor */
struct xfs_buf *bp, /* buffer containing btree block */
- int fields) /* mask of fields: XFS_BB_... */
+ uint32_t fields) /* mask of fields: XFS_BB_... */
{
int first; /* first byte offset logged */
int last; /* last byte offset logged */
@@ -1553,7 +1623,7 @@ xfs_btree_log_block(
if (bp) {
int nbits;
- if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+ if (xfs_has_crc(cur->bc_mp)) {
/*
* We don't log the CRC when updating a btree
* block but instead recreate it during log
@@ -1568,14 +1638,14 @@ xfs_btree_log_block(
nbits = XFS_BB_NUM_BITS;
}
xfs_btree_offsets(fields,
- (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) ?
loffsets : soffsets,
nbits, &first, &last);
xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp, first, last);
} else {
- xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
- xfs_ilog_fbroot(cur->bc_private.b.whichfork));
+ xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
+ xfs_ilog_fbroot(cur->bc_ino.whichfork));
}
}
@@ -1610,7 +1680,7 @@ xfs_btree_increment(
#endif
/* We're done if we remain in the block after the increment. */
- if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
+ if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
goto out1;
/* Fail if we just went off the right edge of the tree. */
@@ -1633,7 +1703,7 @@ xfs_btree_increment(
goto error0;
#endif
- if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
+ if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
break;
/* Read-ahead the right block for the next loop. */
@@ -1645,9 +1715,10 @@ xfs_btree_increment(
* confused or have the tree root in an inode.
*/
if (lev == cur->bc_nlevels) {
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
goto out0;
ASSERT(0);
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1660,14 +1731,14 @@ xfs_btree_increment(
for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
union xfs_btree_ptr *ptrp;
- ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
+ ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
--lev;
error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
if (error)
goto error0;
xfs_btree_setbuf(cur, lev, bp);
- cur->bc_ptrs[lev] = 1;
+ cur->bc_levels[lev].ptr = 1;
}
out1:
*stat = 1;
@@ -1692,7 +1763,7 @@ xfs_btree_decrement(
int *stat) /* success/failure */
{
struct xfs_btree_block *block;
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
int error; /* error return value */
int lev;
union xfs_btree_ptr ptr;
@@ -1703,7 +1774,7 @@ xfs_btree_decrement(
xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
/* We're done if we remain in the block after the decrement. */
- if (--cur->bc_ptrs[level] > 0)
+ if (--cur->bc_levels[level].ptr > 0)
goto out1;
/* Get a pointer to the btree block. */
@@ -1727,7 +1798,7 @@ xfs_btree_decrement(
* Stop when we don't go off the left edge of a block.
*/
for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
- if (--cur->bc_ptrs[lev] > 0)
+ if (--cur->bc_levels[lev].ptr > 0)
break;
/* Read-ahead the left block for the next loop. */
xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
@@ -1738,9 +1809,10 @@ xfs_btree_decrement(
* or the root of the tree is in an inode.
*/
if (lev == cur->bc_nlevels) {
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
goto out0;
ASSERT(0);
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1753,13 +1825,13 @@ xfs_btree_decrement(
for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
union xfs_btree_ptr *ptrp;
- ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
+ ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
--lev;
error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
if (error)
goto error0;
xfs_btree_setbuf(cur, lev, bp);
- cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
+ cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
}
out1:
*stat = 1;
@@ -1773,20 +1845,46 @@ error0:
return error;
}
+/*
+ * Check the btree block owner now that we have the context to know who the
+ * real owner is.
+ */
+static inline xfs_failaddr_t
+xfs_btree_check_block_owner(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block)
+{
+ __u64 owner;
+
+ if (!xfs_has_crc(cur->bc_mp) ||
+ (cur->bc_flags & XFS_BTREE_BMBT_INVALID_OWNER))
+ return NULL;
+
+ owner = xfs_btree_owner(cur);
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
+ if (be64_to_cpu(block->bb_u.l.bb_owner) != owner)
+ return __this_address;
+ } else {
+ if (be32_to_cpu(block->bb_u.s.bb_owner) != owner)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
int
xfs_btree_lookup_get_block(
- struct xfs_btree_cur *cur, /* btree cursor */
- int level, /* level in the btree */
- union xfs_btree_ptr *pp, /* ptr to btree block */
- struct xfs_btree_block **blkp) /* return btree block */
+ struct xfs_btree_cur *cur, /* btree cursor */
+ int level, /* level in the btree */
+ const union xfs_btree_ptr *pp, /* ptr to btree block */
+ struct xfs_btree_block **blkp) /* return btree block */
{
struct xfs_buf *bp; /* buffer pointer for btree block */
xfs_daddr_t daddr;
int error = 0;
/* special case the root block if in an inode */
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (level == cur->bc_nlevels - 1)) {
+ if (xfs_btree_at_iroot(cur, level)) {
*blkp = xfs_btree_get_iroot(cur);
return 0;
}
@@ -1797,11 +1895,11 @@ xfs_btree_lookup_get_block(
*
* Otherwise throw it away and get a new one.
*/
- bp = cur->bc_bufs[level];
+ bp = cur->bc_levels[level].bp;
error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
if (error)
return error;
- if (bp && XFS_BUF_ADDR(bp) == daddr) {
+ if (bp && xfs_buf_daddr(bp) == daddr) {
*blkp = XFS_BUF_TO_BLOCK(bp);
return 0;
}
@@ -1811,11 +1909,7 @@ xfs_btree_lookup_get_block(
return error;
/* Check the inode owner since the verifiers don't. */
- if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
- !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) &&
- (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
- be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
- cur->bc_private.b.ip->i_ino)
+ if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
goto out_bad;
/* Did we get the level we were looking for? */
@@ -1831,7 +1925,9 @@ xfs_btree_lookup_get_block(
out_bad:
*blkp = NULL;
+ xfs_buf_mark_corrupt(bp);
xfs_trans_brelse(cur->bc_tp, bp);
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@@ -1858,6 +1954,27 @@ xfs_lookup_get_search_key(
}
/*
+ * Initialize a pointer to the root block.
+ */
+void
+xfs_btree_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
+ /*
+ * Inode-rooted btrees call xfs_btree_get_iroot to find the root
+ * in xfs_btree_lookup_get_block and don't need a pointer here.
+ */
+ ptr->l = 0;
+ } else if (cur->bc_flags & XFS_BTREE_STAGING) {
+ ptr->s = cpu_to_be32(cur->bc_ag.afake->af_root);
+ } else {
+ cur->bc_ops->init_ptr_from_cur(cur, ptr);
+ }
+}
+
+/*
* Lookup the record. The cursor is made to point to it, based on dir.
* stat is set to 0 if can't find any such record, 1 for success.
*/
@@ -1868,7 +1985,7 @@ xfs_btree_lookup(
int *stat) /* success/failure */
{
struct xfs_btree_block *block; /* current btree block */
- int64_t diff; /* difference for the current key */
+ int cmp_r; /* current key comparison result */
int error; /* error return value */
int keyno; /* current key number */
int level; /* level in the btree */
@@ -1878,14 +1995,16 @@ xfs_btree_lookup(
XFS_BTREE_STATS_INC(cur, lookup);
/* No such thing as a zero-level tree. */
- if (cur->bc_nlevels == 0)
+ if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
block = NULL;
keyno = 0;
/* initialise start pointer from cursor */
- cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ xfs_btree_init_ptr_from_cur(cur, &ptr);
pp = &ptr;
/*
@@ -1894,13 +2013,13 @@ xfs_btree_lookup(
* on the lookup record, then follow the corresponding block
* pointer down to the next level.
*/
- for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+ for (level = cur->bc_nlevels - 1, cmp_r = 1; level >= 0; level--) {
/* Get the block we need to do the lookup on. */
error = xfs_btree_lookup_get_block(cur, level, pp, &block);
if (error)
goto error0;
- if (diff == 0) {
+ if (cmp_r == 0) {
/*
* If we already had a key match at a higher level, we
* know we need to use the first entry in this block.
@@ -1922,10 +2041,11 @@ xfs_btree_lookup(
XFS_ERRLEVEL_LOW,
cur->bc_mp, block,
sizeof(*block));
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
- cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+ cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
*stat = 0;
return 0;
}
@@ -1945,15 +2065,16 @@ xfs_btree_lookup(
keyno, block, &key);
/*
- * Compute difference to get next direction:
+ * Compute comparison result to get next
+ * direction:
* - less than, move right
* - greater than, move left
* - equal, we're done
*/
- diff = cur->bc_ops->key_diff(cur, kp);
- if (diff < 0)
+ cmp_r = cur->bc_ops->cmp_key_with_cur(cur, kp);
+ if (cmp_r < 0)
low = keyno + 1;
- else if (diff > 0)
+ else if (cmp_r > 0)
high = keyno - 1;
else
break;
@@ -1969,7 +2090,7 @@ xfs_btree_lookup(
* If we moved left, need the previous key number,
* unless there isn't one.
*/
- if (diff > 0 && --keyno < 1)
+ if (cmp_r > 0 && --keyno < 1)
keyno = 1;
pp = xfs_btree_ptr_addr(cur, keyno, block);
@@ -1977,12 +2098,12 @@ xfs_btree_lookup(
if (error)
goto error0;
- cur->bc_ptrs[level] = keyno;
+ cur->bc_levels[level].ptr = keyno;
}
}
/* Done with the search. See if we need to adjust the results. */
- if (dir != XFS_LOOKUP_LE && diff < 0) {
+ if (dir != XFS_LOOKUP_LE && cmp_r < 0) {
keyno++;
/*
* If ge search and we went off the end of the block, but it's
@@ -1994,22 +2115,25 @@ xfs_btree_lookup(
!xfs_btree_ptr_is_null(cur, &ptr)) {
int i;
- cur->bc_ptrs[0] = keyno;
+ cur->bc_levels[0].ptr = keyno;
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
*stat = 1;
return 0;
}
- } else if (dir == XFS_LOOKUP_LE && diff > 0)
+ } else if (dir == XFS_LOOKUP_LE && cmp_r > 0)
keyno--;
- cur->bc_ptrs[0] = keyno;
+ cur->bc_levels[0].ptr = keyno;
/* Return if we succeeded or not. */
if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
*stat = 0;
- else if (dir != XFS_LOOKUP_EQ || diff == 0)
+ else if (dir != XFS_LOOKUP_EQ || cmp_r == 0)
*stat = 1;
else
*stat = 0;
@@ -2025,7 +2149,7 @@ xfs_btree_high_key_from_key(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
- ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
+ ASSERT(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING);
return (union xfs_btree_key *)((char *)key +
(cur->bc_ops->key_len / 2));
}
@@ -2046,14 +2170,13 @@ xfs_btree_get_leaf_keys(
rec = xfs_btree_rec_addr(cur, 1, block);
cur->bc_ops->init_key_from_rec(key, rec);
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
rec = xfs_btree_rec_addr(cur, n, block);
cur->bc_ops->init_high_key_from_rec(&hkey, rec);
- if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
- > 0)
+ if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
max_hkey = hkey;
}
@@ -2074,14 +2197,14 @@ xfs_btree_get_node_keys(
union xfs_btree_key *high;
int n;
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
memcpy(key, xfs_btree_key_addr(cur, 1, block),
cur->bc_ops->key_len / 2);
max_hkey = xfs_btree_high_key_addr(cur, 1, block);
for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
hkey = xfs_btree_high_key_addr(cur, n, block);
- if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
+ if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
max_hkey = hkey;
}
@@ -2118,7 +2241,7 @@ xfs_btree_needs_key_update(
struct xfs_btree_cur *cur,
int ptr)
{
- return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
+ return (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) || ptr == 1;
}
/*
@@ -2142,7 +2265,7 @@ __xfs_btree_updkeys(
struct xfs_buf *bp;
int ptr;
- ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
+ ASSERT(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING);
/* Exit if there aren't any parent levels to update. */
if (level + 1 >= cur->bc_nlevels)
@@ -2164,12 +2287,12 @@ __xfs_btree_updkeys(
if (error)
return error;
#endif
- ptr = cur->bc_ptrs[level];
+ ptr = cur->bc_levels[level].ptr;
nlkey = xfs_btree_key_addr(cur, ptr, block);
nhkey = xfs_btree_high_key_addr(cur, ptr, block);
if (!force_all &&
- !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
- cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
+ xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
+ xfs_btree_keycmp_eq(cur, nhkey, hkey))
break;
xfs_btree_copy_keys(cur, nlkey, lkey, 1);
xfs_btree_log_keys(cur, bp, ptr, ptr);
@@ -2211,7 +2334,7 @@ xfs_btree_update_keys(
ASSERT(level >= 0);
block = xfs_btree_get_block(cur, level, &bp);
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)
return __xfs_btree_updkeys(cur, level, block, bp, false);
/*
@@ -2231,7 +2354,7 @@ xfs_btree_update_keys(
if (error)
return error;
#endif
- ptr = cur->bc_ptrs[level];
+ ptr = cur->bc_levels[level].ptr;
kp = xfs_btree_key_addr(cur, ptr, block);
xfs_btree_copy_keys(cur, kp, &key, 1);
xfs_btree_log_keys(cur, bp, ptr, ptr);
@@ -2265,22 +2388,13 @@ xfs_btree_update(
goto error0;
#endif
/* Get the address of the rec to be updated. */
- ptr = cur->bc_ptrs[0];
+ ptr = cur->bc_levels[0].ptr;
rp = xfs_btree_rec_addr(cur, ptr, block);
/* Fill in the new contents and log them. */
xfs_btree_copy_recs(cur, rp, rec, 1);
xfs_btree_log_recs(cur, bp, ptr, ptr);
- /*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, 0)) {
- cur->bc_ops->update_lastrec(cur, block, rec,
- ptr, LASTREC_UPDATE);
- }
-
/* Pass new key value up to our parent. */
if (xfs_btree_needs_key_update(cur, ptr)) {
error = xfs_btree_update_keys(cur, 0);
@@ -2318,8 +2432,7 @@ xfs_btree_lshift(
int error; /* error return value */
int i;
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- level == cur->bc_nlevels - 1)
+ if (xfs_btree_at_iroot(cur, level))
goto out0;
/* Set up variables for this block as "right". */
@@ -2340,7 +2453,7 @@ xfs_btree_lshift(
* If the cursor entry is the one that would be moved, don't
* do it... it's too complicated.
*/
- if (cur->bc_ptrs[level] <= 1)
+ if (cur->bc_levels[level].ptr <= 1)
goto out0;
/* Set up the left neighbor as "left". */
@@ -2419,8 +2532,6 @@ xfs_btree_lshift(
XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
if (level > 0) {
/* It's a nonleaf. operate on keys and ptrs */
- int i; /* loop index */
-
for (i = 0; i < rrecs; i++) {
error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
if (error)
@@ -2448,12 +2559,16 @@ xfs_btree_lshift(
* Using a temporary cursor, update the parent key values of the
* block on the left.
*/
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
error = xfs_btree_dup_cursor(cur, &tcur);
if (error)
goto error0;
i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_decrement(tcur, level, &i);
if (error)
@@ -2473,7 +2588,7 @@ xfs_btree_lshift(
goto error0;
/* Slide the cursor value left one. */
- cur->bc_ptrs[level]--;
+ cur->bc_levels[level].ptr--;
*stat = 1;
return 0;
@@ -2512,8 +2627,7 @@ xfs_btree_rshift(
int error; /* error return value */
int i; /* loop counter */
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (level == cur->bc_nlevels - 1))
+ if (xfs_btree_at_iroot(cur, level))
goto out0;
/* Set up variables for this block as "left". */
@@ -2535,7 +2649,7 @@ xfs_btree_rshift(
* do it... it's too complicated.
*/
lrecs = xfs_btree_get_numrecs(left);
- if (cur->bc_ptrs[level] >= lrecs)
+ if (cur->bc_levels[level].ptr >= lrecs)
goto out0;
/* Set up the right neighbor as "right". */
@@ -2620,14 +2734,18 @@ xfs_btree_rshift(
if (error)
goto error0;
i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_increment(tcur, level, &i);
if (error)
goto error1;
/* Update the parent high keys of the left block, if needed. */
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
error = xfs_btree_update_keys(cur, level);
if (error)
goto error1;
@@ -2655,6 +2773,32 @@ error1:
return error;
}
+static inline int
+xfs_btree_alloc_block(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *hint_block,
+ union xfs_btree_ptr *new_block,
+ int *stat)
+{
+ int error;
+
+ /*
+ * Don't allow block allocation for a staging cursor, because staging
+ * cursors do not support regular btree modifications.
+ *
+ * Bulk loading uses a separate callback to obtain new blocks from a
+ * preallocated list, which prevents ENOSPC failures during loading.
+ */
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
+ trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
+ return error;
+}
+
/*
* Split cur/level block in half.
* Return new block number and the key to its first
@@ -2698,7 +2842,7 @@ __xfs_btree_split(
xfs_btree_buf_to_ptr(cur, lbp, &lptr);
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
+ error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
if (error)
goto error0;
if (*stat == 0)
@@ -2706,7 +2850,7 @@ __xfs_btree_split(
XFS_BTREE_STATS_INC(cur, alloc);
/* Set up the new block as "right". */
- error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
+ error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
if (error)
goto error0;
@@ -2720,7 +2864,7 @@ __xfs_btree_split(
*/
lrecs = xfs_btree_get_numrecs(left);
rrecs = lrecs / 2;
- if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
+ if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
rrecs++;
src_index = (lrecs - rrecs + 1);
@@ -2805,7 +2949,7 @@ __xfs_btree_split(
}
/* Update the parent high keys of the left block, if needed. */
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
error = xfs_btree_update_keys(cur, level);
if (error)
goto error0;
@@ -2816,9 +2960,9 @@ __xfs_btree_split(
* If it's just pointing past the last entry in left, then we'll
* insert there, so don't change anything in that case.
*/
- if (cur->bc_ptrs[level] > lrecs + 1) {
+ if (cur->bc_levels[level].ptr > lrecs + 1) {
xfs_btree_setbuf(cur, level, rbp);
- cur->bc_ptrs[level] -= lrecs;
+ cur->bc_levels[level].ptr -= lrecs;
}
/*
* If there are more levels, we'll need another cursor which refers
@@ -2828,7 +2972,7 @@ __xfs_btree_split(
error = xfs_btree_dup_cursor(cur, curp);
if (error)
goto error0;
- (*curp)->bc_ptrs[level + 1]++;
+ (*curp)->bc_levels[level + 1].ptr++;
}
*ptrp = rptr;
*stat = 1;
@@ -2841,6 +2985,7 @@ error0:
return error;
}
+#ifdef __KERNEL__
struct xfs_btree_split_args {
struct xfs_btree_cur *cur;
int level;
@@ -2864,7 +3009,7 @@ xfs_btree_split_worker(
struct xfs_btree_split_args *args = container_of(work,
struct xfs_btree_split_args, work);
unsigned long pflags;
- unsigned long new_pflags = PF_MEMALLOC_NOFS;
+ unsigned long new_pflags = 0;
/*
* we are in a transaction context here, but may also be doing work
@@ -2873,21 +3018,42 @@ xfs_btree_split_worker(
* in any way.
*/
if (args->kswapd)
- new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+ new_pflags |= PF_MEMALLOC | PF_KSWAPD;
current_set_flags_nested(&pflags, new_pflags);
+ xfs_trans_set_context(args->cur->bc_tp);
args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
args->key, args->curp, args->stat);
- complete(args->done);
+ xfs_trans_clear_context(args->cur->bc_tp);
current_restore_flags_nested(&pflags, new_pflags);
+
+ /*
+ * Do not access args after complete() has run here. We don't own args
+ * and the owner may run and free args before we return here.
+ */
+ complete(args->done);
+
}
/*
- * BMBT split requests often come in with little stack to work on. Push
+ * BMBT split requests often come in with little stack to work on so we push
* them off to a worker thread so there is lots of stack to use. For the other
* btree types, just call directly to avoid the context switch overhead here.
+ *
+ * Care must be taken here - the work queue rescuer thread introduces potential
+ * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
+ * AGFs to allocate blocks. A task being run by the rescuer could attempt to
+ * lock an AGF that is already locked by a task queued to run by the rescuer,
+ * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
+ * release it until the current thread it is running gains the lock.
+ *
+ * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
+ * already locked to allocate from. The only place that doesn't hold an AGF
+ * locked is unwritten extent conversion at IO completion, but that has already
+ * been offloaded to a worker thread and hence has no stack consumption issues
+ * we have to worry about.
*/
STATIC int /* error */
xfs_btree_split(
@@ -2901,7 +3067,8 @@ xfs_btree_split(
struct xfs_btree_split_args args;
DECLARE_COMPLETION_ONSTACK(done);
- if (cur->bc_btnum != XFS_BTNUM_BMAP)
+ if (!xfs_btree_is_bmap(cur->bc_ops) ||
+ cur->bc_tp->t_highest_agno == NULLAGNUMBER)
return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
args.cur = cur;
@@ -2918,7 +3085,134 @@ xfs_btree_split(
destroy_work_on_stack(&args.work);
return args.result;
}
+#else
+#define xfs_btree_split __xfs_btree_split
+#endif /* __KERNEL__ */
+
+/* Move the records from a root leaf block to a separate block. */
+STATIC void
+xfs_btree_promote_leaf_iroot(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ struct xfs_buf *cbp,
+ union xfs_btree_ptr *cptr,
+ struct xfs_btree_block *cblock)
+{
+ union xfs_btree_rec *rp;
+ union xfs_btree_rec *crp;
+ union xfs_btree_key *kp;
+ union xfs_btree_ptr *pp;
+ struct xfs_btree_block *broot;
+ int numrecs = xfs_btree_get_numrecs(block);
+
+ /* Copy the records from the leaf broot into the new child block. */
+ rp = xfs_btree_rec_addr(cur, 1, block);
+ crp = xfs_btree_rec_addr(cur, 1, cblock);
+ xfs_btree_copy_recs(cur, crp, rp, numrecs);
+
+ /*
+ * Increment the tree height.
+ *
+ * Trickery here: The amount of memory that we need per record for the
+ * ifork's btree root block may change when we convert the broot from a
+ * leaf to a node block. Free the existing leaf broot so that nobody
+ * thinks we need to migrate node pointers when we realloc the broot
+ * buffer after bumping nlevels.
+ */
+ cur->bc_ops->broot_realloc(cur, 0);
+ cur->bc_nlevels++;
+ cur->bc_levels[1].ptr = 1;
+
+ /*
+ * Allocate a new node broot and initialize it to point to the new
+ * child block.
+ */
+ broot = cur->bc_ops->broot_realloc(cur, 1);
+ xfs_btree_init_block(cur->bc_mp, broot, cur->bc_ops,
+ cur->bc_nlevels - 1, 1, cur->bc_ino.ip->i_ino);
+
+ pp = xfs_btree_ptr_addr(cur, 1, broot);
+ kp = xfs_btree_key_addr(cur, 1, broot);
+ xfs_btree_copy_ptrs(cur, pp, cptr, 1);
+ xfs_btree_get_keys(cur, cblock, kp);
+
+ /* Attach the new block to the cursor and log it. */
+ xfs_btree_setbuf(cur, 0, cbp);
+ xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
+ xfs_btree_log_recs(cur, cbp, 1, numrecs);
+}
+
+/*
+ * Move the keys and pointers from a root block to a separate block.
+ *
+ * Since the keyptr size does not change, all we have to do is increase the
+ * tree height, copy the keyptrs to the new internal node (cblock), shrink
+ * the root, and copy the pointers there.
+ */
+STATIC int
+xfs_btree_promote_node_iroot(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ int level,
+ struct xfs_buf *cbp,
+ union xfs_btree_ptr *cptr,
+ struct xfs_btree_block *cblock)
+{
+ union xfs_btree_key *ckp;
+ union xfs_btree_key *kp;
+ union xfs_btree_ptr *cpp;
+ union xfs_btree_ptr *pp;
+ int i;
+ int error;
+ int numrecs = xfs_btree_get_numrecs(block);
+
+ /*
+ * Increase tree height, adjusting the root block level to match.
+ * We cannot change the root btree node size until we've copied the
+ * block contents to the new child block.
+ */
+ be16_add_cpu(&block->bb_level, 1);
+ cur->bc_nlevels++;
+ cur->bc_levels[level + 1].ptr = 1;
+
+ /*
+ * Adjust the root btree record count, then copy the keys from the old
+ * root to the new child block.
+ */
+ xfs_btree_set_numrecs(block, 1);
+ kp = xfs_btree_key_addr(cur, 1, block);
+ ckp = xfs_btree_key_addr(cur, 1, cblock);
+ xfs_btree_copy_keys(cur, ckp, kp, numrecs);
+
+ /* Check the pointers and copy them to the new child block. */
+ pp = xfs_btree_ptr_addr(cur, 1, block);
+ cpp = xfs_btree_ptr_addr(cur, 1, cblock);
+ for (i = 0; i < numrecs; i++) {
+ error = xfs_btree_debug_check_ptr(cur, pp, i, level);
+ if (error)
+ return error;
+ }
+ xfs_btree_copy_ptrs(cur, cpp, pp, numrecs);
+
+ /*
+ * Set the first keyptr to point to the new child block, then shrink
+ * the memory buffer for the root block.
+ */
+ error = xfs_btree_debug_check_ptr(cur, cptr, 0, level);
+ if (error)
+ return error;
+ xfs_btree_copy_ptrs(cur, pp, cptr, 1);
+ xfs_btree_get_keys(cur, cblock, kp);
+
+ cur->bc_ops->broot_realloc(cur, 1);
+ /* Attach the new block to the cursor and log it. */
+ xfs_btree_setbuf(cur, level, cbp);
+ xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
+ xfs_btree_log_keys(cur, cbp, 1, numrecs);
+ xfs_btree_log_ptrs(cur, cbp, 1, numrecs);
+ return 0;
+}
/*
* Copy the old inode root contents into a real block and make the
@@ -2933,26 +3227,27 @@ xfs_btree_new_iroot(
struct xfs_buf *cbp; /* buffer for cblock */
struct xfs_btree_block *block; /* btree block */
struct xfs_btree_block *cblock; /* child btree block */
- union xfs_btree_key *ckp; /* child key pointer */
- union xfs_btree_ptr *cpp; /* child ptr pointer */
- union xfs_btree_key *kp; /* pointer to btree key */
- union xfs_btree_ptr *pp; /* pointer to block addr */
+ union xfs_btree_ptr aptr;
union xfs_btree_ptr nptr; /* new block addr */
int level; /* btree level */
int error; /* error return code */
- int i; /* loop counter */
XFS_BTREE_STATS_INC(cur, newroot);
- ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
level = cur->bc_nlevels - 1;
block = xfs_btree_get_iroot(cur);
- pp = xfs_btree_ptr_addr(cur, 1, block);
+ ASSERT(level > 0 || (cur->bc_ops->geom_flags & XFS_BTGEO_IROOT_RECORDS));
+ if (level > 0)
+ aptr = *xfs_btree_ptr_addr(cur, 1, block);
+ else
+ aptr.l = cpu_to_be64(XFS_INO_TO_FSB(cur->bc_mp,
+ cur->bc_ino.ip->i_ino));
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
+ error = xfs_btree_alloc_block(cur, &aptr, &nptr, stat);
if (error)
goto error0;
if (*stat == 0)
@@ -2961,7 +3256,7 @@ xfs_btree_new_iroot(
XFS_BTREE_STATS_INC(cur, alloc);
/* Copy the root into a real block. */
- error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
+ error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
if (error)
goto error0;
@@ -2970,59 +3265,45 @@ xfs_btree_new_iroot(
* In that case have to also ensure the blkno remains correct
*/
memcpy(cblock, block, xfs_btree_block_len(cur));
- if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
+ if (xfs_has_crc(cur->bc_mp)) {
+ __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
+ cblock->bb_u.l.bb_blkno = bno;
else
- cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
+ cblock->bb_u.s.bb_blkno = bno;
}
- be16_add_cpu(&block->bb_level, 1);
- xfs_btree_set_numrecs(block, 1);
- cur->bc_nlevels++;
- cur->bc_ptrs[level + 1] = 1;
-
- kp = xfs_btree_key_addr(cur, 1, block);
- ckp = xfs_btree_key_addr(cur, 1, cblock);
- xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
-
- cpp = xfs_btree_ptr_addr(cur, 1, cblock);
- for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
- error = xfs_btree_debug_check_ptr(cur, pp, i, level);
+ if (level > 0) {
+ error = xfs_btree_promote_node_iroot(cur, block, level, cbp,
+ &nptr, cblock);
if (error)
goto error0;
+ } else {
+ xfs_btree_promote_leaf_iroot(cur, block, cbp, &nptr, cblock);
}
- xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
-
- error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
- if (error)
- goto error0;
-
- xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
-
- xfs_iroot_realloc(cur->bc_private.b.ip,
- 1 - xfs_btree_get_numrecs(cblock),
- cur->bc_private.b.whichfork);
-
- xfs_btree_setbuf(cur, level, cbp);
-
- /*
- * Do all this logging at the end so that
- * the root is at the right level.
- */
- xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
- xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
- xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
-
- *logflags |=
- XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
+ *logflags |= XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
*stat = 1;
return 0;
error0:
return error;
}
+static void
+xfs_btree_set_root(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ int inc)
+{
+ if (cur->bc_flags & XFS_BTREE_STAGING) {
+ /* Update the btree root information for a per-AG fake root. */
+ cur->bc_ag.afake->af_root = be32_to_cpu(ptr->s);
+ cur->bc_ag.afake->af_levels += inc;
+ } else {
+ cur->bc_ops->set_root(cur, ptr, inc);
+ }
+}
+
/*
* Allocate a new root block, fill it in.
*/
@@ -3047,10 +3328,10 @@ xfs_btree_new_root(
XFS_BTREE_STATS_INC(cur, newroot);
/* initialise our start point from the cursor */
- cur->bc_ops->init_ptr_from_cur(cur, &rptr);
+ xfs_btree_init_ptr_from_cur(cur, &rptr);
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
+ error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
if (error)
goto error0;
if (*stat == 0)
@@ -3058,12 +3339,12 @@ xfs_btree_new_root(
XFS_BTREE_STATS_INC(cur, alloc);
/* Set up the new block. */
- error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
+ error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
if (error)
goto error0;
/* Set the root in the holding structure increasing the level by 1. */
- cur->bc_ops->set_root(cur, &lptr, 1);
+ xfs_btree_set_root(cur, &lptr, 1);
/*
* At the previous root level there are now two blocks: the old root,
@@ -3141,8 +3422,9 @@ xfs_btree_new_root(
/* Fix up the cursor. */
xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
- cur->bc_ptrs[cur->bc_nlevels] = nptr;
+ cur->bc_levels[cur->bc_nlevels].ptr = nptr;
cur->bc_nlevels++;
+ ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
*stat = 1;
return 0;
error0:
@@ -3166,13 +3448,12 @@ xfs_btree_make_block_unfull(
{
int error = 0;
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- level == cur->bc_nlevels - 1) {
- struct xfs_inode *ip = cur->bc_private.b.ip;
+ if (xfs_btree_at_iroot(cur, level)) {
+ struct xfs_inode *ip = cur->bc_ino.ip;
if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
/* A root block that can be made bigger. */
- xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
+ cur->bc_ops->broot_realloc(cur, numrecs + 1);
*stat = 1;
} else {
/* A root block that needs replacing */
@@ -3199,7 +3480,7 @@ xfs_btree_make_block_unfull(
return error;
if (*stat) {
- *oindex = *index = cur->bc_ptrs[level];
+ *oindex = *index = cur->bc_levels[level].ptr;
return 0;
}
@@ -3214,7 +3495,7 @@ xfs_btree_make_block_unfull(
return error;
- *index = cur->bc_ptrs[level];
+ *index = cur->bc_levels[level].ptr;
return 0;
}
@@ -3235,7 +3516,7 @@ xfs_btree_insrec(
struct xfs_btree_block *block; /* btree block */
struct xfs_buf *bp; /* buffer for block */
union xfs_btree_ptr nptr; /* new block ptr */
- struct xfs_btree_cur *ncur; /* new btree cursor */
+ struct xfs_btree_cur *ncur = NULL; /* new btree cursor */
union xfs_btree_key nkey; /* new block key */
union xfs_btree_key *lkey;
int optr; /* old key/record index */
@@ -3252,8 +3533,8 @@ xfs_btree_insrec(
* If we have an external root pointer, and we've made it to the
* root level, allocate a new root block and we're done.
*/
- if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (level >= cur->bc_nlevels)) {
+ if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE &&
+ level >= cur->bc_nlevels) {
error = xfs_btree_new_root(cur, stat);
xfs_btree_set_ptr_null(cur, ptrp);
@@ -3261,7 +3542,7 @@ xfs_btree_insrec(
}
/* If we're off the left edge, return failure. */
- ptr = cur->bc_ptrs[level];
+ ptr = cur->bc_levels[level].ptr;
if (ptr == 0) {
*stat = 0;
return 0;
@@ -3273,7 +3554,7 @@ xfs_btree_insrec(
/* Get pointers to the btree buffer and block. */
block = xfs_btree_get_block(cur, level, &bp);
- old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
+ old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
numrecs = xfs_btree_get_numrecs(block);
#ifdef DEBUG
@@ -3315,7 +3596,7 @@ xfs_btree_insrec(
#ifdef DEBUG
error = xfs_btree_check_block(cur, block, level, bp);
if (error)
- return error;
+ goto error0;
#endif
/*
@@ -3335,7 +3616,7 @@ xfs_btree_insrec(
for (i = numrecs - ptr; i >= 0; i--) {
error = xfs_btree_debug_check_ptr(cur, pp, i, level);
if (error)
- return error;
+ goto error0;
}
xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
@@ -3382,14 +3663,31 @@ xfs_btree_insrec(
xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
/*
- * If we just inserted into a new tree block, we have to
- * recalculate nkey here because nkey is out of date.
+ * Update btree keys to reflect the newly added record or keyptr.
+ * There are three cases here to be aware of. Normally, all we have to
+ * do is walk towards the root, updating keys as necessary.
+ *
+ * If the caller had us target a full block for the insertion, we dealt
+ * with that by calling the _make_block_unfull function. If the
+ * "make unfull" function splits the block, it'll hand us back the key
+ * and pointer of the new block. We haven't yet added the new block to
+ * the next level up, so if we decide to add the new record to the new
+ * block (bp->b_bn != old_bn), we have to update the caller's pointer
+ * so that the caller adds the new block with the correct key.
*
- * Otherwise we're just updating an existing block (having shoved
- * some records into the new tree block), so use the regular key
- * update mechanism.
+ * However, there is a third possibility-- if the selected block is the
+ * root block of an inode-rooted btree and cannot be expanded further,
+ * the "make unfull" function moves the root block contents to a new
+ * block and updates the root block to point to the new block. In this
+ * case, no block pointer is passed back because the block has already
+ * been added to the btree. In this case, we need to use the regular
+ * key update function, just like the first case. This is critical for
+ * overlapping btrees, because the high key must be updated to reflect
+ * the entire tree, not just the subtree accessible through the first
+ * child of the root (which is now two levels down from the root).
*/
- if (bp && bp->b_bn != old_bn) {
+ if (!xfs_btree_ptr_is_null(cur, &nptr) &&
+ bp && xfs_buf_daddr(bp) != old_bn) {
xfs_btree_get_keys(cur, block, lkey);
} else if (xfs_btree_needs_key_update(cur, optr)) {
error = xfs_btree_update_keys(cur, level);
@@ -3398,15 +3696,6 @@ xfs_btree_insrec(
}
/*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, level)) {
- cur->bc_ops->update_lastrec(cur, block, rec,
- ptr, LASTREC_INSREC);
- }
-
- /*
* Return the new block number, if any.
* If there is one, give back a record value and a cursor too.
*/
@@ -3420,6 +3709,8 @@ xfs_btree_insrec(
return 0;
error0:
+ if (ncur)
+ xfs_btree_del_cursor(ncur, error);
return error;
}
@@ -3474,7 +3765,11 @@ xfs_btree_insert(
goto error0;
}
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
level++;
/*
@@ -3485,7 +3780,8 @@ xfs_btree_insert(
if (pcur != cur &&
(ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
/* Save the state from the cursor before we trash it */
- if (cur->bc_ops->update_cursor)
+ if (cur->bc_ops->update_cursor &&
+ !(cur->bc_flags & XFS_BTREE_STAGING))
cur->bc_ops->update_cursor(pcur, cur);
cur->bc_nlevels = pcur->bc_nlevels;
xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
@@ -3503,6 +3799,97 @@ error0:
return error;
}
+/* Move the records from a child leaf block to the root block. */
+STATIC void
+xfs_btree_demote_leaf_child(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *cblock,
+ int numrecs)
+{
+ union xfs_btree_rec *rp;
+ union xfs_btree_rec *crp;
+ struct xfs_btree_block *broot;
+
+ /*
+ * Decrease the tree height.
+ *
+ * Trickery here: The amount of memory that we need per record for the
+ * ifork's btree root block may change when we convert the broot from a
+ * node to a leaf. Free the old node broot so that we can get a fresh
+ * leaf broot.
+ */
+ cur->bc_ops->broot_realloc(cur, 0);
+ cur->bc_nlevels--;
+
+ /*
+ * Allocate a new leaf broot and copy the records from the old child.
+ * Detach the old child from the cursor.
+ */
+ broot = cur->bc_ops->broot_realloc(cur, numrecs);
+ xfs_btree_init_block(cur->bc_mp, broot, cur->bc_ops, 0, numrecs,
+ cur->bc_ino.ip->i_ino);
+
+ rp = xfs_btree_rec_addr(cur, 1, broot);
+ crp = xfs_btree_rec_addr(cur, 1, cblock);
+ xfs_btree_copy_recs(cur, rp, crp, numrecs);
+
+ cur->bc_levels[0].bp = NULL;
+}
+
+/*
+ * Move the keyptrs from a child node block to the root block.
+ *
+ * Since the keyptr size does not change, all we have to do is increase the
+ * tree height, copy the keyptrs to the new internal node (cblock), shrink
+ * the root, and copy the pointers there.
+ */
+STATIC int
+xfs_btree_demote_node_child(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *cblock,
+ int level,
+ int numrecs)
+{
+ struct xfs_btree_block *block;
+ union xfs_btree_key *ckp;
+ union xfs_btree_key *kp;
+ union xfs_btree_ptr *cpp;
+ union xfs_btree_ptr *pp;
+ int i;
+ int error;
+
+ /*
+ * Adjust the root btree node size and the record count to match the
+ * doomed child so that we can copy the keyptrs ahead of changing the
+ * tree shape.
+ */
+ block = cur->bc_ops->broot_realloc(cur, numrecs);
+
+ xfs_btree_set_numrecs(block, numrecs);
+ ASSERT(block->bb_numrecs == cblock->bb_numrecs);
+
+ /* Copy keys from the doomed block. */
+ kp = xfs_btree_key_addr(cur, 1, block);
+ ckp = xfs_btree_key_addr(cur, 1, cblock);
+ xfs_btree_copy_keys(cur, kp, ckp, numrecs);
+
+ /* Copy pointers from the doomed block. */
+ pp = xfs_btree_ptr_addr(cur, 1, block);
+ cpp = xfs_btree_ptr_addr(cur, 1, cblock);
+ for (i = 0; i < numrecs; i++) {
+ error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
+ if (error)
+ return error;
+ }
+ xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
+
+ /* Decrease tree height, adjusting the root block level to match. */
+ cur->bc_levels[level - 1].bp = NULL;
+ be16_add_cpu(&block->bb_level, -1);
+ cur->bc_nlevels--;
+ return 0;
+}
+
/*
* Try to merge a non-leaf block back into the inode root.
*
@@ -3515,34 +3902,31 @@ STATIC int
xfs_btree_kill_iroot(
struct xfs_btree_cur *cur)
{
- int whichfork = cur->bc_private.b.whichfork;
- struct xfs_inode *ip = cur->bc_private.b.ip;
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_inode *ip = cur->bc_ino.ip;
struct xfs_btree_block *block;
struct xfs_btree_block *cblock;
- union xfs_btree_key *kp;
- union xfs_btree_key *ckp;
- union xfs_btree_ptr *pp;
- union xfs_btree_ptr *cpp;
struct xfs_buf *cbp;
int level;
- int index;
int numrecs;
int error;
#ifdef DEBUG
union xfs_btree_ptr ptr;
#endif
- int i;
- ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
- ASSERT(cur->bc_nlevels > 1);
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
+ ASSERT((cur->bc_ops->geom_flags & XFS_BTGEO_IROOT_RECORDS) ||
+ cur->bc_nlevels > 1);
/*
* Don't deal with the root block needs to be a leaf case.
* We're just going to turn the thing back into extents anyway.
*/
level = cur->bc_nlevels - 1;
- if (level == 1)
+ if (level == 1 && !(cur->bc_ops->geom_flags & XFS_BTGEO_IROOT_RECORDS))
+ goto out0;
+
+ /* If we're already a leaf, jump out. */
+ if (level == 0)
goto out0;
/*
@@ -3572,40 +3956,20 @@ xfs_btree_kill_iroot(
ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
#endif
- index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
- if (index) {
- xfs_iroot_realloc(cur->bc_private.b.ip, index,
- cur->bc_private.b.whichfork);
- block = ifp->if_broot;
- }
-
- be16_add_cpu(&block->bb_numrecs, index);
- ASSERT(block->bb_numrecs == cblock->bb_numrecs);
-
- kp = xfs_btree_key_addr(cur, 1, block);
- ckp = xfs_btree_key_addr(cur, 1, cblock);
- xfs_btree_copy_keys(cur, kp, ckp, numrecs);
-
- pp = xfs_btree_ptr_addr(cur, 1, block);
- cpp = xfs_btree_ptr_addr(cur, 1, cblock);
-
- for (i = 0; i < numrecs; i++) {
- error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
+ if (level > 1) {
+ error = xfs_btree_demote_node_child(cur, cblock, level,
+ numrecs);
if (error)
return error;
- }
-
- xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
+ } else
+ xfs_btree_demote_leaf_child(cur, cblock, numrecs);
error = xfs_btree_free_block(cur, cbp);
if (error)
return error;
- cur->bc_bufs[level - 1] = NULL;
- be16_add_cpu(&block->bb_level, -1);
xfs_trans_log_inode(cur->bc_tp, ip,
- XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
- cur->bc_nlevels--;
+ XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
out0:
return 0;
}
@@ -3628,14 +3992,14 @@ xfs_btree_kill_root(
* Update the root pointer, decreasing the level by 1 and then
* free the old root.
*/
- cur->bc_ops->set_root(cur, newroot, -1);
+ xfs_btree_set_root(cur, newroot, -1);
error = xfs_btree_free_block(cur, bp);
if (error)
return error;
- cur->bc_bufs[level] = NULL;
- cur->bc_ra[level] = 0;
+ cur->bc_levels[level].bp = NULL;
+ cur->bc_levels[level].ra = 0;
cur->bc_nlevels--;
return 0;
@@ -3694,7 +4058,7 @@ xfs_btree_delrec(
tcur = NULL;
/* Get the index of the entry being deleted, check for nothing there. */
- ptr = cur->bc_ptrs[level];
+ ptr = cur->bc_levels[level].ptr;
if (ptr == 0) {
*stat = 0;
return 0;
@@ -3757,40 +4121,29 @@ xfs_btree_delrec(
xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
/*
- * If we are tracking the last record in the tree and
- * we are at the far right edge of the tree, update it.
- */
- if (xfs_btree_is_lastrec(cur, block, level)) {
- cur->bc_ops->update_lastrec(cur, block, NULL,
- ptr, LASTREC_DELREC);
- }
-
- /*
* We're at the root level. First, shrink the root block in-memory.
* Try to get rid of the next level down. If we can't then there's
- * nothing left to do.
+ * nothing left to do. numrecs was decremented above.
*/
- if (level == cur->bc_nlevels - 1) {
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
- xfs_iroot_realloc(cur->bc_private.b.ip, -1,
- cur->bc_private.b.whichfork);
+ if (xfs_btree_at_iroot(cur, level)) {
+ cur->bc_ops->broot_realloc(cur, numrecs);
- error = xfs_btree_kill_iroot(cur);
- if (error)
- goto error0;
+ error = xfs_btree_kill_iroot(cur);
+ if (error)
+ goto error0;
- error = xfs_btree_dec_cursor(cur, level, stat);
- if (error)
- goto error0;
- *stat = 1;
- return 0;
- }
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ *stat = 1;
+ return 0;
+ }
- /*
- * If this is the root level, and there's only one entry left,
- * and it's NOT the leaf level, then we can get rid of this
- * level.
- */
+ /*
+ * If this is the root level, and there's only one entry left, and it's
+ * NOT the leaf level, then we can get rid of this level.
+ */
+ if (level == cur->bc_nlevels - 1) {
if (numrecs == 1 && level > 0) {
union xfs_btree_ptr *pp;
/*
@@ -3839,7 +4192,7 @@ xfs_btree_delrec(
xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
/*
* One child of root, need to get a chance to copy its contents
* into the root and delete it. Can't go up to next level,
@@ -3878,15 +4231,27 @@ xfs_btree_delrec(
* Actually any entry but the first would suffice.
*/
i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_increment(tcur, level, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/* Grab a pointer to the block. */
right = xfs_btree_get_block(tcur, level, &rbp);
@@ -3930,12 +4295,20 @@ xfs_btree_delrec(
rrecs = xfs_btree_get_numrecs(right);
if (!xfs_btree_ptr_is_null(cur, &lptr)) {
i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_decrement(tcur, level, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
}
}
@@ -3949,13 +4322,21 @@ xfs_btree_delrec(
* previous block.
*/
i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_decrement(tcur, level, &i);
if (error)
goto error0;
i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/* Grab a pointer to the block. */
left = xfs_btree_get_block(tcur, level, &lbp);
@@ -3983,7 +4364,7 @@ xfs_btree_delrec(
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
tcur = NULL;
if (level == 0)
- cur->bc_ptrs[0]++;
+ cur->bc_levels[0].ptr++;
*stat = 1;
return 0;
@@ -4096,7 +4477,7 @@ xfs_btree_delrec(
* surviving block, and log it.
*/
xfs_btree_set_numrecs(left, lrecs + rrecs);
- xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
+ xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
@@ -4120,16 +4501,16 @@ xfs_btree_delrec(
* cursor to the left block, and fix up the index.
*/
if (bp != lbp) {
- cur->bc_bufs[level] = lbp;
- cur->bc_ptrs[level] += lrecs;
- cur->bc_ra[level] = 0;
+ cur->bc_levels[level].bp = lbp;
+ cur->bc_levels[level].ptr += lrecs;
+ cur->bc_levels[level].ra = 0;
}
/*
* If we joined with the right neighbor and there's a level above
* us, increment the cursor at that level.
*/
- else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
- (level + 1 < cur->bc_nlevels)) {
+ else if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE ||
+ level + 1 < cur->bc_nlevels) {
error = xfs_btree_increment(cur, level + 1, &i);
if (error)
goto error0;
@@ -4142,16 +4523,16 @@ xfs_btree_delrec(
* We can't use decrement because it would change the next level up.
*/
if (level > 0)
- cur->bc_ptrs[level]--;
+ cur->bc_levels[level].ptr--;
/*
* We combined blocks, so we have to update the parent keys if the
- * btree supports overlapped intervals. However, bc_ptrs[level + 1]
- * points to the old block so that the caller knows which record to
- * delete. Therefore, the caller must be savvy enough to call updkeys
- * for us if we return stat == 2. The other exit points from this
- * function don't require deletions further up the tree, so they can
- * call updkeys directly.
+ * btree supports overlapped intervals. However,
+ * bc_levels[level + 1].ptr points to the old block so that the caller
+ * knows which record to delete. Therefore, the caller must be savvy
+ * enough to call updkeys for us if we return stat == 2. The other
+ * exit points from this function don't require deletions further up
+ * the tree, so they can call updkeys directly.
*/
/* Return value means the next level up has something to do. */
@@ -4197,7 +4578,7 @@ xfs_btree_delete(
* If we combined blocks as part of deleting the record, delrec won't
* have updated the parent high keys so we have to do that here.
*/
- if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
+ if (joined && (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) {
error = xfs_btree_updkeys_force(cur, 0);
if (error)
goto error0;
@@ -4205,7 +4586,7 @@ xfs_btree_delete(
if (i == 0) {
for (level = 1; level < cur->bc_nlevels; level++) {
- if (cur->bc_ptrs[level] == 0) {
+ if (cur->bc_levels[level].ptr == 0) {
error = xfs_btree_decrement(cur, level, &i);
if (error)
goto error0;
@@ -4236,7 +4617,7 @@ xfs_btree_get_rec(
int error; /* error return value */
#endif
- ptr = cur->bc_ptrs[0];
+ ptr = cur->bc_levels[0].ptr;
block = xfs_btree_get_block(cur, 0, &bp);
#ifdef DEBUG
@@ -4271,7 +4652,7 @@ xfs_btree_visit_block(
{
struct xfs_btree_block *block;
struct xfs_buf *bp;
- union xfs_btree_ptr rptr;
+ union xfs_btree_ptr rptr, bufptr;
int error;
/* do right sibling readahead */
@@ -4288,6 +4669,18 @@ xfs_btree_visit_block(
if (xfs_btree_ptr_is_null(cur, &rptr))
return -ENOENT;
+ /*
+ * We only visit blocks once in this walk, so we have to avoid the
+ * internal xfs_btree_lookup_get_block() optimisation where it will
+ * return the same block without checking if the right sibling points
+ * back to us and creates a cyclic reference in the btree.
+ */
+ xfs_btree_buf_to_ptr(cur, bp, &bufptr);
+ if (xfs_btree_ptrs_equal(cur, &rptr, &bufptr)) {
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
+
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}
@@ -4297,6 +4690,7 @@ int
xfs_btree_visit_blocks(
struct xfs_btree_cur *cur,
xfs_btree_visit_blocks_fn fn,
+ unsigned int flags,
void *data)
{
union xfs_btree_ptr lptr;
@@ -4304,7 +4698,7 @@ xfs_btree_visit_blocks(
struct xfs_btree_block *block = NULL;
int error = 0;
- cur->bc_ops->init_ptr_from_cur(cur, &lptr);
+ xfs_btree_init_ptr_from_cur(cur, &lptr);
/* for each level */
for (level = cur->bc_nlevels - 1; level >= 0; level--) {
@@ -4322,6 +4716,11 @@ xfs_btree_visit_blocks(
/* save for the next iteration of the loop */
xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
+
+ if (!(flags & XFS_BTREE_VISIT_LEAVES))
+ continue;
+ } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
+ continue;
}
/* for each buffer in the level */
@@ -4377,7 +4776,7 @@ xfs_btree_block_change_owner(
/* modify the owner */
block = xfs_btree_get_block(cur, level, &bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
return 0;
block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
@@ -4395,7 +4794,7 @@ xfs_btree_block_change_owner(
* though, so everything is consistent in memory.
*/
if (!bp) {
- ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
ASSERT(level == cur->bc_nlevels - 1);
return 0;
}
@@ -4424,23 +4823,23 @@ xfs_btree_change_owner(
bbcoi.buffer_list = buffer_list;
return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
- &bbcoi);
+ XFS_BTREE_VISIT_ALL, &bbcoi);
}
/* Verify the v5 fields of a long-format btree block. */
xfs_failaddr_t
-xfs_btree_lblock_v5hdr_verify(
+xfs_btree_fsblock_v5hdr_verify(
struct xfs_buf *bp,
uint64_t owner)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- if (!xfs_sb_version_hascrc(&mp->m_sb))
+ if (!xfs_has_crc(mp))
return __this_address;
if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
return __this_address;
- if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn))
+ if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
return __this_address;
if (owner != XFS_RMAP_OWN_UNKNOWN &&
be64_to_cpu(block->bb_u.l.bb_owner) != owner)
@@ -4450,102 +4849,194 @@ xfs_btree_lblock_v5hdr_verify(
/* Verify a long-format btree block. */
xfs_failaddr_t
-xfs_btree_lblock_verify(
+xfs_btree_fsblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ xfs_fsblock_t fsb;
+ xfs_failaddr_t fa;
+
+ ASSERT(!xfs_buftarg_is_mem(bp->b_target));
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > max_recs)
return __this_address;
/* sibling pointer verification */
- if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
- !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
- return __this_address;
- if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
- !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
+ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
+ fa = xfs_btree_check_fsblock_siblings(mp, fsb,
+ block->bb_u.l.bb_leftsib);
+ if (!fa)
+ fa = xfs_btree_check_fsblock_siblings(mp, fsb,
+ block->bb_u.l.bb_rightsib);
+ return fa;
+}
+
+/* Verify an in-memory btree block. */
+xfs_failaddr_t
+xfs_btree_memblock_verify(
+ struct xfs_buf *bp,
+ unsigned int max_recs)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buftarg *btp = bp->b_target;
+ xfs_failaddr_t fa;
+ xfbno_t bno;
+
+ ASSERT(xfs_buftarg_is_mem(bp->b_target));
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
return __this_address;
+ /* sibling pointer verification */
+ bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp));
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_leftsib);
+ if (fa)
+ return fa;
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_rightsib);
+ if (fa)
+ return fa;
+
return NULL;
}
-
/**
- * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
+ * xfs_btree_agblock_v5hdr_verify() -- verify the v5 fields of a short-format
* btree block
*
* @bp: buffer containing the btree block
- * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
- * @pag_max_level: pointer to the per-ag max level field
*/
xfs_failaddr_t
-xfs_btree_sblock_v5hdr_verify(
+xfs_btree_agblock_v5hdr_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
- if (!xfs_sb_version_hascrc(&mp->m_sb))
+ if (!xfs_has_crc(mp))
return __this_address;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
return __this_address;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
return __this_address;
- if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag_agno(pag))
return __this_address;
return NULL;
}
/**
- * xfs_btree_sblock_verify() -- verify a short-format btree block
+ * xfs_btree_agblock_verify() -- verify a short-format btree block
*
* @bp: buffer containing the btree block
* @max_recs: maximum records allowed in this btree node
*/
xfs_failaddr_t
-xfs_btree_sblock_verify(
+xfs_btree_agblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- xfs_agblock_t agno;
+ xfs_agblock_t agbno;
+ xfs_failaddr_t fa;
+
+ ASSERT(!xfs_buftarg_is_mem(bp->b_target));
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > max_recs)
return __this_address;
/* sibling pointer verification */
- agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
- if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
- !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
- return __this_address;
- if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
- !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
- return __this_address;
-
- return NULL;
+ agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
+ fa = xfs_btree_check_agblock_siblings(bp->b_pag, agbno,
+ block->bb_u.s.bb_leftsib);
+ if (!fa)
+ fa = xfs_btree_check_agblock_siblings(bp->b_pag, agbno,
+ block->bb_u.s.bb_rightsib);
+ return fa;
}
/*
- * Calculate the number of btree levels needed to store a given number of
- * records in a short-format btree.
+ * For the given limits on leaf and keyptr records per block, calculate the
+ * height of the tree needed to index the number of leaf records.
*/
-uint
+unsigned int
xfs_btree_compute_maxlevels(
- uint *limits,
- unsigned long len)
+ const unsigned int *limits,
+ unsigned long long records)
+{
+ unsigned long long level_blocks = howmany_64(records, limits[0]);
+ unsigned int height = 1;
+
+ while (level_blocks > 1) {
+ level_blocks = howmany_64(level_blocks, limits[1]);
+ height++;
+ }
+
+ return height;
+}
+
+/*
+ * For the given limits on leaf and keyptr records per block, calculate the
+ * number of blocks needed to index the given number of leaf records.
+ */
+unsigned long long
+xfs_btree_calc_size(
+ const unsigned int *limits,
+ unsigned long long records)
+{
+ unsigned long long level_blocks = howmany_64(records, limits[0]);
+ unsigned long long blocks = level_blocks;
+
+ while (level_blocks > 1) {
+ level_blocks = howmany_64(level_blocks, limits[1]);
+ blocks += level_blocks;
+ }
+
+ return blocks;
+}
+
+/*
+ * Given a number of available blocks for the btree to consume with records and
+ * pointers, calculate the height of the tree needed to index all the records
+ * that space can hold based on the number of pointers each interior node
+ * holds.
+ *
+ * We start by assuming a single level tree consumes a single block, then track
+ * the number of blocks each node level consumes until we no longer have space
+ * to store the next node level. At this point, we are indexing all the leaf
+ * blocks in the space, and there's no more free space to split the tree any
+ * further. That's our maximum btree height.
+ */
+unsigned int
+xfs_btree_space_to_height(
+ const unsigned int *limits,
+ unsigned long long leaf_blocks)
{
- uint level;
- unsigned long maxblocks;
+ /*
+ * The root btree block can have fewer than minrecs pointers in it
+ * because the tree might not be big enough to require that amount of
+ * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
+ */
+ unsigned long long node_blocks = 2;
+ unsigned long long blocks_left = leaf_blocks - 1;
+ unsigned int height = 1;
+
+ if (leaf_blocks < 1)
+ return 0;
- maxblocks = (len + limits[0] - 1) / limits[0];
- for (level = 1; maxblocks > 1; level++)
- maxblocks = (maxblocks + limits[1] - 1) / limits[1];
- return level;
+ while (node_blocks < blocks_left) {
+ blocks_left -= node_blocks;
+ node_blocks *= limits[1];
+ height++;
+ }
+
+ return height;
}
/*
@@ -4556,20 +5047,19 @@ xfs_btree_compute_maxlevels(
STATIC int
xfs_btree_simple_query_range(
struct xfs_btree_cur *cur,
- union xfs_btree_key *low_key,
- union xfs_btree_key *high_key,
+ const union xfs_btree_key *low_key,
+ const union xfs_btree_key *high_key,
xfs_btree_query_range_fn fn,
void *priv)
{
union xfs_btree_rec *recp;
union xfs_btree_key rec_key;
- int64_t diff;
int stat;
bool firstrec = true;
int error;
ASSERT(cur->bc_ops->init_high_key_from_rec);
- ASSERT(cur->bc_ops->diff_two_keys);
+ ASSERT(cur->bc_ops->cmp_two_keys);
/*
* Find the leftmost record. The btree cursor must be set
@@ -4593,25 +5083,22 @@ xfs_btree_simple_query_range(
if (error || !stat)
break;
- /* Skip if high_key(rec) < low_key. */
+ /* Skip if low_key > high_key(rec). */
if (firstrec) {
cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
firstrec = false;
- diff = cur->bc_ops->diff_two_keys(cur, low_key,
- &rec_key);
- if (diff > 0)
+ if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
goto advloop;
}
- /* Stop if high_key < low_key(rec). */
+ /* Stop if low_key(rec) > high_key. */
cur->bc_ops->init_key_from_rec(&rec_key, recp);
- diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
- if (diff > 0)
+ if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
break;
/* Callback */
error = fn(cur, recp, priv);
- if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error)
break;
advloop:
@@ -4647,8 +5134,8 @@ out:
STATIC int
xfs_btree_overlapped_query_range(
struct xfs_btree_cur *cur,
- union xfs_btree_key *low_key,
- union xfs_btree_key *high_key,
+ const union xfs_btree_key *low_key,
+ const union xfs_btree_key *high_key,
xfs_btree_query_range_fn fn,
void *priv)
{
@@ -4660,8 +5147,6 @@ xfs_btree_overlapped_query_range(
union xfs_btree_key *hkp;
union xfs_btree_rec *recp;
struct xfs_btree_block *block;
- int64_t ldiff;
- int64_t hdiff;
int level;
struct xfs_buf *bp;
int i;
@@ -4669,7 +5154,7 @@ xfs_btree_overlapped_query_range(
/* Load the root of the btree. */
level = cur->bc_nlevels - 1;
- cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ xfs_btree_init_ptr_from_cur(cur, &ptr);
error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
if (error)
return error;
@@ -4680,64 +5165,67 @@ xfs_btree_overlapped_query_range(
if (error)
goto out;
#endif
- cur->bc_ptrs[level] = 1;
+ cur->bc_levels[level].ptr = 1;
while (level < cur->bc_nlevels) {
block = xfs_btree_get_block(cur, level, &bp);
/* End of node, pop back towards the root. */
- if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
+ if (cur->bc_levels[level].ptr >
+ be16_to_cpu(block->bb_numrecs)) {
pop_up:
if (level < cur->bc_nlevels - 1)
- cur->bc_ptrs[level + 1]++;
+ cur->bc_levels[level + 1].ptr++;
level++;
continue;
}
if (level == 0) {
/* Handle a leaf node. */
- recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
+ recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
+ block);
cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
- ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
- low_key);
-
cur->bc_ops->init_key_from_rec(&rec_key, recp);
- hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
- &rec_key);
/*
+ * If (query's high key < record's low key), then there
+ * are no more interesting records in this block. Pop
+ * up to the leaf level to find more record blocks.
+ *
* If (record's high key >= query's low key) and
* (query's high key >= record's low key), then
* this record overlaps the query range; callback.
*/
- if (ldiff >= 0 && hdiff >= 0) {
+ if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
+ goto pop_up;
+ if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
error = fn(cur, recp, priv);
- if (error < 0 ||
- error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error)
break;
- } else if (hdiff < 0) {
- /* Record is larger than high key; pop. */
- goto pop_up;
}
- cur->bc_ptrs[level]++;
+ cur->bc_levels[level].ptr++;
continue;
}
/* Handle an internal node. */
- lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
- hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
- pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
-
- ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
- hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);
+ lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
+ hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
+ block);
+ pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
/*
+ * If (query's high key < pointer's low key), then there are no
+ * more interesting keys in this block. Pop up one leaf level
+ * to continue looking for records.
+ *
* If (pointer's high key >= query's low key) and
* (query's high key >= pointer's low key), then
* this record overlaps the query range; follow pointer.
*/
- if (ldiff >= 0 && hdiff >= 0) {
+ if (xfs_btree_keycmp_lt(cur, high_key, lkp))
+ goto pop_up;
+ if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
level--;
error = xfs_btree_lookup_get_block(cur, level, pp,
&block);
@@ -4750,13 +5238,10 @@ pop_up:
if (error)
goto out;
#endif
- cur->bc_ptrs[level] = 1;
+ cur->bc_levels[level].ptr = 1;
continue;
- } else if (hdiff < 0) {
- /* The low key is larger than the upper range; pop. */
- goto pop_up;
}
- cur->bc_ptrs[level]++;
+ cur->bc_levels[level].ptr++;
}
out:
@@ -4767,13 +5252,14 @@ out:
* with a zero-results range query, so release the buffers if we
* failed to return any results.
*/
- if (cur->bc_bufs[0] == NULL) {
+ if (cur->bc_levels[0].bp == NULL) {
for (i = 0; i < cur->bc_nlevels; i++) {
- if (cur->bc_bufs[i]) {
- xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
- cur->bc_bufs[i] = NULL;
- cur->bc_ptrs[i] = 0;
- cur->bc_ra[i] = 0;
+ if (cur->bc_levels[i].bp) {
+ xfs_trans_brelse(cur->bc_tp,
+ cur->bc_levels[i].bp);
+ cur->bc_levels[i].bp = NULL;
+ cur->bc_levels[i].ptr = 0;
+ cur->bc_levels[i].ra = 0;
}
}
}
@@ -4781,39 +5267,45 @@ out:
return error;
}
+static inline void
+xfs_btree_key_from_irec(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key,
+ const union xfs_btree_irec *irec)
+{
+ union xfs_btree_rec rec;
+
+ cur->bc_rec = *irec;
+ cur->bc_ops->init_rec_from_cur(cur, &rec);
+ cur->bc_ops->init_key_from_rec(key, &rec);
+}
+
/*
* Query a btree for all records overlapping a given interval of keys. The
* supplied function will be called with each record found; return one of the
* XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
- * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
- * negative error code.
+ * code. This function returns -ECANCELED, zero, or a negative error code.
*/
int
xfs_btree_query_range(
struct xfs_btree_cur *cur,
- union xfs_btree_irec *low_rec,
- union xfs_btree_irec *high_rec,
+ const union xfs_btree_irec *low_rec,
+ const union xfs_btree_irec *high_rec,
xfs_btree_query_range_fn fn,
void *priv)
{
- union xfs_btree_rec rec;
union xfs_btree_key low_key;
union xfs_btree_key high_key;
/* Find the keys of both ends of the interval. */
- cur->bc_rec = *high_rec;
- cur->bc_ops->init_rec_from_cur(cur, &rec);
- cur->bc_ops->init_key_from_rec(&high_key, &rec);
+ xfs_btree_key_from_irec(cur, &high_key, high_rec);
+ xfs_btree_key_from_irec(cur, &low_key, low_rec);
- cur->bc_rec = *low_rec;
- cur->bc_ops->init_rec_from_cur(cur, &rec);
- cur->bc_ops->init_key_from_rec(&low_key, &rec);
-
- /* Enforce low key < high key. */
- if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
+ /* Enforce low key <= high key. */
+ if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
return -EINVAL;
- if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+ if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
return xfs_btree_simple_query_range(cur, &low_key,
&high_key, fn, priv);
return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
@@ -4837,36 +5329,13 @@ xfs_btree_query_all(
return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
}
-/*
- * Calculate the number of blocks needed to store a given number of records
- * in a short-format (per-AG metadata) btree.
- */
-unsigned long long
-xfs_btree_calc_size(
- uint *limits,
- unsigned long long len)
-{
- int level;
- int maxrecs;
- unsigned long long rval;
-
- maxrecs = limits[0];
- for (level = 0, rval = 0; len > 1; level++) {
- len += maxrecs - 1;
- do_div(len, maxrecs);
- maxrecs = limits[1];
- rval += len;
- }
- return rval;
-}
-
static int
xfs_btree_count_blocks_helper(
struct xfs_btree_cur *cur,
int level,
void *data)
{
- xfs_extlen_t *blocks = data;
+ xfs_filblks_t *blocks = data;
(*blocks)++;
return 0;
@@ -4876,53 +5345,151 @@ xfs_btree_count_blocks_helper(
int
xfs_btree_count_blocks(
struct xfs_btree_cur *cur,
- xfs_extlen_t *blocks)
+ xfs_filblks_t *blocks)
{
*blocks = 0;
return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
- blocks);
+ XFS_BTREE_VISIT_ALL, blocks);
}
/* Compare two btree pointers. */
-int64_t
-xfs_btree_diff_two_ptrs(
+int
+xfs_btree_cmp_two_ptrs(
struct xfs_btree_cur *cur,
const union xfs_btree_ptr *a,
const union xfs_btree_ptr *b)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
- return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
+ return cmp_int(be64_to_cpu(a->l), be64_to_cpu(b->l));
+ return cmp_int(be32_to_cpu(a->s), be32_to_cpu(b->s));
}
-/* If there's an extent, we're done. */
+struct xfs_btree_has_records {
+ /* Keys for the start and end of the range we want to know about. */
+ union xfs_btree_key start_key;
+ union xfs_btree_key end_key;
+
+ /* Mask for key comparisons, if desired. */
+ const union xfs_btree_key *key_mask;
+
+ /* Highest record key we've seen so far. */
+ union xfs_btree_key high_key;
+
+ enum xbtree_recpacking outcome;
+};
+
STATIC int
-xfs_btree_has_record_helper(
+xfs_btree_has_records_helper(
struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
+ const union xfs_btree_rec *rec,
void *priv)
{
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ union xfs_btree_key rec_key;
+ union xfs_btree_key rec_high_key;
+ struct xfs_btree_has_records *info = priv;
+ enum xbtree_key_contig key_contig;
+
+ cur->bc_ops->init_key_from_rec(&rec_key, rec);
+
+ if (info->outcome == XBTREE_RECPACKING_EMPTY) {
+ info->outcome = XBTREE_RECPACKING_SPARSE;
+
+ /*
+ * If the first record we find does not overlap the start key,
+ * then there is a hole at the start of the search range.
+ * Classify this as sparse and stop immediately.
+ */
+ if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
+ info->key_mask))
+ return -ECANCELED;
+ } else {
+ /*
+ * If a subsequent record does not overlap with the any record
+ * we've seen so far, there is a hole in the middle of the
+ * search range. Classify this as sparse and stop.
+ * If the keys overlap and this btree does not allow overlap,
+ * signal corruption.
+ */
+ key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
+ &rec_key, info->key_mask);
+ if (key_contig == XBTREE_KEY_OVERLAP &&
+ !(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
+ return -EFSCORRUPTED;
+ if (key_contig == XBTREE_KEY_GAP)
+ return -ECANCELED;
+ }
+
+ /*
+ * If high_key(rec) is larger than any other high key we've seen,
+ * remember it for later.
+ */
+ cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
+ if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
+ info->key_mask))
+ info->high_key = rec_high_key; /* struct copy */
+
+ return 0;
}
-/* Is there a record covering a given range of keys? */
+/*
+ * Scan part of the keyspace of a btree and tell us if that keyspace does not
+ * map to any records; is fully mapped to records; or is partially mapped to
+ * records. This is the btree record equivalent to determining if a file is
+ * sparse.
+ *
+ * For most btree types, the record scan should use all available btree key
+ * fields to compare the keys encountered. These callers should pass NULL for
+ * @mask. However, some callers (e.g. scanning physical space in the rmapbt)
+ * want to ignore some part of the btree record keyspace when performing the
+ * comparison. These callers should pass in a union xfs_btree_key object with
+ * the fields that *should* be a part of the comparison set to any nonzero
+ * value, and the rest zeroed.
+ */
int
-xfs_btree_has_record(
- struct xfs_btree_cur *cur,
- union xfs_btree_irec *low,
- union xfs_btree_irec *high,
- bool *exists)
+xfs_btree_has_records(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_irec *low,
+ const union xfs_btree_irec *high,
+ const union xfs_btree_key *mask,
+ enum xbtree_recpacking *outcome)
{
- int error;
+ struct xfs_btree_has_records info = {
+ .outcome = XBTREE_RECPACKING_EMPTY,
+ .key_mask = mask,
+ };
+ int error;
- error = xfs_btree_query_range(cur, low, high,
- &xfs_btree_has_record_helper, NULL);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
- *exists = true;
- return 0;
+ /* Not all btrees support this operation. */
+ if (!cur->bc_ops->keys_contiguous) {
+ ASSERT(0);
+ return -EOPNOTSUPP;
}
- *exists = false;
- return error;
+
+ xfs_btree_key_from_irec(cur, &info.start_key, low);
+ xfs_btree_key_from_irec(cur, &info.end_key, high);
+
+ error = xfs_btree_query_range(cur, low, high,
+ xfs_btree_has_records_helper, &info);
+ if (error == -ECANCELED)
+ goto out;
+ if (error)
+ return error;
+
+ if (info.outcome == XBTREE_RECPACKING_EMPTY)
+ goto out;
+
+ /*
+ * If the largest high_key(rec) we saw during the walk is greater than
+ * the end of the search range, classify this as full. Otherwise,
+ * there is a hole at the end of the search range.
+ */
+ if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
+ mask))
+ info.outcome = XBTREE_RECPACKING_FULL;
+
+out:
+ *outcome = info.outcome;
+ return 0;
}
/* Are there more records in this btree? */
@@ -4936,12 +5503,150 @@ xfs_btree_has_more_records(
block = xfs_btree_get_block(cur, 0, &bp);
/* There are still records in this block. */
- if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block))
+ if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
return true;
/* There are more record blocks. */
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
else
return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
}
+
+/* Set up all the btree cursor caches. */
+int __init
+xfs_btree_init_cur_caches(void)
+{
+ int error;
+
+ error = xfs_allocbt_init_cur_cache();
+ if (error)
+ return error;
+ error = xfs_inobt_init_cur_cache();
+ if (error)
+ goto err;
+ error = xfs_bmbt_init_cur_cache();
+ if (error)
+ goto err;
+ error = xfs_rmapbt_init_cur_cache();
+ if (error)
+ goto err;
+ error = xfs_refcountbt_init_cur_cache();
+ if (error)
+ goto err;
+ error = xfs_rtrmapbt_init_cur_cache();
+ if (error)
+ goto err;
+ error = xfs_rtrefcountbt_init_cur_cache();
+ if (error)
+ goto err;
+
+ return 0;
+err:
+ xfs_btree_destroy_cur_caches();
+ return error;
+}
+
+/* Destroy all the btree cursor caches, if they've been allocated. */
+void
+xfs_btree_destroy_cur_caches(void)
+{
+ xfs_allocbt_destroy_cur_cache();
+ xfs_inobt_destroy_cur_cache();
+ xfs_bmbt_destroy_cur_cache();
+ xfs_rmapbt_destroy_cur_cache();
+ xfs_refcountbt_destroy_cur_cache();
+ xfs_rtrmapbt_destroy_cur_cache();
+ xfs_rtrefcountbt_destroy_cur_cache();
+}
+
+/* Move the btree cursor before the first record. */
+int
+xfs_btree_goto_left_edge(
+ struct xfs_btree_cur *cur)
+{
+ int stat = 0;
+ int error;
+
+ memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
+ error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
+ if (error)
+ return error;
+ if (!stat)
+ return 0;
+
+ error = xfs_btree_decrement(cur, 0, &stat);
+ if (error)
+ return error;
+ if (stat != 0) {
+ ASSERT(0);
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
+/* Allocate a block for an inode-rooted metadata btree. */
+int
+xfs_btree_alloc_metafile_block(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ struct xfs_alloc_arg args = {
+ .mp = cur->bc_mp,
+ .tp = cur->bc_tp,
+ .resv = XFS_AG_RESV_METAFILE,
+ .minlen = 1,
+ .maxlen = 1,
+ .prod = 1,
+ };
+ struct xfs_inode *ip = cur->bc_ino.ip;
+ int error;
+
+ ASSERT(xfs_is_metadir_inode(ip));
+
+ xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, cur->bc_ino.whichfork);
+ error = xfs_alloc_vextent_start_ag(&args,
+ XFS_INO_TO_FSB(cur->bc_mp, ip->i_ino));
+ if (error)
+ return error;
+ if (args.fsbno == NULLFSBLOCK) {
+ *stat = 0;
+ return 0;
+ }
+ ASSERT(args.len == 1);
+
+ xfs_metafile_resv_alloc_space(ip, &args);
+
+ new->l = cpu_to_be64(args.fsbno);
+ *stat = 1;
+ return 0;
+}
+
+/* Free a block from an inode-rooted metadata btree. */
+int
+xfs_btree_free_metafile_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_inode *ip = cur->bc_ino.ip;
+ struct xfs_trans *tp = cur->bc_tp;
+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
+ int error;
+
+ ASSERT(xfs_is_metadir_inode(ip));
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
+ error = xfs_free_extent_later(tp, fsbno, 1, &oinfo, XFS_AG_RESV_METAFILE,
+ 0);
+ if (error)
+ return error;
+
+ xfs_metafile_resv_free_space(ip, tp, 1);
+ return 0;
+}