summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_ialloc.c
diff options
context:
space:
mode:
author Brian Foster <bfoster@redhat.com> 2015-05-29 09:03:04 +1000
committer Dave Chinner <david@fromorbit.com> 2015-05-29 09:03:04 +1000
commit 5419040fc0f3afc31c857b4d7f006bd9afbdb462 (patch)
tree 2a2b2a92bc6a8a0bfa69fe427cad6a2e9203dfc3 /fs/xfs/libxfs/xfs_ialloc.c
parent 502a4e72b8707f3a45fb51f873c2865928db0771 (diff)
xfs: introduce inode record hole mask for sparse inode chunks
The inode btrees track 64 inodes per record regardless of inode size. Thus, inode chunks on disk vary in size depending on the size of the inodes. This creates a contiguous allocation requirement for new inode chunks that can be difficult to satisfy on aged filesystems with fragmented free space. The inode record freecount currently uses 4 bytes on disk to track the free inode count. With a maximum freecount value of 64, only one byte is required. Convert the freecount field to a single byte and use two of the three remaining higher-order bytes for the hole mask field. Use the final leftover byte for the total count field. The hole mask field tracks holes in the chunks of physical space that the inode record refers to. This facilitates the sparse allocation of inode chunks when contiguous chunks are not available and allows the inode btrees to identify what portions of the chunk contain valid inodes. The total count field contains the total number of valid inodes referred to by the record. This can also be deduced from the hole mask. The count field provides clarity and redundancy for internal record verification. Note that neither of the new fields can be written to disk on filesystems without sparse inode support. Doing so writes to the high-order bytes of freecount and causes corruption from the perspective of older kernels. The on-disk inobt record data structure is updated with a union to distinguish between the original, "full" format and the new, "sparse" format. The conversion routines to get, insert and update records are updated to translate to and from the on-disk record accordingly, such that freecount remains a 4-byte value on filesystems without sparse inode support, yet the new fields of the in-core record are always valid with respect to the record. This means that higher level code can refer to the current in-core record format unconditionally and lower level code ensures that records are translated to/from disk according to the capabilities of the filesystem.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/libxfs/xfs_ialloc.c')
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc.c | 48
1 files changed, 40 insertions, 8 deletions
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 269d9cac5c87..85a477a5f41c 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -65,6 +65,8 @@ xfs_inobt_lookup(
int *stat) /* success/failure */
{
cur->bc_rec.i.ir_startino = ino;
+ cur->bc_rec.i.ir_holemask = 0;
+ cur->bc_rec.i.ir_count = 0;
cur->bc_rec.i.ir_freecount = 0;
cur->bc_rec.i.ir_free = 0;
return xfs_btree_lookup(cur, dir, stat);
@@ -82,7 +84,14 @@ xfs_inobt_update(
union xfs_btree_rec rec;
rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
- rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
+ rec.inobt.ir_u.sp.ir_count = irec->ir_count;
+ rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
+ } else {
+ /* ir_holemask/ir_count not supported on-disk */
+ rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
+ }
rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
return xfs_btree_update(cur, &rec);
}
@@ -100,12 +109,27 @@ xfs_inobt_get_rec(
int error;
error = xfs_btree_get_rec(cur, &rec, stat);
- if (!error && *stat == 1) {
- irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
- irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
- irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+ if (error || *stat == 0)
+ return error;
+
+ irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
+ if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+ irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
+ irec->ir_count = rec->inobt.ir_u.sp.ir_count;
+ irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
+ } else {
+ /*
+ * ir_holemask/ir_count not supported on-disk. Fill in hardcoded
+ * values for full inode chunks.
+ */
+ irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
+ irec->ir_count = XFS_INODES_PER_CHUNK;
+ irec->ir_freecount =
+ be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
}
- return error;
+ irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+
+ return 0;
}
/*
@@ -114,10 +138,14 @@ xfs_inobt_get_rec(
STATIC int
xfs_inobt_insert_rec(
struct xfs_btree_cur *cur,
+ __uint16_t holemask,
+ __uint8_t count,
__int32_t freecount,
xfs_inofree_t free,
int *stat)
{
+ cur->bc_rec.i.ir_holemask = holemask;
+ cur->bc_rec.i.ir_count = count;
cur->bc_rec.i.ir_freecount = freecount;
cur->bc_rec.i.ir_free = free;
return xfs_btree_insert(cur, stat);
@@ -154,7 +182,9 @@ xfs_inobt_insert(
}
ASSERT(i == 0);
- error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK,
+ error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
+ XFS_INODES_PER_CHUNK,
+ XFS_INODES_PER_CHUNK,
XFS_INOBT_ALL_FREE, &i);
if (error) {
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -1609,7 +1639,9 @@ xfs_difree_finobt(
*/
XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
- error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
+ error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
+ ibtrec->ir_count,
+ ibtrec->ir_freecount,
ibtrec->ir_free, &i);
if (error)
goto error;