summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c234
1 files changed, 139 insertions, 95 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4ec5b7f45401..3e3aab3888fa 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -16,6 +16,7 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/log2.h>
+#include <linux/iversion.h>
#include "xfs.h"
#include "xfs_fs.h"
@@ -39,6 +40,7 @@
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
+#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_filestream.h"
@@ -384,14 +386,6 @@ xfs_isilocked(
}
#endif
-#ifdef DEBUG
-int xfs_locked_n;
-int xfs_small_retries;
-int xfs_middle_retries;
-int xfs_lots_retries;
-int xfs_lock_delays;
-#endif
-
/*
* xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
* DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined
@@ -544,45 +538,45 @@ again:
if ((attempts % 5) == 0) {
delay(1); /* Don't just spin the CPU */
-#ifdef DEBUG
- xfs_lock_delays++;
-#endif
}
i = 0;
try_lock = 0;
goto again;
}
-
-#ifdef DEBUG
- if (attempts) {
- if (attempts < 5) xfs_small_retries++;
- else if (attempts < 100) xfs_middle_retries++;
- else xfs_lots_retries++;
- } else {
- xfs_locked_n++;
- }
-#endif
}
/*
* xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
- * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
- * lock more than one at a time, lockdep will report false positives saying we
- * have violated locking orders.
+ * the mmaplock or the ilock, but not more than one type at a time. If we lock
+ * more than one at a time, lockdep will report false positives saying we have
+ * violated locking orders. The iolock must be double-locked separately since
+ * we use i_rwsem for that. We now support taking one lock EXCL and the other
+ * SHARED.
*/
void
xfs_lock_two_inodes(
- xfs_inode_t *ip0,
- xfs_inode_t *ip1,
- uint lock_mode)
+ struct xfs_inode *ip0,
+ uint ip0_mode,
+ struct xfs_inode *ip1,
+ uint ip1_mode)
{
- xfs_inode_t *temp;
+ struct xfs_inode *temp;
+ uint mode_temp;
int attempts = 0;
xfs_log_item_t *lp;
- ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
- if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
- ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ ASSERT(hweight32(ip0_mode) == 1);
+ ASSERT(hweight32(ip1_mode) == 1);
+ ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
+ ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+ !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+ !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+ !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+ !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
ASSERT(ip0->i_ino != ip1->i_ino);
@@ -590,10 +584,13 @@ xfs_lock_two_inodes(
temp = ip0;
ip0 = ip1;
ip1 = temp;
+ mode_temp = ip0_mode;
+ ip0_mode = ip1_mode;
+ ip1_mode = mode_temp;
}
again:
- xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
+ xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));
/*
* If the first lock we have locked is in the AIL, we must TRY to get
@@ -602,18 +599,17 @@ xfs_lock_two_inodes(
*/
lp = (xfs_log_item_t *)ip0->i_itemp;
if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
- if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
- xfs_iunlock(ip0, lock_mode);
+ if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
+ xfs_iunlock(ip0, ip0_mode);
if ((++attempts % 5) == 0)
delay(1); /* Don't just spin the CPU */
goto again;
}
} else {
- xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
+ xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
}
}
-
void
__xfs_iflock(
struct xfs_inode *ip)
@@ -767,9 +763,8 @@ xfs_ialloc(
xfs_inode_t *pip,
umode_t mode,
xfs_nlink_t nlink,
- xfs_dev_t rdev,
+ dev_t rdev,
prid_t prid,
- int okalloc,
xfs_buf_t **ialloc_context,
xfs_inode_t **ipp)
{
@@ -785,7 +780,7 @@ xfs_ialloc(
* Call the space management code to pick
* the on-disk inode to be allocated.
*/
- error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
+ error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
ialloc_context, &ino);
if (error)
return error;
@@ -819,6 +814,7 @@ xfs_ialloc(
set_nlink(inode, nlink);
ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
+ inode->i_rdev = rdev;
xfs_set_projid(ip, prid);
if (pip && XFS_INHERIT_GID(pip)) {
@@ -852,7 +848,7 @@ xfs_ialloc(
ip->i_d.di_flags = 0;
if (ip->i_d.di_version == 3) {
- inode->i_version = 1;
+ inode_set_iversion(inode, 1);
ip->i_d.di_flags2 = 0;
ip->i_d.di_cowextsize = 0;
ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
@@ -867,7 +863,6 @@ xfs_ialloc(
case S_IFBLK:
case S_IFSOCK:
ip->i_d.di_format = XFS_DINODE_FMT_DEV;
- ip->i_df.if_u2.if_rdev = rdev;
ip->i_df.if_flags = 0;
flags |= XFS_ILOG_DEV;
break;
@@ -933,7 +928,7 @@ xfs_ialloc(
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_df.if_flags = XFS_IFEXTENTS;
ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
- ip->i_df.if_u1.if_extents = NULL;
+ ip->i_df.if_u1.if_root = NULL;
break;
default:
ASSERT(0);
@@ -975,9 +970,8 @@ xfs_dir_ialloc(
the inode. */
umode_t mode,
xfs_nlink_t nlink,
- xfs_dev_t rdev,
+ dev_t rdev,
prid_t prid, /* project id */
- int okalloc, /* ok to allocate new space */
xfs_inode_t **ipp, /* pointer to inode; it will be
locked. */
int *committed)
@@ -1008,8 +1002,8 @@ xfs_dir_ialloc(
* transaction commit so that no other process can steal
* the inode(s) that we've just allocated.
*/
- code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
- &ialloc_context, &ip);
+ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
+ &ip);
/*
* Return an error if we were unable to allocate a new inode.
@@ -1081,7 +1075,7 @@ xfs_dir_ialloc(
* this call should always succeed.
*/
code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
- okalloc, &ialloc_context, &ip);
+ &ialloc_context, &ip);
/*
* If we get an error at this point, return to the caller
@@ -1147,7 +1141,7 @@ xfs_create(
xfs_inode_t *dp,
struct xfs_name *name,
umode_t mode,
- xfs_dev_t rdev,
+ dev_t rdev,
xfs_inode_t **ipp)
{
int is_dir = S_ISDIR(mode);
@@ -1183,7 +1177,6 @@ xfs_create(
return error;
if (is_dir) {
- rdev = 0;
resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
tres = &M_RES(mp)->tr_mkdir;
} else {
@@ -1203,11 +1196,6 @@ xfs_create(
xfs_flush_inodes(mp);
error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
}
- if (error == -ENOSPC) {
- /* No space at all so try a "no-allocation" reservation */
- resblks = 0;
- error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
- }
if (error)
goto out_release_inode;
@@ -1224,19 +1212,13 @@ xfs_create(
if (error)
goto out_trans_cancel;
- if (!resblks) {
- error = xfs_dir_canenter(tp, dp, name);
- if (error)
- goto out_trans_cancel;
- }
-
/*
* A newly created regular or special file just has one directory
* entry pointing to them, but a directory also the "." entry
* pointing to itself.
*/
- error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
- prid, resblks > 0, &ip, NULL);
+ error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
+ NULL);
if (error)
goto out_trans_cancel;
@@ -1361,11 +1343,6 @@ xfs_create_tmpfile(
tres = &M_RES(mp)->tr_create_tmpfile;
error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
- if (error == -ENOSPC) {
- /* No space at all so try a "no-allocation" reservation */
- resblks = 0;
- error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
- }
if (error)
goto out_release_inode;
@@ -1374,8 +1351,7 @@ xfs_create_tmpfile(
if (error)
goto out_trans_cancel;
- error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
- prid, resblks > 0, &ip, NULL);
+ error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
if (error)
goto out_trans_cancel;
@@ -1461,7 +1437,7 @@ xfs_link(
if (error)
goto std_return;
- xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
@@ -1527,6 +1503,24 @@ xfs_link(
return error;
}
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+ struct xfs_inode *ip)
+{
+ struct xfs_ifork *dfork;
+ struct xfs_ifork *cfork;
+
+ if (!xfs_is_reflink_inode(ip))
+ return;
+ dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+ ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ if (cfork->if_bytes == 0)
+ xfs_inode_clear_cowblocks_tag(ip);
+}
+
/*
* Free up the underlying blocks past new_size. The new size must be smaller
* than the current size. This routine can be used both for the attribute and
@@ -1623,15 +1617,7 @@ xfs_itruncate_extents(
if (error)
goto out;
- /*
- * Clear the reflink flag if there are no data fork blocks and
- * there are no extents staged in the cow fork.
- */
- if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
- if (ip->i_d.di_nblocks == 0)
- ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
- xfs_inode_clear_cowblocks_tag(ip);
- }
+ xfs_itruncate_clear_reflink_flags(ip);
/*
* Always re-log the inode so that our permanent transaction can keep
@@ -1886,6 +1872,7 @@ xfs_inactive(
xfs_inode_t *ip)
{
struct xfs_mount *mp;
+ struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
int error;
int truncate = 0;
@@ -1906,6 +1893,10 @@ xfs_inactive(
if (mp->m_flags & XFS_MOUNT_RDONLY)
return;
+ /* Try to clean out the cow blocks if there are any. */
+ if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0)
+ xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+
if (VFS_I(ip)->i_nlink != 0) {
/*
* force is true because we are evicting an inode from the
@@ -2244,7 +2235,7 @@ xfs_ifree_cluster(
xfs_buf_t *bp;
xfs_inode_t *ip;
xfs_inode_log_item_t *iip;
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
struct xfs_perag *pag;
xfs_ino_t inum;
@@ -2302,8 +2293,7 @@ xfs_ifree_cluster(
* stale first, we will not attempt to lock them in the loop
* below as the XFS_ISTALE flag will be set.
*/
- lip = bp->b_fspriv;
- while (lip) {
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
if (lip->li_type == XFS_LI_INODE) {
iip = (xfs_inode_log_item_t *)lip;
ASSERT(iip->ili_logged == 1);
@@ -2313,7 +2303,6 @@ xfs_ifree_cluster(
&iip->ili_item.li_lsn);
xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
}
- lip = lip->li_bio_list;
}
@@ -2378,6 +2367,7 @@ retry:
*/
if (ip->i_ino != inum + i) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ rcu_read_unlock();
continue;
}
}
@@ -2421,6 +2411,24 @@ retry:
}
/*
+ * Free any local-format buffers sitting around before we reset to
+ * extents format.
+ */
+static inline void
+xfs_ifree_local_data(
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ struct xfs_ifork *ifp;
+
+ if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+ return;
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
+}
+
+/*
* This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have
* no pages associated with it. This routine also assumes that
@@ -2457,12 +2465,20 @@ xfs_ifree(
if (error)
return error;
+ xfs_ifree_local_data(ip, XFS_DATA_FORK);
+ xfs_ifree_local_data(ip, XFS_ATTR_FORK);
+
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
+ ip->i_d.di_flags2 = 0;
ip->i_d.di_dmevmask = 0;
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+
+ /* Don't attempt to replay owner changes for a deleted inode */
+ ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER);
+
/*
* Bump the generation count so no one will be confused
* by reincarnations of this inode.
@@ -2490,7 +2506,7 @@ xfs_iunpin(
trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
/* Give the log a push to start the unpinning I/O */
- xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
+ xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
}
@@ -2594,7 +2610,7 @@ xfs_remove(
goto std_return;
}
- xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -3487,6 +3503,36 @@ abort_out:
return error;
}
+/*
+ * If there are inline format data / attr forks attached to this inode,
+ * make sure they're not corrupt.
+ */
+bool
+xfs_inode_verify_forks(
+ struct xfs_inode *ip)
+{
+ struct xfs_ifork *ifp;
+ xfs_failaddr_t fa;
+
+ fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
+ if (fa) {
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
+ ifp->if_u1.if_data, ifp->if_bytes, fa);
+ return false;
+ }
+
+ fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
+ if (fa) {
+ ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
+ ifp ? ifp->if_u1.if_data : NULL,
+ ifp ? ifp->if_bytes : 0, fa);
+ return false;
+ }
+ return true;
+}
+
STATIC int
xfs_iflush_int(
struct xfs_inode *ip,
@@ -3509,7 +3555,7 @@ xfs_iflush_int(
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
+ "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
goto corrupt_out;
}
@@ -3519,7 +3565,7 @@ xfs_iflush_int(
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_IFLUSH_3)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad regular inode %Lu, ptr 0x%p",
+ "%s: Bad regular inode %Lu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
goto corrupt_out;
}
@@ -3530,7 +3576,7 @@ xfs_iflush_int(
(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
mp, XFS_ERRTAG_IFLUSH_4)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad directory inode %Lu, ptr 0x%p",
+ "%s: Bad directory inode %Lu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
goto corrupt_out;
}
@@ -3539,7 +3585,7 @@ xfs_iflush_int(
ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: detected corrupt incore inode %Lu, "
- "total extents = %d, nblocks = %Ld, ptr 0x%p",
+ "total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
__func__, ip->i_ino,
ip->i_d.di_nextents + ip->i_d.di_anextents,
ip->i_d.di_nblocks, ip);
@@ -3548,7 +3594,7 @@ xfs_iflush_int(
if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
mp, XFS_ERRTAG_IFLUSH_6)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
+ "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
goto corrupt_out;
}
@@ -3565,10 +3611,8 @@ xfs_iflush_int(
if (ip->i_d.di_version < 3)
ip->i_d.di_flushiter++;
- /* Check the inline directory data. */
- if (S_ISDIR(VFS_I(ip)->i_mode) &&
- ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
- xfs_dir2_sf_verify(ip))
+ /* Check the inline fork data before we write out. */
+ if (!xfs_inode_verify_forks(ip))
goto corrupt_out;
/*
@@ -3631,7 +3675,7 @@ xfs_iflush_int(
/* generate the checksum. */
xfs_dinode_calc_crc(mp, dip);
- ASSERT(bp->b_fspriv != NULL);
+ ASSERT(!list_empty(&bp->b_li_list));
ASSERT(bp->b_iodone != NULL);
return 0;