diff options
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 234 |
1 files changed, 139 insertions, 95 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4ec5b7f45401..3e3aab3888fa 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -16,6 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include <linux/log2.h> +#include <linux/iversion.h> #include "xfs.h" #include "xfs_fs.h" @@ -39,6 +40,7 @@ #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" +#include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_filestream.h" @@ -384,14 +386,6 @@ xfs_isilocked( } #endif -#ifdef DEBUG -int xfs_locked_n; -int xfs_small_retries; -int xfs_middle_retries; -int xfs_lots_retries; -int xfs_lock_delays; -#endif - /* * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined @@ -544,45 +538,45 @@ again: if ((attempts % 5) == 0) { delay(1); /* Don't just spin the CPU */ -#ifdef DEBUG - xfs_lock_delays++; -#endif } i = 0; try_lock = 0; goto again; } - -#ifdef DEBUG - if (attempts) { - if (attempts < 5) xfs_small_retries++; - else if (attempts < 100) xfs_middle_retries++; - else xfs_lots_retries++; - } else { - xfs_locked_n++; - } -#endif } /* * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - - * the iolock, the mmaplock or the ilock, but not more than one at a time. If we - * lock more than one at a time, lockdep will report false positives saying we - * have violated locking orders. + * the mmaplock or the ilock, but not more than one type at a time. If we lock + * more than one at a time, lockdep will report false positives saying we have + * violated locking orders. The iolock must be double-locked separately since + * we use i_rwsem for that. We now support taking one lock EXCL and the other + * SHARED. */ void xfs_lock_two_inodes( - xfs_inode_t *ip0, - xfs_inode_t *ip1, - uint lock_mode) + struct xfs_inode *ip0, + uint ip0_mode, + struct xfs_inode *ip1, + uint ip1_mode) { - xfs_inode_t *temp; + struct xfs_inode *temp; + uint mode_temp; int attempts = 0; xfs_log_item_t *lp; - ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); - if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) - ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(hweight32(ip0_mode) == 1); + ASSERT(hweight32(ip1_mode) == 1); + ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) || + !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); ASSERT(ip0->i_ino != ip1->i_ino); @@ -590,10 +584,13 @@ xfs_lock_two_inodes( temp = ip0; ip0 = ip1; ip1 = temp; + mode_temp = ip0_mode; + ip0_mode = ip1_mode; + ip1_mode = mode_temp; } again: - xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); + xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0)); /* * If the first lock we have locked is in the AIL, we must TRY to get @@ -602,18 +599,17 @@ xfs_lock_two_inodes( */ lp = (xfs_log_item_t *)ip0->i_itemp; if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { - if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { - xfs_iunlock(ip0, lock_mode); + if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { + xfs_iunlock(ip0, ip0_mode); if ((++attempts % 5) == 0) delay(1); /* Don't just spin the CPU */ goto again; } } else { - xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); + xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1)); } } - void __xfs_iflock( struct xfs_inode *ip) @@ -767,9 +763,8 @@ xfs_ialloc( xfs_inode_t *pip, umode_t mode, xfs_nlink_t nlink, - xfs_dev_t rdev, + dev_t rdev, prid_t prid, - int okalloc, xfs_buf_t **ialloc_context, xfs_inode_t **ipp) { @@ -785,7 +780,7 @@ xfs_ialloc( * Call the space management code to pick * the on-disk inode to be allocated. */ - error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, + error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, ialloc_context, &ino); if (error) return error; @@ -819,6 +814,7 @@ xfs_ialloc( set_nlink(inode, nlink); ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid()); ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid()); + inode->i_rdev = rdev; xfs_set_projid(ip, prid); if (pip && XFS_INHERIT_GID(pip)) { @@ -852,7 +848,7 @@ xfs_ialloc( ip->i_d.di_flags = 0; if (ip->i_d.di_version == 3) { - inode->i_version = 1; + inode_set_iversion(inode, 1); ip->i_d.di_flags2 = 0; ip->i_d.di_cowextsize = 0; ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec; @@ -867,7 +863,6 @@ xfs_ialloc( case S_IFBLK: case S_IFSOCK: ip->i_d.di_format = XFS_DINODE_FMT_DEV; - ip->i_df.if_u2.if_rdev = rdev; ip->i_df.if_flags = 0; flags |= XFS_ILOG_DEV; break; @@ -933,7 +928,7 @@ xfs_ialloc( ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; ip->i_df.if_flags = XFS_IFEXTENTS; ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; - ip->i_df.if_u1.if_extents = NULL; + ip->i_df.if_u1.if_root = NULL; break; default: ASSERT(0); @@ -975,9 +970,8 @@ xfs_dir_ialloc( the inode. */ umode_t mode, xfs_nlink_t nlink, - xfs_dev_t rdev, + dev_t rdev, prid_t prid, /* project id */ - int okalloc, /* ok to allocate new space */ xfs_inode_t **ipp, /* pointer to inode; it will be locked. */ int *committed) @@ -1008,8 +1002,8 @@ xfs_dir_ialloc( * transaction commit so that no other process can steal * the inode(s) that we've just allocated. */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, - &ialloc_context, &ip); + code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context, + &ip); /* * Return an error if we were unable to allocate a new inode. @@ -1081,7 +1075,7 @@ xfs_dir_ialloc( * this call should always succeed. */ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, - okalloc, &ialloc_context, &ip); + &ialloc_context, &ip); /* * If we get an error at this point, return to the caller @@ -1147,7 +1141,7 @@ xfs_create( xfs_inode_t *dp, struct xfs_name *name, umode_t mode, - xfs_dev_t rdev, + dev_t rdev, xfs_inode_t **ipp) { int is_dir = S_ISDIR(mode); @@ -1183,7 +1177,6 @@ xfs_create( return error; if (is_dir) { - rdev = 0; resblks = XFS_MKDIR_SPACE_RES(mp, name->len); tres = &M_RES(mp)->tr_mkdir; } else { @@ -1203,11 +1196,6 @@ xfs_create( xfs_flush_inodes(mp); error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); } - if (error == -ENOSPC) { - /* No space at all so try a "no-allocation" reservation */ - resblks = 0; - error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp); - } if (error) goto out_release_inode; @@ -1224,19 +1212,13 @@ xfs_create( if (error) goto out_trans_cancel; - if (!resblks) { - error = xfs_dir_canenter(tp, dp, name); - if (error) - goto out_trans_cancel; - } - /* * A newly created regular or special file just has one directory * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, - prid, resblks > 0, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip, + NULL); if (error) goto out_trans_cancel; @@ -1361,11 +1343,6 @@ xfs_create_tmpfile( tres = &M_RES(mp)->tr_create_tmpfile; error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); - if (error == -ENOSPC) { - /* No space at all so try a "no-allocation" reservation */ - resblks = 0; - error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp); - } if (error) goto out_release_inode; @@ -1374,8 +1351,7 @@ xfs_create_tmpfile( if (error) goto out_trans_cancel; - error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, - prid, resblks > 0, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL); if (error) goto out_trans_cancel; @@ -1461,7 +1437,7 @@ xfs_link( if (error) goto std_return; - xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); @@ -1527,6 +1503,24 @@ xfs_link( return error; } +/* Clear the reflink flag and the cowblocks tag if possible. */ +static void +xfs_itruncate_clear_reflink_flags( + struct xfs_inode *ip) +{ + struct xfs_ifork *dfork; + struct xfs_ifork *cfork; + + if (!xfs_is_reflink_inode(ip)) + return; + dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK); + if (dfork->if_bytes == 0 && cfork->if_bytes == 0) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + if (cfork->if_bytes == 0) + xfs_inode_clear_cowblocks_tag(ip); +} + /* * Free up the underlying blocks past new_size. The new size must be smaller * than the current size. This routine can be used both for the attribute and @@ -1623,15 +1617,7 @@ xfs_itruncate_extents( if (error) goto out; - /* - * Clear the reflink flag if there are no data fork blocks and - * there are no extents staged in the cow fork. - */ - if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { - if (ip->i_d.di_nblocks == 0) - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; - xfs_inode_clear_cowblocks_tag(ip); - } + xfs_itruncate_clear_reflink_flags(ip); /* * Always re-log the inode so that our permanent transaction can keep @@ -1886,6 +1872,7 @@ xfs_inactive( xfs_inode_t *ip) { struct xfs_mount *mp; + struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); int error; int truncate = 0; @@ -1906,6 +1893,10 @@ xfs_inactive( if (mp->m_flags & XFS_MOUNT_RDONLY) return; + /* Try to clean out the cow blocks if there are any. */ + if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0) + xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (VFS_I(ip)->i_nlink != 0) { /* * force is true because we are evicting an inode from the @@ -2244,7 +2235,7 @@ xfs_ifree_cluster( xfs_buf_t *bp; xfs_inode_t *ip; xfs_inode_log_item_t *iip; - xfs_log_item_t *lip; + struct xfs_log_item *lip; struct xfs_perag *pag; xfs_ino_t inum; @@ -2302,8 +2293,7 @@ xfs_ifree_cluster( * stale first, we will not attempt to lock them in the loop * below as the XFS_ISTALE flag will be set. */ - lip = bp->b_fspriv; - while (lip) { + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { if (lip->li_type == XFS_LI_INODE) { iip = (xfs_inode_log_item_t *)lip; ASSERT(iip->ili_logged == 1); @@ -2313,7 +2303,6 @@ xfs_ifree_cluster( &iip->ili_item.li_lsn); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); } - lip = lip->li_bio_list; } @@ -2378,6 +2367,7 @@ retry: */ if (ip->i_ino != inum + i) { xfs_iunlock(ip, XFS_ILOCK_EXCL); + rcu_read_unlock(); continue; } } @@ -2421,6 +2411,24 @@ retry: } /* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + +/* * This is called to return an inode to the inode free list. * The inode should already be truncated to 0 length and have * no pages associated with it. This routine also assumes that @@ -2457,12 +2465,20 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; + ip->i_d.di_flags2 = 0; ip->i_d.di_dmevmask = 0; ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; + + /* Don't attempt to replay owner changes for a deleted inode */ + ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER); + /* * Bump the generation count so no one will be confused * by reincarnations of this inode. @@ -2490,7 +2506,7 @@ xfs_iunpin( trace_xfs_inode_unpin_nowait(ip, _RET_IP_); /* Give the log a push to start the unpinning I/O */ - xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); + xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL); } @@ -2594,7 +2610,7 @@ xfs_remove( goto std_return; } - xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -3487,6 +3503,36 @@ abort_out: return error; } +/* + * If there are inline format data / attr forks attached to this inode, + * make sure they're not corrupt. + */ +bool +xfs_inode_verify_forks( + struct xfs_inode *ip) +{ + struct xfs_ifork *ifp; + xfs_failaddr_t fa; + + fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); + if (fa) { + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", + ifp->if_u1.if_data, ifp->if_bytes, fa); + return false; + } + + fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); + if (fa) { + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", + ifp ? ifp->if_u1.if_data : NULL, + ifp ? ifp->if_bytes : 0, fa); + return false; + } + return true; +} + STATIC int xfs_iflush_int( struct xfs_inode *ip, @@ -3509,7 +3555,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), mp, XFS_ERRTAG_IFLUSH_1)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", + "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT, __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); goto corrupt_out; } @@ -3519,7 +3565,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_IFLUSH_3)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad regular inode %Lu, ptr 0x%p", + "%s: Bad regular inode %Lu, ptr "PTR_FMT, __func__, ip->i_ino, ip); goto corrupt_out; } @@ -3530,7 +3576,7 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), mp, XFS_ERRTAG_IFLUSH_4)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: Bad directory inode %Lu, ptr 0x%p", + "%s: Bad directory inode %Lu, ptr "PTR_FMT, __func__, ip->i_ino, ip); goto corrupt_out; } @@ -3539,7 +3585,7 @@ xfs_iflush_int( ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, "%s: detected corrupt incore inode %Lu, " - "total extents = %d, nblocks = %Ld, ptr 0x%p", + "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, __func__, ip->i_ino, ip->i_d.di_nextents + ip->i_d.di_anextents, ip->i_d.di_nblocks, ip); @@ -3548,7 +3594,7 @@ xfs_iflush_int( if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, mp, XFS_ERRTAG_IFLUSH_6)) { xfs_alert_tag(mp, XFS_PTAG_IFLUSH, - "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", + "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT, __func__, ip->i_ino, ip->i_d.di_forkoff, ip); goto corrupt_out; } @@ -3565,10 +3611,8 @@ xfs_iflush_int( if (ip->i_d.di_version < 3) ip->i_d.di_flushiter++; - /* Check the inline directory data. */ - if (S_ISDIR(VFS_I(ip)->i_mode) && - ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && - xfs_dir2_sf_verify(ip)) + /* Check the inline fork data before we write out. */ + if (!xfs_inode_verify_forks(ip)) goto corrupt_out; /* @@ -3631,7 +3675,7 @@ xfs_iflush_int( /* generate the checksum. */ xfs_dinode_calc_crc(mp, dip); - ASSERT(bp->b_fspriv != NULL); + ASSERT(!list_empty(&bp->b_li_list)); ASSERT(bp->b_iodone != NULL); return 0; |