diff options
Diffstat (limited to 'fs/nfs/inode.c')
| -rw-r--r-- | fs/nfs/inode.c | 652 |
1 files changed, 501 insertions, 151 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a918c3a834b6..f76fe406937a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -72,18 +72,15 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } -static int nfs_wait_killable(int mode) +int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) { - freezable_schedule_unsafe(); + if (unlikely(nfs_current_task_exiting())) + return -EINTR; + schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; return 0; } - -int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) -{ - return nfs_wait_killable(mode); -} EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); /** @@ -111,7 +108,7 @@ u64 nfs_compat_user_ino64(u64 fileid) int nfs_drop_inode(struct inode *inode) { - return NFS_STALE(inode) || generic_drop_inode(inode); + return NFS_STALE(inode) || inode_generic_drop(inode); } EXPORT_SYMBOL_GPL(nfs_drop_inode); @@ -195,30 +192,33 @@ static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); - bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); - if (have_delegation) { + if (nfs_have_delegated_attributes(inode)) { if (!(flags & NFS_INO_REVAL_FORCED)) flags &= ~(NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_BTIME | NFS_INO_INVALID_XATTR); flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); - } else if (flags & NFS_INO_REVAL_PAGECACHE) - flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE; + } if (!nfs_has_xattr_cache(nfsi)) flags &= ~NFS_INO_INVALID_XATTR; if (flags & NFS_INO_INVALID_DATA) nfs_fscache_invalidate(inode, 0); - flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); - - nfsi->cache_validity |= flags; + flags &= ~NFS_INO_REVAL_FORCED; + flags |= nfsi->cache_validity; if (inode->i_mapping->nrpages == 0) - nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA | - NFS_INO_DATA_INVAL_DEFER); - else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; + flags &= ~NFS_INO_INVALID_DATA; + + /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */ + smp_store_release(&nfsi->cache_validity, flags); + + if (inode->i_mapping->nrpages == 0 || + nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_ooo_clear(nfsi); + } trace_nfs_set_cache_invalid(inode, 0); } EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); @@ -236,19 +236,17 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_XATTR - | NFS_INO_REVAL_PAGECACHE); - } else - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_XATTR - | NFS_INO_REVAL_PAGECACHE); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | + NFS_INO_INVALID_DATA | + NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL | + NFS_INO_INVALID_XATTR); + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | + NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL | + NFS_INO_INVALID_XATTR); nfs_zap_label_cache_locked(nfsi); } @@ -283,6 +281,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { + if (nfs_have_delegated_atime(inode)) + return; spin_lock(&inode->i_lock); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); @@ -321,7 +321,7 @@ struct nfs_find_desc { static int nfs_find_actor(struct inode *inode, void *opaque) { - struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_find_desc *desc = opaque; struct nfs_fh *fh = desc->fh; struct nfs_fattr *fattr = desc->fattr; @@ -339,7 +339,7 @@ nfs_find_actor(struct inode *inode, void *opaque) static int nfs_init_locked(struct inode *inode, void *opaque) { - struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_find_desc *desc = opaque; struct nfs_fattr *fattr = desc->fattr; set_nfs_fileid(inode, fattr->fileid); @@ -429,6 +429,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) static void nfs_inode_init_regular(struct nfs_inode *nfsi) { atomic_long_set(&nfsi->nrequests, 0); + atomic_long_set(&nfsi->redirtied_pages, 0); INIT_LIST_HEAD(&nfsi->commit_info.list); atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); @@ -474,7 +475,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) goto out_no_inode; } - if (inode->i_state & I_NEW) { + if (inode_state_read_once(inode) & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); unsigned long now = jiffies; @@ -497,6 +498,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; nfs_inode_init_regular(nfsi); + mapping_set_large_folios(inode->i_mapping); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; @@ -518,9 +520,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) } else init_special_inode(inode, inode->i_mode, fattr->rdev); - memset(&inode->i_atime, 0, sizeof(inode->i_atime)); - memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); - memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); + inode_set_atime(inode, 0, 0); + inode_set_mtime(inode, 0, 0); + inode_set_ctime(inode, 0, 0); + memset(&nfsi->btime, 0, sizeof(nfsi->btime)); inode_set_iversion_raw(inode, 0); inode->i_size = 0; clear_nlink(inode); @@ -533,17 +536,21 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = fattr->atime; + inode_set_atime_to_ts(inode, fattr->atime); else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = fattr->mtime; + inode_set_mtime_to_ts(inode, fattr->mtime); else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); + if (fattr->valid & NFS_ATTR_FATTR_BTIME) + nfsi->btime = fattr->btime; + else if (fattr_supported & NFS_ATTR_FATTR_BTIME) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode_set_iversion_raw(inode, fattr->change_attr); else @@ -556,6 +563,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) set_nlink(inode, fattr->nlink); else if (fattr_supported & NFS_ATTR_FATTR_NLINK) nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK); + else + set_nlink(inode, 1); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (fattr_supported & NFS_ATTR_FATTR_OWNER) @@ -564,8 +573,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_gid = fattr->gid; else if (fattr_supported & NFS_ATTR_FATTR_GROUP) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); - if (nfs_server_capable(inode, NFS_CAP_XATTR)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED && @@ -601,7 +608,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), nfs_display_fhandle_hash(fh), - atomic_read(&inode->i_count)); + icount_read(inode)); out: return inode; @@ -612,15 +619,107 @@ out_no_inode: } EXPORT_SYMBOL_GPL(nfs_fhget); +static void +nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr) +{ + unsigned long cache_validity = NFS_I(inode)->cache_validity; + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME | + NFS_ATTR_FATTR_CTIME); + + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME | + NFS_ATTR_FATTR_MTIME); + + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } +} + +static void nfs_set_timestamps_to_ts(struct inode *inode, struct iattr *attr) +{ + unsigned int cache_flags = 0; + + if (attr->ia_valid & ATTR_MTIME_SET) { + struct timespec64 ctime = inode_get_ctime(inode); + struct timespec64 mtime = inode_get_mtime(inode); + struct timespec64 now; + int updated = 0; + + now = inode_set_ctime_current(inode); + if (!timespec64_equal(&now, &ctime)) + updated |= S_CTIME; + + inode_set_mtime_to_ts(inode, attr->ia_mtime); + if (!timespec64_equal(&now, &mtime)) + updated |= S_MTIME; + + inode_maybe_inc_iversion(inode, updated); + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (attr->ia_valid & ATTR_ATIME_SET) { + inode_set_atime_to_ts(inode, attr->ia_atime); + cache_flags |= NFS_INO_INVALID_ATIME; + } + NFS_I(inode)->cache_validity &= ~cache_flags; +} + +static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid) +{ + enum file_time_flags time_flags = 0; + unsigned int cache_flags = 0; + + if (ia_valid & ATTR_MTIME) { + time_flags |= S_MTIME | S_CTIME; + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (ia_valid & ATTR_ATIME) { + time_flags |= S_ATIME; + cache_flags |= NFS_INO_INVALID_ATIME; + } + inode_update_timestamps(inode, time_flags); + NFS_I(inode)->cache_validity &= ~cache_flags; +} + +void nfs_update_delegated_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (nfs_have_delegated_atime(inode)) + nfs_update_timestamps(inode, ATTR_ATIME); + spin_unlock(&inode->i_lock); +} + +void nfs_update_delegated_mtime_locked(struct inode *inode) +{ + if (nfs_have_delegated_mtime(inode)) + nfs_update_timestamps(inode, ATTR_MTIME); +} + +void nfs_update_delegated_mtime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_update_delegated_mtime_locked(inode); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime); + #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int -nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); struct nfs_fattr *fattr; + loff_t oldsize = i_size_read(inode); int error = 0; + kuid_t task_uid = current_fsuid(); + kuid_t owner_uid = inode->i_uid; nfs_inc_stats(inode, NFSIOS_VFSSETATTR); @@ -635,10 +734,39 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (error) return error; - if (attr->ia_size == i_size_read(inode)) + if (attr->ia_size == oldsize) attr->ia_valid &= ~ATTR_SIZE; } + if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) { + spin_lock(&inode->i_lock); + if (attr->ia_valid & ATTR_MTIME_SET) { + if (uid_eq(task_uid, owner_uid)) { + nfs_set_timestamps_to_ts(inode, attr); + attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET| + ATTR_ATIME|ATTR_ATIME_SET); + } + } else { + nfs_update_timestamps(inode, attr->ia_valid); + attr->ia_valid &= ~(ATTR_MTIME|ATTR_ATIME); + } + spin_unlock(&inode->i_lock); + } else if (nfs_have_delegated_atime(inode) && + attr->ia_valid & ATTR_ATIME && + !(attr->ia_valid & ATTR_MTIME)) { + if (attr->ia_valid & ATTR_ATIME_SET) { + if (uid_eq(task_uid, owner_uid)) { + spin_lock(&inode->i_lock); + nfs_set_timestamps_to_ts(inode, attr); + spin_unlock(&inode->i_lock); + attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET); + } + } else { + nfs_update_delegated_atime(inode); + attr->ia_valid &= ~ATTR_ATIME; + } + } + /* Optimization: if the end result is no change, don't RPC */ if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; @@ -646,8 +774,10 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, trace_nfs_setattr_enter(inode); /* Write all dirty data */ - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { + nfs_file_block_o_direct(NFS_I(inode)); nfs_sync_inode(inode); + } fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) { @@ -656,8 +786,12 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); - if (error == 0) + if (error == 0) { + if (attr->ia_valid & ATTR_SIZE) + nfs_truncate_last_folio(inode->i_mapping, oldsize, + attr->ia_size); error = nfs_refresh_inode(inode, fattr); + } nfs_free_fattr(fattr); out: trace_nfs_setattr_exit(inode, error); @@ -686,13 +820,15 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) trace_nfs_size_truncate(inode, offset); i_size_write(inode, offset); /* Optimisation */ - if (offset == 0) - NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_DATA | - NFS_INO_DATA_INVAL_DEFER); + if (offset == 0) { + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(NFS_I(inode)); + } NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); + nfs_update_delegated_mtime_locked(inode); spin_lock(&inode->i_lock); out: return err; @@ -716,8 +852,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & ATTR_SIZE) != 0) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME | - NFS_INO_INVALID_BLOCKS); + if (!nfs_have_delegated_mtime(inode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } @@ -726,9 +863,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_KILL_SUID) != 0 && inode->i_mode & S_ISUID) inode->i_mode &= ~S_ISUID; - if ((attr->ia_valid & ATTR_KILL_SGID) != 0 && - (inode->i_mode & (S_ISGID | S_IXGRP)) == - (S_ISGID | S_IXGRP)) + if (setattr_should_drop_sgid(&nop_mnt_idmap, inode)) inode->i_mode &= ~S_ISGID; if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; @@ -740,7 +875,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -751,14 +886,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = fattr->atime; + inode_set_atime_to_ts(inode, fattr->atime); else if (attr->ia_valid & ATTR_ATIME_SET) - inode->i_atime = attr->ia_atime; + inode_set_atime_to_ts(inode, attr->ia_atime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -767,14 +902,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = fattr->mtime; + inode_set_mtime_to_ts(inode, fattr->mtime); else if (attr->ia_valid & ATTR_MTIME_SET) - inode->i_mtime = attr->ia_mtime; + inode_set_mtime_to_ts(inode, attr->ia_mtime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -785,30 +920,37 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); -static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry) +/* + * Don't request help from readdirplus if the file is being written to, + * or if attribute caching is turned off + */ +static bool nfs_getattr_readdirplus_enable(const struct inode *inode) { - struct dentry *parent; + return nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && + !nfs_have_writebacks(inode) && NFS_MAXATTRTIMEO(inode) > 5 * HZ; +} - if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) - return; - parent = dget_parent(dentry); - nfs_force_use_readdirplus(d_inode(parent)); - dput(parent); +static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry) +{ + if (!IS_ROOT(dentry)) { + struct dentry *parent = dget_parent(dentry); + nfs_readdir_record_entry_cache_miss(d_inode(parent)); + dput(parent); + } } static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) { - struct dentry *parent; - - if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) - return; - parent = dget_parent(dentry); - nfs_advise_use_readdirplus(d_inode(parent)); - dput(parent); + if (!IS_ROOT(dentry)) { + struct dentry *parent = dget_parent(dentry); + nfs_readdir_record_entry_cache_hit(d_inode(parent)); + dput(parent); + } } static u32 nfs_get_valid_attrmask(struct inode *inode) { + u64 fattr_valid = NFS_SERVER(inode)->fattr_valid; unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); u32 reply_mask = STATX_INO | STATX_TYPE; @@ -828,36 +970,49 @@ static u32 nfs_get_valid_attrmask(struct inode *inode) reply_mask |= STATX_UID | STATX_GID; if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) reply_mask |= STATX_BLOCKS; + if (!(cache_validity & NFS_INO_INVALID_BTIME) && + (fattr_valid & NFS_ATTR_FATTR_BTIME)) + reply_mask |= STATX_BTIME; + if (!(cache_validity & NFS_INO_INVALID_CHANGE)) + reply_mask |= STATX_CHANGE_COOKIE; return reply_mask; } -int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct nfs_server *server = NFS_SERVER(inode); + u64 fattr_valid = server->fattr_valid; unsigned long cache_validity; int err = 0; bool force_sync = query_flags & AT_STATX_FORCE_SYNC; bool do_update = false; + bool readdirplus_enabled = nfs_getattr_readdirplus_enable(inode); trace_nfs_getattr_enter(inode); request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | - STATX_INO | STATX_SIZE | STATX_BLOCKS; + STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME | + STATX_CHANGE_COOKIE; + + if (!(fattr_valid & NFS_ATTR_FATTR_BTIME)) + request_mask &= ~STATX_BTIME; if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { - nfs_readdirplus_parent_cache_hit(path->dentry); + if (readdirplus_enabled) + nfs_readdirplus_parent_cache_hit(path->dentry); goto out_no_revalidate; } - /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME|STATX_MTIME)) && - S_ISREG(inode->i_mode)) { - err = filemap_write_and_wait(inode->i_mapping); - if (err) - goto out; + /* Flush out writes to the server in order to update c/mtime/version. */ + if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) && + S_ISREG(inode->i_mode)) { + if (nfs_have_delegated_mtime(inode)) + filemap_fdatawrite(inode->i_mapping); + else + filemap_write_and_wait(inode->i_mapping); } /* @@ -876,7 +1031,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, /* Is the user requesting attributes that might need revalidation? */ if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME| STATX_MTIME|STATX_UID|STATX_GID| - STATX_SIZE|STATX_BLOCKS))) + STATX_SIZE|STATX_BLOCKS|STATX_BTIME| + STATX_CHANGE_COOKIE))) goto out_no_revalidate; /* Check whether the cached attributes are stale */ @@ -899,26 +1055,45 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, do_update |= cache_validity & NFS_INO_INVALID_OTHER; if (request_mask & STATX_BLOCKS) do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; + if (request_mask & STATX_BTIME) + do_update |= cache_validity & NFS_INO_INVALID_BTIME; if (do_update) { - /* Update the attribute cache */ - if (!(server->flags & NFS_MOUNT_NOAC)) + if (readdirplus_enabled) nfs_readdirplus_parent_cache_miss(path->dentry); - else - nfs_readdirplus_parent_cache_hit(path->dentry); err = __nfs_revalidate_inode(server, inode); if (err) goto out; - } else + } else if (readdirplus_enabled) nfs_readdirplus_parent_cache_hit(path->dentry); out_no_revalidate: /* Only return attributes that were revalidated. */ stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask; - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + stat->change_cookie = inode_peek_iversion_raw(inode); + stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; + if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED) + stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; + stat->btime = NFS_I(inode)->btime; + + /* Special handling for STATX_DIOALIGN and STATX_DIO_READ_ALIGN + * - NFS doesn't have DIO alignment constraints, avoid getting + * these DIO attrs from remote and just respond with most + * accommodating limits (so client will issue supported DIO). + * - this is unintuitive, but the most coarse-grained + * dio_offset_align is the most accommodating. + */ + if ((request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN)) && + S_ISREG(inode->i_mode)) { + stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN; + stat->dio_mem_align = 4; /* 4-byte alignment */ + stat->dio_offset_align = PAGE_SIZE; + stat->dio_read_offset_align = stat->dio_offset_align; + } out: trace_nfs_getattr_exit(inode, err); return err; @@ -955,7 +1130,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) res = __nfs_find_lock_context(ctx); rcu_read_unlock(); if (res == NULL) { - new = kmalloc(sizeof(*new), GFP_KERNEL); + new = kmalloc(sizeof(*new), GFP_KERNEL_ACCOUNT); if (new == NULL) return ERR_PTR(-ENOMEM); nfs_init_lock_context(new); @@ -1011,7 +1186,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) @@ -1033,7 +1208,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, { struct nfs_open_context *ctx; - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); if (!ctx) return ERR_PTR(-ENOMEM); nfs_sb_active(dentry->d_sb); @@ -1052,6 +1227,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); ctx->mdsthreshold = NULL; + nfs_localio_file_init(&ctx->nfl); + return ctx; } EXPORT_SYMBOL_GPL(alloc_nfs_open_context); @@ -1083,6 +1260,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) nfs_sb_deactive(sb); put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1)); kfree(ctx->mdsthreshold); + nfs_close_local_fh(&ctx->nfl); kfree_rcu(ctx, rcu_head); } @@ -1108,7 +1286,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) spin_lock(&inode->i_lock); if (list_empty(&nfsi->open_files) && - (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) + nfs_ooo_test(nfsi)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA | NFS_INO_REVAL_FORCED); list_add_tail_rcu(&ctx->list, &nfsi->open_files); @@ -1175,7 +1353,8 @@ int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); + ctx = alloc_nfs_open_context(file_dentry(filp), + flags_to_mode(filp->f_flags), filp); if (IS_ERR(ctx)) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); @@ -1183,7 +1362,6 @@ int nfs_open(struct inode *inode, struct file *filp) nfs_fscache_open_file(inode, filp); return 0; } -EXPORT_SYMBOL_GPL(nfs_open); /* * This function is called whenever some part of NFS notices that @@ -1335,9 +1513,17 @@ int nfs_clear_invalid_mapping(struct address_space *mapping) */ for (;;) { ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING, - nfs_wait_bit_killable, TASK_KILLABLE); + nfs_wait_bit_killable, + TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); if (ret) goto out; + smp_rmb(); /* pairs with smp_wmb() below */ + if (test_bit(NFS_INO_INVALIDATING, bitlock)) + continue; + /* pairs with nfs_set_cache_invalid()'s smp_store_release() */ + if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA)) + goto out; + /* Slow-path that double-checks with spinlock held */ spin_lock(&inode->i_lock); if (test_bit(NFS_INO_INVALIDATING, bitlock)) { spin_unlock(&inode->i_lock); @@ -1351,8 +1537,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping) set_bit(NFS_INO_INVALIDATING, bitlock); smp_wmb(); - nfsi->cache_validity &= - ~(NFS_INO_INVALID_DATA | NFS_INO_DATA_INVAL_DEFER); + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(nfsi); spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); @@ -1442,18 +1628,18 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); } /* If we have atomic WCC data, we may update some attributes */ - ts = inode->i_ctime; + ts = inode_get_ctime(inode); if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) && timespec64_equal(&ts, &fattr->pre_ctime)) { - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); } - ts = inode->i_mtime; + ts = inode_get_mtime(inode); if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) && timespec64_equal(&ts, &fattr->pre_mtime)) { - inode->i_mtime = fattr->mtime; + inode_set_mtime_to_ts(inode, fattr->mtime); } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) @@ -1480,7 +1666,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat unsigned long invalid = 0; struct timespec64 ts; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_delegated_attributes(inode)) return 0; if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { @@ -1504,11 +1690,11 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) invalid |= NFS_INO_INVALID_CHANGE; - ts = inode->i_mtime; + ts = inode_get_mtime(inode); if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime)) invalid |= NFS_INO_INVALID_MTIME; - ts = inode->i_ctime; + ts = inode_get_ctime(inode); if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime)) invalid |= NFS_INO_INVALID_CTIME; @@ -1532,7 +1718,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) invalid |= NFS_INO_INVALID_NLINK; - ts = inode->i_atime; + ts = inode_get_atime(inode); if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; @@ -1563,6 +1749,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->gencount = nfs_inc_attr_generation_counter(); fattr->owner_name = NULL; fattr->group_name = NULL; + fattr->mdsthreshold = NULL; } EXPORT_SYMBOL_GPL(nfs_fattr_init); @@ -1586,7 +1773,7 @@ struct nfs_fattr *nfs_alloc_fattr(void) { struct nfs_fattr *fattr; - fattr = kmalloc(sizeof(*fattr), GFP_NOFS); + fattr = kmalloc(sizeof(*fattr), GFP_KERNEL); if (fattr != NULL) { nfs_fattr_init(fattr); fattr->label = NULL; @@ -1602,7 +1789,7 @@ struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server) if (!fattr) return NULL; - fattr->label = nfs4_label_alloc(server, GFP_NOFS); + fattr->label = nfs4_label_alloc(server, GFP_KERNEL); if (IS_ERR(fattr->label)) { kfree(fattr); return NULL; @@ -1616,7 +1803,7 @@ struct nfs_fh *nfs_alloc_fhandle(void) { struct nfs_fh *fh; - fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS); + fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); if (fh != NULL) fh->size = 0; return fh; @@ -1801,7 +1988,7 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | - NFS_INO_INVALID_NLINK; + NFS_INO_INVALID_NLINK | NFS_INO_INVALID_BTIME; unsigned long cache_validity = NFS_I(inode)->cache_validity; enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; @@ -1814,6 +2001,66 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, return 0; } +static void nfs_ooo_merge(struct nfs_inode *nfsi, + u64 start, u64 end) +{ + int i, cnt; + + if (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER) + /* No point merging anything */ + return; + + if (!nfsi->ooo) { + nfsi->ooo = kmalloc(sizeof(*nfsi->ooo), GFP_ATOMIC); + if (!nfsi->ooo) { + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + return; + } + nfsi->ooo->cnt = 0; + } + + /* add this range, merging if possible */ + cnt = nfsi->ooo->cnt; + for (i = 0; i < cnt; i++) { + if (end == nfsi->ooo->gap[i].start) + end = nfsi->ooo->gap[i].end; + else if (start == nfsi->ooo->gap[i].end) + start = nfsi->ooo->gap[i].start; + else + continue; + /* Remove 'i' from table and loop to insert the new range */ + cnt -= 1; + nfsi->ooo->gap[i] = nfsi->ooo->gap[cnt]; + i = -1; + } + if (start != end) { + if (cnt >= ARRAY_SIZE(nfsi->ooo->gap)) { + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + kfree(nfsi->ooo); + nfsi->ooo = NULL; + return; + } + nfsi->ooo->gap[cnt].start = start; + nfsi->ooo->gap[cnt].end = end; + cnt += 1; + } + nfsi->ooo->cnt = cnt; +} + +static void nfs_ooo_record(struct nfs_inode *nfsi, + struct nfs_fattr *fattr) +{ + /* This reply was out-of-order, so record in the + * pre/post change id, possibly cancelling + * gaps created when iversion was jumpped forward. + */ + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) && + (fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) + nfs_ooo_merge(nfsi, + fattr->change_attr, + fattr->pre_change_attr); +} + static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { @@ -1824,8 +2071,12 @@ static int nfs_refresh_inode_locked(struct inode *inode, if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode)) ret = nfs_update_inode(inode, fattr); - else if (attr_cmp == 0) - ret = nfs_check_inode_attributes(inode, fattr); + else { + nfs_ooo_record(NFS_I(inode), fattr); + + if (attr_cmp == 0) + ret = nfs_check_inode_attributes(inode, fattr); + } trace_nfs_refresh_inode_exit(inode, ret); return ret; @@ -1916,6 +2167,8 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa if (attr_cmp < 0) return 0; if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) { + /* Record the pre/post change info before clearing PRECHANGE */ + nfs_ooo_record(NFS_I(inode), fattr); fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE | NFS_ATTR_FATTR_PRESIZE | NFS_ATTR_FATTR_PREMTIME @@ -1929,12 +2182,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { - fattr->pre_ctime = inode->i_ctime; + fattr->pre_ctime = inode_get_ctime(inode); fattr->valid |= NFS_ATTR_FATTR_PRECTIME; } if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { - fattr->pre_mtime = inode->i_mtime; + fattr->pre_mtime = inode_get_mtime(inode); fattr->valid |= NFS_ATTR_FATTR_PREMTIME; } if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && @@ -2001,10 +2254,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) bool attr_changed = false; bool have_delegation; - dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%llx)\n", __func__, inode->i_sb->s_id, inode->i_ino, nfs_display_fhandle_hash(NFS_FH(inode)), - atomic_read(&inode->i_count), fattr->valid); + icount_read(inode), fattr->valid); if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { /* Only a mounted-on-fileid? Just exit */ @@ -2050,6 +2303,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; + /* Fix up any delegated attributes in the struct nfs_fattr */ + nfs_fattr_fixup_delegated(inode, fattr); + save_cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME @@ -2070,6 +2326,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { + if (!have_writers && nfsi->ooo && nfsi->ooo->cnt == 1 && + nfsi->ooo->gap[0].end == inode_peek_iversion_raw(inode)) { + /* There is one remaining gap that hasn't been + * merged into iversion - do that now. + */ + inode_set_iversion_raw(inode, nfsi->ooo->gap[0].start); + kfree(nfsi->ooo); + nfsi->ooo = NULL; + } if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { /* Could it be a race with writeback? */ if (!(have_writers || have_delegation)) { @@ -2084,15 +2349,19 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | NFS_INO_INVALID_MODE - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_OTHER + | NFS_INO_INVALID_BTIME; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); attr_changed = true; dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); - } else if (!have_delegation) - nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + } else if (!have_delegation) { + nfs_ooo_record(nfsi, fattr); + nfs_ooo_merge(nfsi, inode_peek_iversion_raw(inode), + fattr->change_attr); + } inode_set_iversion_raw(inode, fattr->change_attr); } } else { @@ -2104,17 +2373,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = fattr->mtime; + inode_set_mtime_to_ts(inode, fattr->mtime); else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_MTIME; if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = fattr->ctime; + inode_set_ctime_to_ts(inode, fattr->ctime); else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_CTIME; + if (fattr->valid & NFS_ATTR_FATTR_BTIME) + nfsi->btime = fattr->btime; + else if (fattr_supported & NFS_ATTR_FATTR_BTIME) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_BTIME; + /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { new_isize = nfs_size_to_loff_t(fattr->size); @@ -2140,7 +2415,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) save_cache_validity & NFS_INO_INVALID_SIZE; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = fattr->atime; + inode_set_atime_to_ts(inode, fattr->atime); else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATIME; @@ -2241,23 +2516,27 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; - nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); + nfsi = alloc_inode_sb(sb, nfs_inode_cachep, GFP_KERNEL); if (!nfsi) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; + nfsi->ooo = NULL; #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_V4_2 nfsi->xattr_cache = NULL; #endif + nfs_netfs_inode_init(nfsi); + return &nfsi->vfs_inode; } EXPORT_SYMBOL_GPL(nfs_alloc_inode); void nfs_free_inode(struct inode *inode) { + kfree(NFS_I(inode)->ooo); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } EXPORT_SYMBOL_GPL(nfs_free_inode); @@ -2274,7 +2553,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) static void init_once(void *foo) { - struct nfs_inode *nfsi = (struct nfs_inode *) foo; + struct nfs_inode *nfsi = foo; inode_init_once(&nfsi->vfs_inode); INIT_LIST_HEAD(&nfsi->open_files); @@ -2288,7 +2567,7 @@ static int __init nfs_init_inodecache(void) nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), + SLAB_ACCOUNT), init_once); if (nfs_inode_cachep == NULL) return -ENOMEM; @@ -2306,35 +2585,54 @@ static void nfs_destroy_inodecache(void) kmem_cache_destroy(nfs_inode_cachep); } +struct workqueue_struct *nfslocaliod_workqueue; struct workqueue_struct *nfsiod_workqueue; EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* - * start up the nfsiod workqueue + * Destroy the nfsiod workqueues */ -static int nfsiod_start(void) +static void nfsiod_stop(void) { struct workqueue_struct *wq; - dprintk("RPC: creating workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); - if (wq == NULL) - return -ENOMEM; - nfsiod_workqueue = wq; - return 0; + + wq = nfsiod_workqueue; + if (wq != NULL) { + nfsiod_workqueue = NULL; + destroy_workqueue(wq); + } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + wq = nfslocaliod_workqueue; + if (wq != NULL) { + nfslocaliod_workqueue = NULL; + destroy_workqueue(wq); + } +#endif /* CONFIG_NFS_LOCALIO */ } /* - * Destroy the nfsiod workqueue + * Start the nfsiod workqueues */ -static void nfsiod_stop(void) +static int nfsiod_start(void) { - struct workqueue_struct *wq; - - wq = nfsiod_workqueue; - if (wq == NULL) - return; - nfsiod_workqueue = NULL; - destroy_workqueue(wq); + dprintk("RPC: creating workqueue nfsiod\n"); + nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (nfsiod_workqueue == NULL) + return -ENOMEM; +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + /* + * localio writes need to use a normal (non-memreclaim) workqueue. + * When we start getting low on space, XFS goes and calls flush_work() on + * a non-memreclaim work queue, which causes a priority inversion problem. + */ + dprintk("RPC: creating workqueue nfslocaliod\n"); + nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0); + if (unlikely(nfslocaliod_workqueue == NULL)) { + nfsiod_stop(); + return -ENOMEM; + } +#endif /* CONFIG_NFS_LOCALIO */ + return 0; } unsigned int nfs_net_id; @@ -2342,12 +2640,32 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); + int err; + nfs_clients_init(net); - return nfs_fs_proc_net_init(net); + + if (!rpc_proc_register(net, &nn->rpcstats)) { + err = -ENOMEM; + goto err_proc_rpc; + } + + err = nfs_fs_proc_net_init(net); + if (err) + goto err_proc_nfs; + + return 0; + +err_proc_nfs: + rpc_proc_unregister(net, "nfs"); +err_proc_rpc: + nfs_clients_exit(net); + return err; } static void nfs_net_exit(struct net *net) { + rpc_proc_unregister(net, "nfs"); nfs_fs_proc_net_exit(net); nfs_clients_exit(net); } @@ -2359,6 +2677,35 @@ static struct pernet_operations nfs_net_ops = { .size = sizeof(struct nfs_net), }; +#ifdef CONFIG_KEYS +static struct key *nfs_keyring; + +static int __init nfs_init_keyring(void) +{ + nfs_keyring = keyring_alloc(".nfs", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + (KEY_USR_ALL & ~KEY_USR_SETATTR), + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + return PTR_ERR_OR_ZERO(nfs_keyring); +} + +static void nfs_exit_keyring(void) +{ + key_put(nfs_keyring); +} +#else +static inline int nfs_init_keyring(void) +{ + return 0; +} + +static inline void nfs_exit_keyring(void) +{ +} +#endif /* CONFIG_KEYS */ + /* * Initialize NFS */ @@ -2366,6 +2713,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_init_keyring(); + if (err) + return err; + err = nfs_sysfs_init(); if (err < 0) goto out10; @@ -2402,15 +2753,12 @@ static int __init init_nfs_fs(void) if (err) goto out1; - rpc_proc_register(&init_net, &nfs_rpcstat); - err = register_nfs_fs(); if (err) goto out0; return 0; out0: - rpc_proc_unregister(&init_net, "nfs"); nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2429,6 +2777,7 @@ out7: out9: nfs_sysfs_exit(); out10: + nfs_exit_keyring(); return err; } @@ -2440,15 +2789,16 @@ static void __exit exit_nfs_fs(void) nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); unregister_pernet_subsys(&nfs_net_ops); - rpc_proc_unregister(&init_net, "nfs"); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); nfs_sysfs_exit(); + nfs_exit_keyring(); } /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +MODULE_DESCRIPTION("NFS client support"); MODULE_LICENSE("GPL"); module_param(enable_ino64, bool, 0644); |
