diff options
Diffstat (limited to 'fs/nfs/inode.c')
| -rw-r--r-- | fs/nfs/inode.c | 1480 |
1 files changed, 1008 insertions, 472 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 094775ea0781..f76fe406937a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/inode.c * @@ -50,6 +51,7 @@ #include "pnfs.h" #include "nfs.h" #include "netns.h" +#include "sysfs.h" #include "nfstrace.h" @@ -60,7 +62,6 @@ /* Default is to see 64-bit inode numbers */ static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; -static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); static struct kmem_cache * nfs_inode_cachep; @@ -71,18 +72,15 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } -static int nfs_wait_killable(int mode) +int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) { - freezable_schedule_unsafe(); + if (unlikely(nfs_current_task_exiting())) + return -EINTR; + schedule(); if (signal_pending_state(mode, current)) return -ERESTARTSYS; return 0; } - -int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) -{ - return nfs_wait_killable(mode); -} EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); /** @@ -110,7 +108,7 @@ u64 nfs_compat_user_ino64(u64 fileid) int nfs_drop_inode(struct inode *inode) { - return NFS_STALE(inode) || generic_drop_inode(inode); + return NFS_STALE(inode) || inode_generic_drop(inode); } EXPORT_SYMBOL_GPL(nfs_drop_inode); @@ -143,6 +141,7 @@ EXPORT_SYMBOL_GPL(nfs_sync_inode); /** * nfs_sync_mapping - helper to flush all mmapped dirty data to disk + * @mapping: pointer to struct address_space */ int nfs_sync_mapping(struct address_space *mapping) { @@ -162,55 +161,67 @@ static int nfs_attribute_timeout(struct inode *inode) return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } -static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags) +static bool nfs_check_cache_flags_invalid(struct inode *inode, + unsigned long flags) { unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); - /* Special case for the pagecache or access cache */ - if (flags == NFS_INO_REVAL_PAGECACHE && - !(cache_validity & NFS_INO_REVAL_FORCED)) - return false; return (cache_validity & flags) != 0; } -static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags) +bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) { - unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); - - if ((cache_validity & flags) != 0) + if (nfs_check_cache_flags_invalid(inode, flags)) return true; - if (nfs_attribute_timeout(inode)) - return true; - return false; + return nfs_attribute_cache_expired(inode); } +EXPORT_SYMBOL_GPL(nfs_check_cache_invalid); -bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) +#ifdef CONFIG_NFS_V4_2 +static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) { - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - return nfs_check_cache_invalid_delegated(inode, flags); - - return nfs_check_cache_invalid_not_delegated(inode, flags); + return nfsi->xattr_cache != NULL; } +#else +static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) +{ + return false; +} +#endif -static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) +void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); - bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ); - if (have_delegation) { + if (nfs_have_delegated_attributes(inode)) { if (!(flags & NFS_INO_REVAL_FORCED)) - flags &= ~NFS_INO_INVALID_OTHER; - flags &= ~(NFS_INO_INVALID_CHANGE - | NFS_INO_INVALID_SIZE - | NFS_INO_REVAL_PAGECACHE); + flags &= ~(NFS_INO_INVALID_MODE | + NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_BTIME | + NFS_INO_INVALID_XATTR); + flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); } + if (!nfs_has_xattr_cache(nfsi)) + flags &= ~NFS_INO_INVALID_XATTR; + if (flags & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode, 0); + flags &= ~NFS_INO_REVAL_FORCED; + + flags |= nfsi->cache_validity; if (inode->i_mapping->nrpages == 0) flags &= ~NFS_INO_INVALID_DATA; - nfsi->cache_validity |= flags; - if (flags & NFS_INO_INVALID_DATA) - nfs_fscache_invalidate(inode); + + /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */ + smp_store_release(&nfsi->cache_validity, flags); + + if (inode->i_mapping->nrpages == 0 || + nfsi->cache_validity & NFS_INO_INVALID_DATA) { + nfs_ooo_clear(nfsi); + } + trace_nfs_set_cache_invalid(inode, 0); } +EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); /* * Invalidate the local caches @@ -225,18 +236,17 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); - if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_PAGECACHE); - } else - nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_REVAL_PAGECACHE); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | + NFS_INO_INVALID_DATA | + NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL | + NFS_INO_INVALID_XATTR); + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | + NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL | + NFS_INO_INVALID_XATTR); nfs_zap_label_cache_locked(nfsi); } @@ -271,6 +281,8 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { + if (nfs_have_delegated_atime(inode)) + return; spin_lock(&inode->i_lock); nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); @@ -281,10 +293,18 @@ EXPORT_SYMBOL_GPL(nfs_invalidate_atime); * Invalidate, but do not unhash, the inode. * NB: must be called with inode->i_lock held! */ -static void nfs_invalidate_inode(struct inode *inode) +static void nfs_set_inode_stale_locked(struct inode *inode) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); nfs_zap_caches_locked(inode); + trace_nfs_set_inode_stale(inode); +} + +void nfs_set_inode_stale(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_set_inode_stale_locked(inode); + spin_unlock(&inode->i_lock); } struct nfs_find_desc { @@ -301,13 +321,13 @@ struct nfs_find_desc { static int nfs_find_actor(struct inode *inode, void *opaque) { - struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_find_desc *desc = opaque; struct nfs_fh *fh = desc->fh; struct nfs_fattr *fattr = desc->fattr; if (NFS_FILEID(inode) != fattr->fileid) return 0; - if ((S_IFMT & inode->i_mode) != (S_IFMT & fattr->mode)) + if (inode_wrong_type(inode, fattr->mode)) return 0; if (nfs_compare_fh(NFS_FH(inode), fh)) return 0; @@ -319,7 +339,7 @@ nfs_find_actor(struct inode *inode, void *opaque) static int nfs_init_locked(struct inode *inode, void *opaque) { - struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_find_desc *desc = opaque; struct nfs_fattr *fattr = desc->fattr; set_nfs_fileid(inode, fattr->fileid); @@ -336,37 +356,32 @@ static void nfs_clear_label_invalid(struct inode *inode) spin_unlock(&inode->i_lock); } -void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, - struct nfs4_label *label) +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr) { int error; - if (label == NULL) + if (fattr->label == NULL) return; if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { - error = security_inode_notifysecctx(inode, label->label, - label->len); + error = security_inode_notifysecctx(inode, fattr->label->label, + fattr->label->len); if (error) printk(KERN_ERR "%s() %s %d " "security_inode_notifysecctx() %d\n", __func__, - (char *)label->label, - label->len, error); + (char *)fattr->label->label, + fattr->label->len, error); nfs_clear_label_invalid(inode); } } struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { - struct nfs4_label *label = NULL; - int minor_version = server->nfs_client->cl_minorversion; - - if (minor_version < 2) - return label; + struct nfs4_label *label; if (!(server->caps & NFS_CAP_SECURITY_LABEL)) - return label; + return NULL; label = kzalloc(sizeof(struct nfs4_label), flags); if (label == NULL) @@ -383,8 +398,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) } EXPORT_SYMBOL_GPL(nfs4_label_alloc); #else -void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, - struct nfs4_label *label) +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr) { } #endif @@ -412,18 +426,36 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) return inode; } +static void nfs_inode_init_regular(struct nfs_inode *nfsi) +{ + atomic_long_set(&nfsi->nrequests, 0); + atomic_long_set(&nfsi->redirtied_pages, 0); + INIT_LIST_HEAD(&nfsi->commit_info.list); + atomic_long_set(&nfsi->commit_info.ncommit, 0); + atomic_set(&nfsi->commit_info.rpcs_out, 0); + mutex_init(&nfsi->commit_mutex); +} + +static void nfs_inode_init_dir(struct nfs_inode *nfsi) +{ + nfsi->cache_change_attribute = 0; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + init_rwsem(&nfsi->rmdir_sem); +} + /* * This is our front-end to iget that looks up inodes by file handle * instead of inode number. */ struct inode * -nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label) +nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) { struct nfs_find_desc desc = { .fh = fh, .fattr = fattr }; struct inode *inode = ERR_PTR(-ENOENT); + u64 fattr_supported = NFS_SB(sb)->fattr_valid; unsigned long hash; nfs_attr_check_mountpoint(sb, fattr); @@ -443,7 +475,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st goto out_no_inode; } - if (inode->i_state & I_NEW) { + if (inode_state_read_once(inode) & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); unsigned long now = jiffies; @@ -456,8 +488,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_mode = fattr->mode; nfsi->cache_validity = 0; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 - && nfs_server_capable(inode, NFS_CAP_MODE)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + && (fattr_supported & NFS_ATTR_FATTR_MODE)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -465,10 +497,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st if (S_ISREG(inode->i_mode)) { inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; + nfs_inode_init_regular(nfsi); + mapping_set_large_folios(inode->i_mapping); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; inode->i_data.a_ops = &nfs_dir_aops; + nfs_inode_init_dir(nfsi); /* Deal with crossing mountpoints */ if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { @@ -485,33 +520,37 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st } else init_special_inode(inode, inode->i_mode, fattr->rdev); - memset(&inode->i_atime, 0, sizeof(inode->i_atime)); - memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); - memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); + inode_set_atime(inode, 0, 0); + inode_set_mtime(inode, 0, 0); + inode_set_ctime(inode, 0, 0); + memset(&nfsi->btime, 0, sizeof(nfsi->btime)); inode_set_iversion_raw(inode, 0); inode->i_size = 0; clear_nlink(inode); inode->i_uid = make_kuid(&init_user_ns, -2); inode->i_gid = make_kgid(&init_user_ns, -2); inode->i_blocks = 0; - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->write_io = 0; nfsi->read_io = 0; nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); - else if (nfs_server_capable(inode, NFS_CAP_ATIME)) + inode_set_atime_to_ts(inode, fattr->atime); + else if (fattr_supported & NFS_ATTR_FATTR_ATIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = timespec_to_timespec64(fattr->mtime); - else if (nfs_server_capable(inode, NFS_CAP_MTIME)) + inode_set_mtime_to_ts(inode, fattr->mtime); + else if (fattr_supported & NFS_ATTR_FATTR_MTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); - else if (nfs_server_capable(inode, NFS_CAP_CTIME)) + inode_set_ctime_to_ts(inode, fattr->ctime); + else if (fattr_supported & NFS_ATTR_FATTR_CTIME) nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME); + if (fattr->valid & NFS_ATTR_FATTR_BTIME) + nfsi->btime = fattr->btime; + else if (fattr_supported & NFS_ATTR_FATTR_BTIME) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BTIME); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode_set_iversion_raw(inode, fattr->change_attr); else @@ -522,29 +561,33 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE); if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); - else if (nfs_server_capable(inode, NFS_CAP_NLINK)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); + else if (fattr_supported & NFS_ATTR_FATTR_NLINK) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK); + else + set_nlink(inode, 1); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; - else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + else if (fattr_supported & NFS_ATTR_FATTR_OWNER) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; - else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + else if (fattr_supported & NFS_ATTR_FATTR_GROUP) nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; + else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED && + fattr->size != 0) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } + } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED && + fattr->size != 0) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); - if (nfsi->cache_validity != 0) - nfsi->cache_validity |= NFS_INO_REVAL_FORCED; - - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -565,7 +608,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), nfs_display_fhandle_hash(fh), - atomic_read(&inode->i_count)); + icount_read(inode)); out: return inode; @@ -576,14 +619,107 @@ out_no_inode: } EXPORT_SYMBOL_GPL(nfs_fhget); +static void +nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr) +{ + unsigned long cache_validity = NFS_I(inode)->cache_validity; + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PRECTIME | + NFS_ATTR_FATTR_CTIME); + + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + fattr->valid &= ~(NFS_ATTR_FATTR_PREMTIME | + NFS_ATTR_FATTR_MTIME); + + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + fattr->valid &= ~NFS_ATTR_FATTR_ATIME; + } +} + +static void nfs_set_timestamps_to_ts(struct inode *inode, struct iattr *attr) +{ + unsigned int cache_flags = 0; + + if (attr->ia_valid & ATTR_MTIME_SET) { + struct timespec64 ctime = inode_get_ctime(inode); + struct timespec64 mtime = inode_get_mtime(inode); + struct timespec64 now; + int updated = 0; + + now = inode_set_ctime_current(inode); + if (!timespec64_equal(&now, &ctime)) + updated |= S_CTIME; + + inode_set_mtime_to_ts(inode, attr->ia_mtime); + if (!timespec64_equal(&now, &mtime)) + updated |= S_MTIME; + + inode_maybe_inc_iversion(inode, updated); + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (attr->ia_valid & ATTR_ATIME_SET) { + inode_set_atime_to_ts(inode, attr->ia_atime); + cache_flags |= NFS_INO_INVALID_ATIME; + } + NFS_I(inode)->cache_validity &= ~cache_flags; +} + +static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid) +{ + enum file_time_flags time_flags = 0; + unsigned int cache_flags = 0; + + if (ia_valid & ATTR_MTIME) { + time_flags |= S_MTIME | S_CTIME; + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (ia_valid & ATTR_ATIME) { + time_flags |= S_ATIME; + cache_flags |= NFS_INO_INVALID_ATIME; + } + inode_update_timestamps(inode, time_flags); + NFS_I(inode)->cache_validity &= ~cache_flags; +} + +void nfs_update_delegated_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (nfs_have_delegated_atime(inode)) + nfs_update_timestamps(inode, ATTR_ATIME); + spin_unlock(&inode->i_lock); +} + +void nfs_update_delegated_mtime_locked(struct inode *inode) +{ + if (nfs_have_delegated_mtime(inode)) + nfs_update_timestamps(inode, ATTR_MTIME); +} + +void nfs_update_delegated_mtime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_update_delegated_mtime_locked(inode); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_update_delegated_mtime); + #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int -nfs_setattr(struct dentry *dentry, struct iattr *attr) +nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) { struct inode *inode = d_inode(dentry); struct nfs_fattr *fattr; + loff_t oldsize = i_size_read(inode); int error = 0; + kuid_t task_uid = current_fsuid(); + kuid_t owner_uid = inode->i_uid; nfs_inc_stats(inode, NFSIOS_VFSSETATTR); @@ -598,30 +734,64 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; - if (attr->ia_size == i_size_read(inode)) + if (attr->ia_size == oldsize) attr->ia_valid &= ~ATTR_SIZE; } + if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) { + spin_lock(&inode->i_lock); + if (attr->ia_valid & ATTR_MTIME_SET) { + if (uid_eq(task_uid, owner_uid)) { + nfs_set_timestamps_to_ts(inode, attr); + attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET| + ATTR_ATIME|ATTR_ATIME_SET); + } + } else { + nfs_update_timestamps(inode, attr->ia_valid); + attr->ia_valid &= ~(ATTR_MTIME|ATTR_ATIME); + } + spin_unlock(&inode->i_lock); + } else if (nfs_have_delegated_atime(inode) && + attr->ia_valid & ATTR_ATIME && + !(attr->ia_valid & ATTR_MTIME)) { + if (attr->ia_valid & ATTR_ATIME_SET) { + if (uid_eq(task_uid, owner_uid)) { + spin_lock(&inode->i_lock); + nfs_set_timestamps_to_ts(inode, attr); + spin_unlock(&inode->i_lock); + attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET); + } + } else { + nfs_update_delegated_atime(inode); + attr->ia_valid &= ~ATTR_ATIME; + } + } + /* Optimization: if the end result is no change, don't RPC */ - attr->ia_valid &= NFS_VALID_ATTRS; - if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) + if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; trace_nfs_setattr_enter(inode); /* Write all dirty data */ - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { + nfs_file_block_o_direct(NFS_I(inode)); nfs_sync_inode(inode); + } - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) { error = -ENOMEM; goto out; } error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); - if (error == 0) + if (error == 0) { + if (attr->ia_valid & ATTR_SIZE) + nfs_truncate_last_folio(inode->i_mapping, oldsize, + attr->ia_size); error = nfs_refresh_inode(inode, fattr); + } nfs_free_fattr(fattr); out: trace_nfs_setattr_exit(inode, error); @@ -647,14 +817,18 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) if (err) goto out; + trace_nfs_size_truncate(inode, offset); i_size_write(inode, offset); /* Optimisation */ - if (offset == 0) + if (offset == 0) { NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(NFS_I(inode)); + } NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); + nfs_update_delegated_mtime_locked(inode); spin_lock(&inode->i_lock); out: return err; @@ -678,12 +852,19 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, spin_lock(&inode->i_lock); NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & ATTR_SIZE) != 0) { - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + if (!nfs_have_delegated_mtime(inode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS); nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME; + if ((attr->ia_valid & ATTR_KILL_SUID) != 0 && + inode->i_mode & S_ISUID) + inode->i_mode &= ~S_ISUID; + if (setattr_should_drop_sgid(&nop_mnt_idmap, inode)) + inode->i_mode &= ~S_ISGID; if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; @@ -694,7 +875,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -705,14 +886,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); + inode_set_atime_to_ts(inode, fattr->atime); else if (attr->ia_valid & ATTR_ATIME_SET) - inode->i_atime = attr->ia_atime; + inode_set_atime_to_ts(inode, attr->ia_atime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -721,14 +902,14 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_MTIME | NFS_INO_INVALID_CTIME); if (fattr->valid & NFS_ATTR_FATTR_MTIME) - inode->i_mtime = timespec_to_timespec64(fattr->mtime); + inode_set_mtime_to_ts(inode, fattr->mtime); else if (attr->ia_valid & ATTR_MTIME_SET) - inode->i_mtime = attr->ia_mtime; + inode_set_mtime_to_ts(inode, attr->ia_mtime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME); if (fattr->valid & NFS_ATTR_FATTR_CTIME) - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + inode_set_ctime_to_ts(inode, fattr->ctime); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); @@ -739,59 +920,99 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); -static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry) +/* + * Don't request help from readdirplus if the file is being written to, + * or if attribute caching is turned off + */ +static bool nfs_getattr_readdirplus_enable(const struct inode *inode) { - struct dentry *parent; - - if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) - return; - parent = dget_parent(dentry); - nfs_force_use_readdirplus(d_inode(parent)); - dput(parent); + return nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && + !nfs_have_writebacks(inode) && NFS_MAXATTRTIMEO(inode) > 5 * HZ; } -static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) +static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry) { - struct dentry *parent; - - if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) - return; - parent = dget_parent(dentry); - nfs_advise_use_readdirplus(d_inode(parent)); - dput(parent); + if (!IS_ROOT(dentry)) { + struct dentry *parent = dget_parent(dentry); + nfs_readdir_record_entry_cache_miss(d_inode(parent)); + dput(parent); + } } -static bool nfs_need_revalidate_inode(struct inode *inode) +static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) { - if (NFS_I(inode)->cache_validity & - (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) - return true; - if (nfs_attribute_cache_expired(inode)) - return true; - return false; + if (!IS_ROOT(dentry)) { + struct dentry *parent = dget_parent(dentry); + nfs_readdir_record_entry_cache_hit(d_inode(parent)); + dput(parent); + } } -int nfs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) +static u32 nfs_get_valid_attrmask(struct inode *inode) +{ + u64 fattr_valid = NFS_SERVER(inode)->fattr_valid; + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + u32 reply_mask = STATX_INO | STATX_TYPE; + + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + reply_mask |= STATX_ATIME; + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + reply_mask |= STATX_CTIME; + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + reply_mask |= STATX_MTIME; + if (!(cache_validity & NFS_INO_INVALID_SIZE)) + reply_mask |= STATX_SIZE; + if (!(cache_validity & NFS_INO_INVALID_NLINK)) + reply_mask |= STATX_NLINK; + if (!(cache_validity & NFS_INO_INVALID_MODE)) + reply_mask |= STATX_MODE; + if (!(cache_validity & NFS_INO_INVALID_OTHER)) + reply_mask |= STATX_UID | STATX_GID; + if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) + reply_mask |= STATX_BLOCKS; + if (!(cache_validity & NFS_INO_INVALID_BTIME) && + (fattr_valid & NFS_ATTR_FATTR_BTIME)) + reply_mask |= STATX_BTIME; + if (!(cache_validity & NFS_INO_INVALID_CHANGE)) + reply_mask |= STATX_CHANGE_COOKIE; + return reply_mask; +} + +int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct nfs_server *server = NFS_SERVER(inode); + u64 fattr_valid = server->fattr_valid; unsigned long cache_validity; int err = 0; bool force_sync = query_flags & AT_STATX_FORCE_SYNC; bool do_update = false; + bool readdirplus_enabled = nfs_getattr_readdirplus_enable(inode); trace_nfs_getattr_enter(inode); - if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) - goto out_no_update; + request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | + STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | + STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME | + STATX_CHANGE_COOKIE; - /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME|STATX_MTIME)) && - S_ISREG(inode->i_mode)) { - err = filemap_write_and_wait(inode->i_mapping); - if (err) - goto out; + if (!(fattr_valid & NFS_ATTR_FATTR_BTIME)) + request_mask &= ~STATX_BTIME; + + if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { + if (readdirplus_enabled) + nfs_readdirplus_parent_cache_hit(path->dentry); + goto out_no_revalidate; + } + + /* Flush out writes to the server in order to update c/mtime/version. */ + if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) && + S_ISREG(inode->i_mode)) { + if (nfs_have_delegated_mtime(inode)) + filemap_fdatawrite(inode->i_mapping); + else + filemap_write_and_wait(inode->i_mapping); } /* @@ -810,37 +1031,69 @@ int nfs_getattr(const struct path *path, struct kstat *stat, /* Is the user requesting attributes that might need revalidation? */ if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME| STATX_MTIME|STATX_UID|STATX_GID| - STATX_SIZE|STATX_BLOCKS))) + STATX_SIZE|STATX_BLOCKS|STATX_BTIME| + STATX_CHANGE_COOKIE))) goto out_no_revalidate; /* Check whether the cached attributes are stale */ do_update |= force_sync || nfs_attribute_cache_expired(inode); cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); - do_update |= cache_validity & - (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL); + do_update |= cache_validity & NFS_INO_INVALID_CHANGE; if (request_mask & STATX_ATIME) do_update |= cache_validity & NFS_INO_INVALID_ATIME; - if (request_mask & (STATX_CTIME|STATX_MTIME)) - do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE; + if (request_mask & STATX_CTIME) + do_update |= cache_validity & NFS_INO_INVALID_CTIME; + if (request_mask & STATX_MTIME) + do_update |= cache_validity & NFS_INO_INVALID_MTIME; + if (request_mask & STATX_SIZE) + do_update |= cache_validity & NFS_INO_INVALID_SIZE; + if (request_mask & STATX_NLINK) + do_update |= cache_validity & NFS_INO_INVALID_NLINK; + if (request_mask & STATX_MODE) + do_update |= cache_validity & NFS_INO_INVALID_MODE; + if (request_mask & (STATX_UID | STATX_GID)) + do_update |= cache_validity & NFS_INO_INVALID_OTHER; + if (request_mask & STATX_BLOCKS) + do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; + if (request_mask & STATX_BTIME) + do_update |= cache_validity & NFS_INO_INVALID_BTIME; + if (do_update) { - /* Update the attribute cache */ - if (!(server->flags & NFS_MOUNT_NOAC)) + if (readdirplus_enabled) nfs_readdirplus_parent_cache_miss(path->dentry); - else - nfs_readdirplus_parent_cache_hit(path->dentry); err = __nfs_revalidate_inode(server, inode); if (err) goto out; - } else + } else if (readdirplus_enabled) nfs_readdirplus_parent_cache_hit(path->dentry); out_no_revalidate: /* Only return attributes that were revalidated. */ - stat->result_mask &= request_mask; -out_no_update: - generic_fillattr(inode, stat); + stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask; + + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + stat->change_cookie = inode_peek_iversion_raw(inode); + stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; + if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED) + stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; + stat->btime = NFS_I(inode)->btime; + + /* Special handling for STATX_DIOALIGN and STATX_DIO_READ_ALIGN + * - NFS doesn't have DIO alignment constraints, avoid getting + * these DIO attrs from remote and just respond with most + * accommodating limits (so client will issue supported DIO). + * - this is unintuitive, but the most coarse-grained + * dio_offset_align is the most accommodating. + */ + if ((request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN)) && + S_ISREG(inode->i_mode)) { + stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN; + stat->dio_mem_align = 4; /* 4-byte alignment */ + stat->dio_offset_align = PAGE_SIZE; + stat->dio_read_offset_align = stat->dio_offset_align; + } out: trace_nfs_getattr_exit(inode, err); return err; @@ -877,17 +1130,21 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) res = __nfs_find_lock_context(ctx); rcu_read_unlock(); if (res == NULL) { - new = kmalloc(sizeof(*new), GFP_KERNEL); + new = kmalloc(sizeof(*new), GFP_KERNEL_ACCOUNT); if (new == NULL) return ERR_PTR(-ENOMEM); nfs_init_lock_context(new); spin_lock(&inode->i_lock); res = __nfs_find_lock_context(ctx); if (res == NULL) { - list_add_tail_rcu(&new->list, &ctx->lock_context.list); - new->open_context = ctx; - res = new; - new = NULL; + new->open_context = get_nfs_open_context(ctx); + if (new->open_context) { + list_add_tail_rcu(&new->list, + &ctx->lock_context.list); + res = new; + new = NULL; + } else + res = ERR_PTR(-EBADF); } spin_unlock(&inode->i_lock); kfree(new); @@ -905,6 +1162,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx) return; list_del_rcu(&l_ctx->list); spin_unlock(&inode->i_lock); + put_nfs_open_context(ctx); kfree_rcu(l_ctx, rcu_head); } EXPORT_SYMBOL_GPL(nfs_put_lock_context); @@ -922,14 +1180,13 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) { struct nfs_inode *nfsi; struct inode *inode; - struct nfs_server *server; if (!(ctx->mode & FMODE_WRITE)) return; if (!is_sync) return; inode = d_inode(ctx->dentry); - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) @@ -938,10 +1195,10 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) return; if (!list_empty(&nfsi->open_files)) return; - server = NFS_SERVER(inode); - if (server->flags & NFS_MOUNT_NOCTO) + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO) return; - nfs_revalidate_inode(server, inode); + nfs_revalidate_inode(inode, + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE); } EXPORT_SYMBOL_GPL(nfs_close_context); @@ -950,17 +1207,17 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct file *filp) { struct nfs_open_context *ctx; - const struct cred *cred = get_current_cred(); - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - put_cred(cred); + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); + if (!ctx) return ERR_PTR(-ENOMEM); - } nfs_sb_active(dentry->d_sb); ctx->dentry = dget(dentry); - ctx->cred = cred; - ctx->ll_cred = NULL; + if (filp) + ctx->cred = get_cred(filp->f_cred); + else + ctx->cred = get_current_cred(); + rcu_assign_pointer(ctx->ll_cred, NULL); ctx->state = NULL; ctx->mode = f_mode; ctx->flags = 0; @@ -970,6 +1227,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); ctx->mdsthreshold = NULL; + nfs_localio_file_init(&ctx->nfl); + return ctx; } EXPORT_SYMBOL_GPL(alloc_nfs_open_context); @@ -999,8 +1258,9 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) put_cred(ctx->cred); dput(ctx->dentry); nfs_sb_deactive(sb); - put_rpccred(ctx->ll_cred); + put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1)); kfree(ctx->mdsthreshold); + nfs_close_local_fh(&ctx->nfl); kfree_rcu(ctx, rcu_head); } @@ -1025,6 +1285,10 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); + if (list_empty(&nfsi->open_files) && + nfs_ooo_test(nfsi)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED); list_add_tail_rcu(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } @@ -1033,6 +1297,7 @@ EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { filp->private_data = get_nfs_open_context(ctx); + set_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags); if (list_empty(&ctx->list)) nfs_inode_attach_open_context(ctx); } @@ -1048,10 +1313,12 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct rcu_read_lock(); list_for_each_entry_rcu(pos, &nfsi->open_files, list) { - if (cred != NULL && pos->cred != cred) + if (cred != NULL && cred_fscmp(pos->cred, cred) != 0) continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; + if (!test_bit(NFS_CONTEXT_FILE_OPEN, &pos->flags)) + continue; ctx = get_nfs_open_context(pos); if (ctx) break; @@ -1067,6 +1334,7 @@ void nfs_file_clear_open_context(struct file *filp) if (ctx) { struct inode *inode = d_inode(ctx->dentry); + clear_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags); /* * We fatal error on write before. Try to writeback * every page again. @@ -1085,7 +1353,8 @@ int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); + ctx = alloc_nfs_open_context(file_dentry(filp), + flags_to_mode(filp->f_flags), filp); if (IS_ERR(ctx)) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); @@ -1102,7 +1371,6 @@ int __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { int status = -ESTALE; - struct nfs4_label *label = NULL; struct nfs_fattr *fattr = NULL; struct nfs_inode *nfsi = NFS_I(inode); @@ -1124,30 +1392,30 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } status = -ENOMEM; - fattr = nfs_alloc_fattr(); + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); if (fattr == NULL) goto out; nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(label)) { - status = PTR_ERR(label); - goto out; - } - - status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, - label, inode); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, inode); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); - if (status == -ESTALE) { - nfs_zap_caches(inode); + switch (status) { + case -ETIMEDOUT: + /* A soft timeout occurred. Use cached information? */ + if (server->flags & NFS_MOUNT_SOFTREVAL) + status = 0; + break; + case -ESTALE: if (!S_ISDIR(inode->i_mode)) - set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + nfs_set_inode_stale(inode); + else + nfs_zap_caches(inode); } - goto err_out; + goto out; } status = nfs_refresh_inode(inode, fattr); @@ -1155,20 +1423,18 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); - goto err_out; + goto out; } if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - nfs_setsecurity(inode, fattr, label); + nfs_setsecurity(inode, fattr); dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); -err_out: - nfs4_label_free(label); out: nfs_free_fattr(fattr); trace_nfs_revalidate_inode_exit(inode, status); @@ -1184,24 +1450,24 @@ int nfs_attribute_cache_expired(struct inode *inode) /** * nfs_revalidate_inode - Revalidate the inode attributes - * @server - pointer to nfs_server struct - * @inode - pointer to inode struct + * @inode: pointer to inode struct + * @flags: cache flags to check * * Updates inode attribute information by retrieving the data from the server. */ -int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +int nfs_revalidate_inode(struct inode *inode, unsigned long flags) { - if (!nfs_need_revalidate_inode(inode)) + if (!nfs_check_cache_invalid(inode, flags)) return NFS_STALE(inode) ? -ESTALE : 0; - return __nfs_revalidate_inode(server, inode); + return __nfs_revalidate_inode(NFS_SERVER(inode), inode); } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { - struct nfs_inode *nfsi = NFS_I(inode); int ret; + nfs_fscache_invalidate(inode, 0); if (mapping->nrpages != 0) { if (S_ISREG(inode->i_mode)) { ret = nfs_sync_mapping(mapping); @@ -1212,13 +1478,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map if (ret < 0) return ret; } - if (S_ISDIR(inode->i_mode)) { - spin_lock(&inode->i_lock); - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - spin_unlock(&inode->i_lock); - } nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); - nfs_fscache_wait_on_invalidate(inode); dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n", inode->i_sb->s_id, @@ -1226,55 +1486,19 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map return 0; } -bool nfs_mapping_need_revalidate_inode(struct inode *inode) -{ - return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) || - NFS_STALE(inode); -} - -int nfs_revalidate_mapping_rcu(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - unsigned long *bitlock = &nfsi->flags; - int ret = 0; - - if (IS_SWAPFILE(inode)) - goto out; - if (nfs_mapping_need_revalidate_inode(inode)) { - ret = -ECHILD; - goto out; - } - spin_lock(&inode->i_lock); - if (test_bit(NFS_INO_INVALIDATING, bitlock) || - (nfsi->cache_validity & NFS_INO_INVALID_DATA)) - ret = -ECHILD; - spin_unlock(&inode->i_lock); -out: - return ret; -} - /** - * nfs_revalidate_mapping - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping + * nfs_clear_invalid_mapping - Conditionally clear a mapping + * @mapping: pointer to mapping + * + * If the NFS_INO_INVALID_DATA inode flag is set, clear the mapping. */ -int nfs_revalidate_mapping(struct inode *inode, - struct address_space *mapping) +int nfs_clear_invalid_mapping(struct address_space *mapping) { + struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); unsigned long *bitlock = &nfsi->flags; int ret = 0; - /* swapfiles are not supposed to be shared. */ - if (IS_SWAPFILE(inode)) - goto out; - - if (nfs_mapping_need_revalidate_inode(inode)) { - ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (ret < 0) - goto out; - } - /* * We must clear NFS_INO_INVALID_DATA first to ensure that * invalidations that come in while we're shooting down the mappings @@ -1289,9 +1513,17 @@ int nfs_revalidate_mapping(struct inode *inode, */ for (;;) { ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING, - nfs_wait_bit_killable, TASK_KILLABLE); + nfs_wait_bit_killable, + TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); if (ret) goto out; + smp_rmb(); /* pairs with smp_wmb() below */ + if (test_bit(NFS_INO_INVALIDATING, bitlock)) + continue; + /* pairs with nfs_set_cache_invalid()'s smp_store_release() */ + if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA)) + goto out; + /* Slow-path that double-checks with spinlock held */ spin_lock(&inode->i_lock); if (test_bit(NFS_INO_INVALIDATING, bitlock)) { spin_unlock(&inode->i_lock); @@ -1306,6 +1538,7 @@ int nfs_revalidate_mapping(struct inode *inode, set_bit(NFS_INO_INVALIDATING, bitlock); smp_wmb(); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + nfs_ooo_clear(nfsi); spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); @@ -1318,6 +1551,53 @@ out: return ret; } +bool nfs_mapping_need_revalidate_inode(struct inode *inode) +{ + return nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE) || + NFS_STALE(inode); +} + +int nfs_revalidate_mapping_rcu(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long *bitlock = &nfsi->flags; + int ret = 0; + + if (IS_SWAPFILE(inode)) + goto out; + if (nfs_mapping_need_revalidate_inode(inode)) { + ret = -ECHILD; + goto out; + } + spin_lock(&inode->i_lock); + if (test_bit(NFS_INO_INVALIDATING, bitlock) || + (nfsi->cache_validity & NFS_INO_INVALID_DATA)) + ret = -ECHILD; + spin_unlock(&inode->i_lock); +out: + return ret; +} + +/** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode: pointer to host inode + * @mapping: pointer to mapping + */ +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + /* swapfiles are not supposed to be shared. */ + if (IS_SWAPFILE(inode)) + return 0; + + if (nfs_mapping_need_revalidate_inode(inode)) { + int ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret < 0) + return ret; + } + + return nfs_clear_invalid_mapping(mapping); +} + static bool nfs_file_has_writers(struct nfs_inode *nfsi) { struct inode *inode = &nfsi->vfs_inode; @@ -1336,7 +1616,7 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct timespec ts; + struct timespec64 ts; if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) @@ -1344,35 +1624,36 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); + else if (nfs_server_capable(inode, NFS_CAP_XATTR)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR); } /* If we have atomic WCC data, we may update some attributes */ - ts = timespec64_to_timespec(inode->i_ctime); + ts = inode_get_ctime(inode); if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) && (fattr->valid & NFS_ATTR_FATTR_CTIME) - && timespec_equal(&ts, &fattr->pre_ctime)) { - inode->i_ctime = timespec_to_timespec64(fattr->ctime); + && timespec64_equal(&ts, &fattr->pre_ctime)) { + inode_set_ctime_to_ts(inode, fattr->ctime); } - ts = timespec64_to_timespec(inode->i_mtime); + ts = inode_get_mtime(inode); if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) && (fattr->valid & NFS_ATTR_FATTR_MTIME) - && timespec_equal(&ts, &fattr->pre_mtime)) { - inode->i_mtime = timespec_to_timespec64(fattr->mtime); - if (S_ISDIR(inode->i_mode)) - nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); + && timespec64_equal(&ts, &fattr->pre_mtime)) { + inode_set_mtime_to_ts(inode, fattr->mtime); } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && !nfs_have_writebacks(inode)) { + trace_nfs_size_wcc(inode, fattr->size); i_size_write(inode, nfs_size_to_loff_t(fattr->size)); } } /** * nfs_check_inode_attributes - verify consistency of the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * Verifies the attribute cache. If we have just changed the attributes, * so that fattr carries weak cache consistency data, then it may @@ -1383,60 +1664,62 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; unsigned long invalid = 0; - struct timespec ts; + struct timespec64 ts; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_delegated_attributes(inode)) return 0; + if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { + /* Only a mounted-on-fileid? Just exit */ + if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) + return 0; /* Has the inode gone and changed behind our back? */ - if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) + } else if (nfsi->fileid != fattr->fileid) { + /* Is this perhaps the mounted-on fileid? */ + if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) && + nfsi->fileid == fattr->mounted_on_fileid) + return 0; return -ESTALE; - if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + } + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode)) return -ESTALE; + if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr)) - invalid |= NFS_INO_INVALID_CHANGE - | NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_CHANGE; - ts = timespec64_to_timespec(inode->i_mtime); - if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&ts, &fattr->mtime)) + ts = inode_get_mtime(inode); + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime)) invalid |= NFS_INO_INVALID_MTIME; - ts = timespec64_to_timespec(inode->i_ctime); - if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&ts, &fattr->ctime)) + ts = inode_get_ctime(inode); + if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime)) invalid |= NFS_INO_INVALID_CTIME; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize) - invalid |= NFS_INO_INVALID_SIZE - | NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_SIZE; } } /* Have any file permissions changed? */ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) - invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + invalid |= NFS_INO_INVALID_MODE; if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid)) - invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + invalid |= NFS_INO_INVALID_OTHER; if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid)) - invalid |= NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL - | NFS_INO_INVALID_OTHER; + invalid |= NFS_INO_INVALID_OTHER; /* Has the link count changed? */ if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) - invalid |= NFS_INO_INVALID_OTHER; + invalid |= NFS_INO_INVALID_NLINK; - ts = timespec64_to_timespec(inode->i_atime); - if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&ts, &fattr->atime)) + ts = inode_get_atime(inode); + if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime)) invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) @@ -1466,6 +1749,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->gencount = nfs_inc_attr_generation_counter(); fattr->owner_name = NULL; fattr->group_name = NULL; + fattr->mdsthreshold = NULL; } EXPORT_SYMBOL_GPL(nfs_fattr_init); @@ -1489,18 +1773,37 @@ struct nfs_fattr *nfs_alloc_fattr(void) { struct nfs_fattr *fattr; - fattr = kmalloc(sizeof(*fattr), GFP_NOFS); - if (fattr != NULL) + fattr = kmalloc(sizeof(*fattr), GFP_KERNEL); + if (fattr != NULL) { nfs_fattr_init(fattr); + fattr->label = NULL; + } return fattr; } EXPORT_SYMBOL_GPL(nfs_alloc_fattr); +struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server) +{ + struct nfs_fattr *fattr = nfs_alloc_fattr(); + + if (!fattr) + return NULL; + + fattr->label = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(fattr->label)) { + kfree(fattr); + return NULL; + } + + return fattr; +} +EXPORT_SYMBOL_GPL(nfs_alloc_fattr_with_label); + struct nfs_fh *nfs_alloc_fhandle(void) { struct nfs_fh *fh; - fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS); + fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); if (fh != NULL) fh->size = 0; return fh; @@ -1571,42 +1874,209 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle); #endif /** - * nfs_inode_attrs_need_update - check if the inode attributes need updating - * @inode - pointer to inode - * @fattr - attributes + * nfs_inode_attrs_cmp_generic - compare attributes + * @fattr: attributes + * @inode: pointer to inode * * Attempt to divine whether or not an RPC call reply carrying stale * attributes got scheduled after another call carrying updated ones. - * - * To do so, the function first assumes that a more recent ctime means - * that the attributes in fattr are newer, however it also attempt to - * catch the case where ctime either didn't change, or went backwards - * (if someone reset the clock on the server) by looking at whether - * or not this RPC call was started after the inode was last updated. * Note also the check for wraparound of 'attr_gencount' * - * The function returns 'true' if it thinks the attributes in 'fattr' are - * more recent than the ones cached in the inode. + * The function returns '1' if it thinks the attributes in @fattr are + * more recent than the ones cached in @inode. Otherwise it returns + * the value '0'. + */ +static int nfs_inode_attrs_cmp_generic(const struct nfs_fattr *fattr, + const struct inode *inode) +{ + unsigned long attr_gencount = NFS_I(inode)->attr_gencount; + + return (long)(fattr->gencount - attr_gencount) > 0 || + (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0; +} + +/** + * nfs_inode_attrs_cmp_monotonic - compare attributes + * @fattr: attributes + * @inode: pointer to inode + * + * Attempt to divine whether or not an RPC call reply carrying stale + * attributes got scheduled after another call carrying updated ones. * + * We assume that the server observes monotonic semantics for + * the change attribute, so a larger value means that the attributes in + * @fattr are more recent, in which case the function returns the + * value '1'. + * A return value of '0' indicates no measurable change + * A return value of '-1' means that the attributes in @inode are + * more recent. */ -static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +static int nfs_inode_attrs_cmp_monotonic(const struct nfs_fattr *fattr, + const struct inode *inode) { - const struct nfs_inode *nfsi = NFS_I(inode); + s64 diff = fattr->change_attr - inode_peek_iversion_raw(inode); + if (diff > 0) + return 1; + return diff == 0 ? 0 : -1; +} - return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || - ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); +/** + * nfs_inode_attrs_cmp_strict_monotonic - compare attributes + * @fattr: attributes + * @inode: pointer to inode + * + * Attempt to divine whether or not an RPC call reply carrying stale + * attributes got scheduled after another call carrying updated ones. + * + * We assume that the server observes strictly monotonic semantics for + * the change attribute, so a larger value means that the attributes in + * @fattr are more recent, in which case the function returns the + * value '1'. + * A return value of '-1' means that the attributes in @inode are + * more recent or unchanged. + */ +static int nfs_inode_attrs_cmp_strict_monotonic(const struct nfs_fattr *fattr, + const struct inode *inode) +{ + return nfs_inode_attrs_cmp_monotonic(fattr, inode) > 0 ? 1 : -1; +} + +/** + * nfs_inode_attrs_cmp - compare attributes + * @fattr: attributes + * @inode: pointer to inode + * + * This function returns '1' if it thinks the attributes in @fattr are + * more recent than the ones cached in @inode. It returns '-1' if + * the attributes in @inode are more recent than the ones in @fattr, + * and it returns 0 if not sure. + */ +static int nfs_inode_attrs_cmp(const struct nfs_fattr *fattr, + const struct inode *inode) +{ + if (nfs_inode_attrs_cmp_generic(fattr, inode) > 0) + return 1; + switch (NFS_SERVER(inode)->change_attr_type) { + case NFS4_CHANGE_TYPE_IS_UNDEFINED: + break; + case NFS4_CHANGE_TYPE_IS_TIME_METADATA: + if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE)) + break; + return nfs_inode_attrs_cmp_monotonic(fattr, inode); + default: + if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE)) + break; + return nfs_inode_attrs_cmp_strict_monotonic(fattr, inode); + } + return 0; } -static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +/** + * nfs_inode_finish_partial_attr_update - complete a previous inode update + * @fattr: attributes + * @inode: pointer to inode + * + * Returns '1' if the last attribute update left the inode cached + * attributes in a partially unrevalidated state, and @fattr + * matches the change attribute of that partial update. + * Otherwise returns '0'. + */ +static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, + const struct inode *inode) +{ + const unsigned long check_valid = + NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME | + NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | + NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_NLINK | NFS_INO_INVALID_BTIME; + unsigned long cache_validity = NFS_I(inode)->cache_validity; + enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; + + if (ctype != NFS4_CHANGE_TYPE_IS_UNDEFINED && + !(cache_validity & NFS_INO_INVALID_CHANGE) && + (cache_validity & check_valid) != 0 && + (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && + nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0) + return 1; + return 0; +} + +static void nfs_ooo_merge(struct nfs_inode *nfsi, + u64 start, u64 end) { - int ret; + int i, cnt; + + if (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER) + /* No point merging anything */ + return; + + if (!nfsi->ooo) { + nfsi->ooo = kmalloc(sizeof(*nfsi->ooo), GFP_ATOMIC); + if (!nfsi->ooo) { + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + return; + } + nfsi->ooo->cnt = 0; + } + + /* add this range, merging if possible */ + cnt = nfsi->ooo->cnt; + for (i = 0; i < cnt; i++) { + if (end == nfsi->ooo->gap[i].start) + end = nfsi->ooo->gap[i].end; + else if (start == nfsi->ooo->gap[i].end) + start = nfsi->ooo->gap[i].start; + else + continue; + /* Remove 'i' from table and loop to insert the new range */ + cnt -= 1; + nfsi->ooo->gap[i] = nfsi->ooo->gap[cnt]; + i = -1; + } + if (start != end) { + if (cnt >= ARRAY_SIZE(nfsi->ooo->gap)) { + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; + kfree(nfsi->ooo); + nfsi->ooo = NULL; + return; + } + nfsi->ooo->gap[cnt].start = start; + nfsi->ooo->gap[cnt].end = end; + cnt += 1; + } + nfsi->ooo->cnt = cnt; +} + +static void nfs_ooo_record(struct nfs_inode *nfsi, + struct nfs_fattr *fattr) +{ + /* This reply was out-of-order, so record in the + * pre/post change id, possibly cancelling + * gaps created when iversion was jumpped forward. + */ + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) && + (fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) + nfs_ooo_merge(nfsi, + fattr->change_attr, + fattr->pre_change_attr); +} + +static int nfs_refresh_inode_locked(struct inode *inode, + struct nfs_fattr *fattr) +{ + int attr_cmp = nfs_inode_attrs_cmp(fattr, inode); + int ret = 0; trace_nfs_refresh_inode_enter(inode); - if (nfs_inode_attrs_need_update(inode, fattr)) + if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode)) ret = nfs_update_inode(inode, fattr); - else - ret = nfs_check_inode_attributes(inode, fattr); + else { + nfs_ooo_record(NFS_I(inode), fattr); + + if (attr_cmp == 0) + ret = nfs_check_inode_attributes(inode, fattr); + } trace_nfs_refresh_inode_exit(inode, ret); return ret; @@ -1614,8 +2084,8 @@ static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr /** * nfs_refresh_inode - try to update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * Check that an RPC call that returned attributes has not overlapped with * other recent updates of the inode metadata, then decide whether it is @@ -1649,8 +2119,8 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, /** * nfs_post_op_update_inode - try to update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. @@ -1679,8 +2149,8 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. Fake up @@ -1690,11 +2160,15 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); */ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr) { + int attr_cmp = nfs_inode_attrs_cmp(fattr, inode); int status; /* Don't do a WCC update if these attributes are already stale */ - if ((fattr->valid & NFS_ATTR_FATTR) == 0 || - !nfs_inode_attrs_need_update(inode, fattr)) { + if (attr_cmp < 0) + return 0; + if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) { + /* Record the pre/post change info before clearing PRECHANGE */ + nfs_ooo_record(NFS_I(inode), fattr); fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE | NFS_ATTR_FATTR_PRESIZE | NFS_ATTR_FATTR_PREMTIME @@ -1708,12 +2182,12 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { - fattr->pre_ctime = timespec64_to_timespec(inode->i_ctime); + fattr->pre_ctime = inode_get_ctime(inode); fattr->valid |= NFS_ATTR_FATTR_PRECTIME; } if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { - fattr->pre_mtime = timespec64_to_timespec(inode->i_mtime); + fattr->pre_mtime = inode_get_mtime(inode); fattr->valid |= NFS_ATTR_FATTR_PREMTIME; } if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && @@ -1725,14 +2199,15 @@ out_noforce: status = nfs_post_op_update_inode_locked(inode, fattr, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME - | NFS_INO_INVALID_MTIME); + | NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_BLOCKS); return status; } /** * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes + * @inode: pointer to inode + * @fattr: updated attributes * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. Fake up @@ -1753,18 +2228,6 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); -static inline bool nfs_fileid_valid(struct nfs_inode *nfsi, - struct nfs_fattr *fattr) -{ - bool ret1 = true, ret2 = true; - - if (fattr->valid & NFS_ATTR_FATTR_FILEID) - ret1 = (nfsi->fileid == fattr->fileid); - if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) - ret2 = (nfsi->fileid == fattr->mounted_on_fileid); - return ret1 || ret2; -} - /* * Many nfs protocol calls return the new file attributes after * an operation. Here we update the inode to reflect the state @@ -1779,9 +2242,10 @@ static inline bool nfs_fileid_valid(struct nfs_inode *nfsi, */ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_server *server; + struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; + u64 fattr_supported = server->fattr_valid; unsigned long invalid = 0; unsigned long now = jiffies; unsigned long save_cache_validity; @@ -1790,12 +2254,21 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) bool attr_changed = false; bool have_delegation; - dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%llx)\n", __func__, inode->i_sb->s_id, inode->i_ino, nfs_display_fhandle_hash(NFS_FH(inode)), - atomic_read(&inode->i_count), fattr->valid); + icount_read(inode), fattr->valid); - if (!nfs_fileid_valid(nfsi, fattr)) { + if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { + /* Only a mounted-on-fileid? Just exit */ + if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) + return 0; + /* Has the inode gone and changed behind our back? */ + } else if (nfsi->fileid != fattr->fileid) { + /* Is this perhaps the mounted-on fileid? */ + if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) && + nfsi->fileid == fattr->mounted_on_fileid) + return 0; printk(KERN_ERR "NFS: server %s error: fileid changed\n" "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", NFS_SERVER(inode)->nfs_client->cl_hostname, @@ -1807,7 +2280,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* * Make sure the inode's type hasn't changed. */ - if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && inode_wrong_type(inode, fattr->mode)) { /* * Big trouble! The inode has become a different object. */ @@ -1816,7 +2289,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) goto out_err; } - server = NFS_SERVER(inode); /* Update the fsid? */ if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && !nfs_fsid_equal(&server->fsid, &fattr->fsid) && @@ -1831,67 +2303,92 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; + /* Fix up any delegated attributes in the struct nfs_fattr */ + nfs_fattr_fixup_delegated(inode, fattr); + save_cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED - | NFS_INO_REVAL_PAGECACHE); + | NFS_INO_INVALID_BLOCKS); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); if (pnfs_layoutcommit_outstanding(inode)) { - nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= + save_cache_validity & + (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME | + NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE | + NFS_INO_INVALID_BLOCKS); cache_revalidated = false; } /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { + if (!have_writers && nfsi->ooo && nfsi->ooo->cnt == 1 && + nfsi->ooo->gap[0].end == inode_peek_iversion_raw(inode)) { + /* There is one remaining gap that hasn't been + * merged into iversion - do that now. + */ + inode_set_iversion_raw(inode, nfsi->ooo->gap[0].start); + kfree(nfsi->ooo); + nfsi->ooo = NULL; + } if (!inode_eq_iversion_raw(inode, fattr->change_attr)) { /* Could it be a race with writeback? */ if (!(have_writers || have_delegation)) { invalid |= NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + | NFS_INO_INVALID_ACL + | NFS_INO_INVALID_XATTR; /* Force revalidate of all attributes */ save_cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE - | NFS_INO_INVALID_OTHER; + | NFS_INO_INVALID_BLOCKS + | NFS_INO_INVALID_NLINK + | NFS_INO_INVALID_MODE + | NFS_INO_INVALID_OTHER + | NFS_INO_INVALID_BTIME; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); + attr_changed = true; dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); + } else if (!have_delegation) { + nfs_ooo_record(nfsi, fattr); + nfs_ooo_merge(nfsi, inode_peek_iversion_raw(inode), + fattr->change_attr); } inode_set_iversion_raw(inode, fattr->change_attr); - attr_changed = true; } } else { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_CHANGE - | NFS_INO_REVAL_PAGECACHE - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_CHANGE; + if (!have_delegation || + (nfsi->cache_validity & NFS_INO_INVALID_CHANGE) != 0) + cache_revalidated = false; } - if (fattr->valid & NFS_ATTR_FATTR_MTIME) { - inode->i_mtime = timespec_to_timespec64(fattr->mtime); - } else if (server->caps & NFS_CAP_MTIME) { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_MTIME - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; - } + if (fattr->valid & NFS_ATTR_FATTR_MTIME) + inode_set_mtime_to_ts(inode, fattr->mtime); + else if (fattr_supported & NFS_ATTR_FATTR_MTIME) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_MTIME; - if (fattr->valid & NFS_ATTR_FATTR_CTIME) { - inode->i_ctime = timespec_to_timespec64(fattr->ctime); - } else if (server->caps & NFS_CAP_CTIME) { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_CTIME - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; - } + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode_set_ctime_to_ts(inode, fattr->ctime); + else if (fattr_supported & NFS_ATTR_FATTR_CTIME) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_CTIME; + + if (fattr->valid & NFS_ATTR_FATTR_BTIME) + nfsi->btime = fattr->btime; + else if (fattr_supported & NFS_ATTR_FATTR_BTIME) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_BTIME; /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1901,35 +2398,27 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ if (!nfs_have_writebacks(inode) || new_isize > cur_isize) { + trace_nfs_size_update(inode, new_isize); i_size_write(inode, new_isize); if (!have_writers) invalid |= NFS_INO_INVALID_DATA; - attr_changed = true; } - dprintk("NFS: isize change on server for file %s/%ld " - "(%Ld to %Ld)\n", - inode->i_sb->s_id, - inode->i_ino, - (long long)cur_isize, - (long long)new_isize); } - } else { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_SIZE - | NFS_INO_REVAL_PAGECACHE - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; - } - + if (new_isize == 0 && + !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED | + NFS_ATTR_FATTR_BLOCKS_USED))) { + fattr->du.nfs3.used = 0; + fattr->valid |= NFS_ATTR_FATTR_SPACE_USED; + } + } else + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_SIZE; if (fattr->valid & NFS_ATTR_FATTR_ATIME) - inode->i_atime = timespec_to_timespec64(fattr->atime); - else if (server->caps & NFS_CAP_ATIME) { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; - } + inode_set_atime_to_ts(inode, fattr->atime); + else if (fattr_supported & NFS_ATTR_FATTR_ATIME) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_ATIME; if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { @@ -1938,70 +2427,55 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; - attr_changed = true; } - } else if (server->caps & NFS_CAP_MODE) { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_OTHER - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; - } + } else if (fattr_supported & NFS_ATTR_FATTR_MODE) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_MODE; if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; - attr_changed = true; } - } else if (server->caps & NFS_CAP_OWNER) { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_OTHER - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; - } + } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_OTHER; if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; - attr_changed = true; } - } else if (server->caps & NFS_CAP_OWNER_GROUP) { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_OTHER - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; - } + } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_OTHER; if (fattr->valid & NFS_ATTR_FATTR_NLINK) { - if (inode->i_nlink != fattr->nlink) { - if (S_ISDIR(inode->i_mode)) - invalid |= NFS_INO_INVALID_DATA; + if (inode->i_nlink != fattr->nlink) set_nlink(inode, fattr->nlink); - attr_changed = true; - } - } else if (server->caps & NFS_CAP_NLINK) { - nfsi->cache_validity |= save_cache_validity & - (NFS_INO_INVALID_OTHER - | NFS_INO_REVAL_FORCED); - cache_revalidated = false; - } + } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_NLINK; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_BLOCKS; + + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; - else - cache_revalidated = false; + else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) + nfsi->cache_validity |= + save_cache_validity & NFS_INO_INVALID_BLOCKS; /* Update attrtimeo value if we're out of the unstable period */ if (attr_changed) { - invalid &= ~NFS_INO_INVALID_ATTR; nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -2018,7 +2492,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attrtimeo_timestamp = now; } /* Set the barrier to be more recent than this fattr */ - if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) + if ((long)(fattr->gencount - nfsi->attr_gencount) > 0) nfsi->attr_gencount = fattr->gencount; } @@ -2035,36 +2509,37 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * lookup validation will know that the inode is bad. * (But we fall through to invalidate the caches.) */ - nfs_invalidate_inode(inode); + nfs_set_inode_stale_locked(inode); return -ESTALE; } struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; - nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); + nfsi = alloc_inode_sb(sb, nfs_inode_cachep, GFP_KERNEL); if (!nfsi) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; + nfsi->ooo = NULL; #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ +#ifdef CONFIG_NFS_V4_2 + nfsi->xattr_cache = NULL; +#endif + nfs_netfs_inode_init(nfsi); + return &nfsi->vfs_inode; } EXPORT_SYMBOL_GPL(nfs_alloc_inode); -static void nfs_i_callback(struct rcu_head *head) +void nfs_free_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); + kfree(NFS_I(inode)->ooo); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } - -void nfs_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, nfs_i_callback); -} -EXPORT_SYMBOL_GPL(nfs_destroy_inode); +EXPORT_SYMBOL_GPL(nfs_free_inode); static inline void nfs4_init_once(struct nfs_inode *nfsi) { @@ -2078,18 +2553,12 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) static void init_once(void *foo) { - struct nfs_inode *nfsi = (struct nfs_inode *) foo; + struct nfs_inode *nfsi = foo; inode_init_once(&nfsi->vfs_inode); INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_LIST_HEAD(&nfsi->commit_info.list); - atomic_long_set(&nfsi->nrequests, 0); - atomic_long_set(&nfsi->commit_info.ncommit, 0); - atomic_set(&nfsi->commit_info.rpcs_out, 0); - init_rwsem(&nfsi->rmdir_sem); - mutex_init(&nfsi->commit_mutex); nfs4_init_once(nfsi); } @@ -2098,7 +2567,7 @@ static int __init nfs_init_inodecache(void) nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), + SLAB_ACCOUNT), init_once); if (nfs_inode_cachep == NULL) return -ENOMEM; @@ -2116,35 +2585,54 @@ static void nfs_destroy_inodecache(void) kmem_cache_destroy(nfs_inode_cachep); } +struct workqueue_struct *nfslocaliod_workqueue; struct workqueue_struct *nfsiod_workqueue; EXPORT_SYMBOL_GPL(nfsiod_workqueue); /* - * start up the nfsiod workqueue + * Destroy the nfsiod workqueues */ -static int nfsiod_start(void) +static void nfsiod_stop(void) { struct workqueue_struct *wq; - dprintk("RPC: creating workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0); - if (wq == NULL) - return -ENOMEM; - nfsiod_workqueue = wq; - return 0; + + wq = nfsiod_workqueue; + if (wq != NULL) { + nfsiod_workqueue = NULL; + destroy_workqueue(wq); + } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + wq = nfslocaliod_workqueue; + if (wq != NULL) { + nfslocaliod_workqueue = NULL; + destroy_workqueue(wq); + } +#endif /* CONFIG_NFS_LOCALIO */ } /* - * Destroy the nfsiod workqueue + * Start the nfsiod workqueues */ -static void nfsiod_stop(void) +static int nfsiod_start(void) { - struct workqueue_struct *wq; - - wq = nfsiod_workqueue; - if (wq == NULL) - return; - nfsiod_workqueue = NULL; - destroy_workqueue(wq); + dprintk("RPC: creating workqueue nfsiod\n"); + nfsiod_workqueue = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (nfsiod_workqueue == NULL) + return -ENOMEM; +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + /* + * localio writes need to use a normal (non-memreclaim) workqueue. + * When we start getting low on space, XFS goes and calls flush_work() on + * a non-memreclaim work queue, which causes a priority inversion problem. + */ + dprintk("RPC: creating workqueue nfslocaliod\n"); + nfslocaliod_workqueue = alloc_workqueue("nfslocaliod", WQ_UNBOUND, 0); + if (unlikely(nfslocaliod_workqueue == NULL)) { + nfsiod_stop(); + return -ENOMEM; + } +#endif /* CONFIG_NFS_LOCALIO */ + return 0; } unsigned int nfs_net_id; @@ -2152,18 +2640,34 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); + int err; + nfs_clients_init(net); - return nfs_fs_proc_net_init(net); + + if (!rpc_proc_register(net, &nn->rpcstats)) { + err = -ENOMEM; + goto err_proc_rpc; + } + + err = nfs_fs_proc_net_init(net); + if (err) + goto err_proc_nfs; + + return 0; + +err_proc_nfs: + rpc_proc_unregister(net, "nfs"); +err_proc_rpc: + nfs_clients_exit(net); + return err; } static void nfs_net_exit(struct net *net) { - struct nfs_net *nn = net_generic(net, nfs_net_id); - + rpc_proc_unregister(net, "nfs"); nfs_fs_proc_net_exit(net); - nfs_cleanup_cb_ident_idr(net); - WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); - WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); + nfs_clients_exit(net); } static struct pernet_operations nfs_net_ops = { @@ -2173,6 +2677,35 @@ static struct pernet_operations nfs_net_ops = { .size = sizeof(struct nfs_net), }; +#ifdef CONFIG_KEYS +static struct key *nfs_keyring; + +static int __init nfs_init_keyring(void) +{ + nfs_keyring = keyring_alloc(".nfs", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + (KEY_USR_ALL & ~KEY_USR_SETATTR), + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + return PTR_ERR_OR_ZERO(nfs_keyring); +} + +static void nfs_exit_keyring(void) +{ + key_put(nfs_keyring); +} +#else +static inline int nfs_init_keyring(void) +{ + return 0; +} + +static inline void nfs_exit_keyring(void) +{ +} +#endif /* CONFIG_KEYS */ + /* * Initialize NFS */ @@ -2180,13 +2713,17 @@ static int __init init_nfs_fs(void) { int err; - err = register_pernet_subsys(&nfs_net_ops); + err = nfs_init_keyring(); + if (err) + return err; + + err = nfs_sysfs_init(); if (err < 0) - goto out9; + goto out10; - err = nfs_fscache_register(); + err = register_pernet_subsys(&nfs_net_ops); if (err < 0) - goto out8; + goto out9; err = nfsiod_start(); if (err) @@ -2216,15 +2753,12 @@ static int __init init_nfs_fs(void) if (err) goto out1; - rpc_proc_register(&init_net, &nfs_rpcstat); - err = register_nfs_fs(); if (err) goto out0; return 0; out0: - rpc_proc_unregister(&init_net, "nfs"); nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2239,10 +2773,11 @@ out5: out6: nfsiod_stop(); out7: - nfs_fscache_unregister(); -out8: unregister_pernet_subsys(&nfs_net_ops); out9: + nfs_sysfs_exit(); +out10: + nfs_exit_keyring(); return err; } @@ -2253,16 +2788,17 @@ static void __exit exit_nfs_fs(void) nfs_destroy_readpagecache(); nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); - nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); - rpc_proc_unregister(&init_net, "nfs"); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); + nfs_sysfs_exit(); + nfs_exit_keyring(); } /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +MODULE_DESCRIPTION("NFS client support"); MODULE_LICENSE("GPL"); module_param(enable_ino64, bool, 0644); |
