diff options
Diffstat (limited to 'fs/nfsd/filecache.c')
| -rw-r--r-- | fs/nfsd/filecache.c | 492 |
1 files changed, 342 insertions, 150 deletions
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ee9c923192e0..93798575b807 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -39,6 +39,7 @@ #include <linux/fsnotify.h> #include <linux/seq_file.h> #include <linux/rhashtable.h> +#include <linux/nfslocalio.h> #include "vfs.h" #include "nfsd.h" @@ -52,22 +53,20 @@ #define NFSD_FILE_CACHE_UP (0) /* We only care about NFSD_MAY_READ/WRITE for this cache */ -#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) +#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO) static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); +static DEFINE_PER_CPU(unsigned long, nfsd_file_allocations); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { - struct work_struct work; spinlock_t lock; struct list_head freeme; }; -static struct workqueue_struct *nfsd_filecache_wq __read_mostly; - static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; static struct list_lru nfsd_file_lru; @@ -114,7 +113,7 @@ static void nfsd_file_schedule_laundrette(void) { if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + queue_delayed_work(system_dfl_wq, &nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); } @@ -154,7 +153,7 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm) } static struct nfsd_file_mark * -nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) +nfsd_file_mark_find_or_create(struct inode *inode) { int err; struct fsnotify_mark *mark; @@ -162,8 +161,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) do { fsnotify_group_lock(nfsd_file_fsnotify_group); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + mark = fsnotify_find_inode_mark(inode, + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, @@ -218,7 +217,9 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, if (unlikely(!nf)) return NULL; + this_cpu_inc(nfsd_file_allocations); INIT_LIST_HEAD(&nf->nf_lru); + INIT_LIST_HEAD(&nf->nf_gc); nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); @@ -230,6 +231,9 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, refcount_set(&nf->nf_ref, 1); nf->nf_may = need; nf->nf_mark = NULL; + nf->nf_dio_mem_align = 0; + nf->nf_dio_offset_align = 0; + nf->nf_dio_read_offset_align = 0; return nf; } @@ -283,7 +287,7 @@ nfsd_file_free(struct nfsd_file *nf) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { nfsd_file_check_write_error(nf); - filp_close(nf->nf_file, NULL); + nfsd_filp_close(nf->nf_file); } /* @@ -318,20 +322,19 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } - -static bool nfsd_file_lru_add(struct nfsd_file *nf) +static void nfsd_file_lru_add(struct nfsd_file *nf) { - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { + refcount_inc(&nf->nf_ref); + if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) trace_nfsd_file_lru_add(nf); - return true; - } - return false; + else + WARN_ON(1); + nfsd_file_schedule_laundrette(); } static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { + if (list_lru_del_obj(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_del(nf); return true; } @@ -362,42 +365,55 @@ nfsd_file_put(struct nfsd_file *nf) if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - /* - * If this is the last reference (nf_ref == 1), then try to - * transfer it to the LRU. - */ - if (refcount_dec_not_one(&nf->nf_ref)) - return; - - /* Try to add it to the LRU. If that fails, decrement. */ - if (nfsd_file_lru_add(nf)) { - /* If it's still hashed, we're done */ - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_schedule_laundrette(); - return; - } - - /* - * We're racing with unhashing, so try to remove it from - * the LRU. If removal fails, then someone else already - * has our reference. - */ - if (!nfsd_file_lru_remove(nf)) - return; - } + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + set_bit(NFSD_FILE_RECENT, &nf->nf_flags); } + if (refcount_dec_and_test(&nf->nf_ref)) nfsd_file_free(nf); } +/** + * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller + * @pnf: nfsd_file of which to put the reference + * + * First save the associated net to return to caller, then put + * the reference of the nfsd_file. + */ +struct net * +nfsd_file_put_local(struct nfsd_file __rcu **pnf) +{ + struct nfsd_file *nf; + struct net *net = NULL; + + nf = unrcu_pointer(xchg(pnf, NULL)); + if (nf) { + net = nf->nf_net; + nfsd_file_put(nf); + } + return net; +} + +/** + * nfsd_file_file - get the backing file of an nfsd_file + * @nf: nfsd_file of which to access the backing file. + * + * Return backing file for @nf. + */ +struct file * +nfsd_file_file(struct nfsd_file *nf) +{ + return nf->nf_file; +} + static void nfsd_file_dispose_list(struct list_head *dispose) { struct nfsd_file *nf; while (!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + nf = list_first_entry(dispose, struct nfsd_file, nf_gc); + list_del_init(&nf->nf_gc); nfsd_file_free(nf); } } @@ -414,14 +430,53 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) { while(!list_empty(dispose)) { struct nfsd_file *nf = list_first_entry(dispose, - struct nfsd_file, nf_lru); + struct nfsd_file, nf_gc); struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); struct nfsd_fcache_disposal *l = nn->fcache_disposal; + struct svc_serv *serv; spin_lock(&l->lock); - list_move_tail(&nf->nf_lru, &l->freeme); + list_move_tail(&nf->nf_gc, &l->freeme); spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); + + /* + * The filecache laundrette is shut down after the + * nn->nfsd_serv pointer is cleared, but before the + * svc_serv is freed. + */ + serv = nn->nfsd_serv; + if (serv) + svc_wake_up(serv); + } +} + +/** + * nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed. + * @nn: nfsd_net in which to find files to be disposed. + * + * When files held open for nfsv3 are removed from the filecache, whether + * due to memory pressure or garbage collection, they are queued to + * a per-net-ns queue. This function completes the disposal, either + * directly or by waking another nfsd thread to help with the work. + */ +void nfsd_file_net_dispose(struct nfsd_net *nn) +{ + struct nfsd_fcache_disposal *l = nn->fcache_disposal; + + if (!list_empty(&l->freeme)) { + LIST_HEAD(dispose); + int i; + + spin_lock(&l->lock); + for (i = 0; i < 8 && !list_empty(&l->freeme); i++) + list_move(l->freeme.next, &dispose); + spin_unlock(&l->lock); + if (!list_empty(&l->freeme)) + /* Wake up another thread to share the work + * *before* doing any actual disposing. + */ + svc_wake_up(nn->nfsd_serv); + nfsd_file_dispose_list(&dispose); } } @@ -429,7 +484,6 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) * nfsd_file_lru_cb - Examine an entry on the LRU list * @item: LRU entry to examine * @lru: controlling LRU - * @lock: LRU list lock (unused) * @arg: dispose list * * Return values: @@ -439,9 +493,7 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) */ static enum lru_status nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, - spinlock_t *lock, void *arg) - __releases(lock) - __acquires(lock) + void *arg) { struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); @@ -465,31 +517,71 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, } /* - * Put the reference held on behalf of the LRU. If it wasn't the last - * one, then just remove it from the LRU and ignore it. + * Put the reference held on behalf of the LRU if it is the last + * reference, else rotate. */ - if (!refcount_dec_and_test(&nf->nf_ref)) { + if (!refcount_dec_if_one(&nf->nf_ref)) { trace_nfsd_file_gc_in_use(nf); - list_lru_isolate(lru, &nf->nf_lru); - return LRU_REMOVED; + return LRU_ROTATE; } /* Refcount went to zero. Unhash it and queue it to the dispose list */ nfsd_file_unhash(nf); - list_lru_isolate_move(lru, &nf->nf_lru, head); + list_lru_isolate(lru, &nf->nf_lru); + list_add(&nf->nf_gc, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; } +static enum lru_status +nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru, + void *arg) +{ + struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); + + if (test_and_clear_bit(NFSD_FILE_RECENT, &nf->nf_flags)) { + /* + * "REFERENCED" really means "should be at the end of the + * LRU. As we are putting it there we can clear the flag. + */ + clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + trace_nfsd_file_gc_aged(nf); + return LRU_ROTATE; + } + return nfsd_file_lru_cb(item, lru, arg); +} + +/* If the shrinker runs between calls to list_lru_walk_node() in + * nfsd_file_gc(), the "remaining" count will be wrong. This could + * result in premature freeing of some files. This may not matter much + * but is easy to fix with this spinlock which temporarily disables + * the shrinker. + */ +static DEFINE_SPINLOCK(nfsd_gc_lock); static void nfsd_file_gc(void) { + unsigned long ret = 0; LIST_HEAD(dispose); - unsigned long ret; + int nid; + + spin_lock(&nfsd_gc_lock); + for_each_node_state(nid, N_NORMAL_MEMORY) { + unsigned long remaining = list_lru_count_node(&nfsd_file_lru, nid); - ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, - &dispose, list_lru_count(&nfsd_file_lru)); + while (remaining > 0) { + unsigned long nr = min(remaining, NFSD_FILE_GC_BATCH); + + remaining -= nr; + ret += list_lru_walk_node(&nfsd_file_lru, nid, nfsd_file_gc_cb, + &dispose, &nr); + if (nr) + /* walk aborted early */ + remaining = 0; + } + } + spin_unlock(&nfsd_gc_lock); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_dispose_list_delayed(&dispose); } @@ -497,9 +589,9 @@ nfsd_file_gc(void) static void nfsd_file_gc_worker(struct work_struct *work) { - nfsd_file_gc(); if (list_lru_count(&nfsd_file_lru)) - nfsd_file_schedule_laundrette(); + nfsd_file_gc(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -514,18 +606,18 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) LIST_HEAD(dispose); unsigned long ret; + if (!spin_trylock(&nfsd_gc_lock)) + return SHRINK_STOP; + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); + spin_unlock(&nfsd_gc_lock); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_dispose_list_delayed(&dispose); return ret; } -static struct shrinker nfsd_file_shrinker = { - .scan_objects = nfsd_file_lru_scan, - .count_objects = nfsd_file_lru_count, - .seeks = 1, -}; +static struct shrinker *nfsd_file_shrinker; /** * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file @@ -555,7 +647,7 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) /* If refcount goes to 0, then put on the dispose list */ if (refcount_sub_and_test(decrement, &nf->nf_ref)) { - list_add(&nf->nf_lru, dispose); + list_add(&nf->nf_gc, dispose); trace_nfsd_file_closing(nf); } } @@ -624,50 +716,23 @@ nfsd_file_close_inode(struct inode *inode) void nfsd_file_close_inode_sync(struct inode *inode) { - struct nfsd_file *nf; LIST_HEAD(dispose); trace_nfsd_file_close(inode); nfsd_file_queue_for_close(inode, &dispose); - while (!list_empty(&dispose)) { - nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_free(nf); - } - flush_delayed_fput(); -} - -/** - * nfsd_file_delayed_close - close unused nfsd_files - * @work: dummy - * - * Scrape the freeme list for this nfsd_net, and then dispose of them - * all. - */ -static void -nfsd_file_delayed_close(struct work_struct *work) -{ - LIST_HEAD(head); - struct nfsd_fcache_disposal *l = container_of(work, - struct nfsd_fcache_disposal, work); - - spin_lock(&l->lock); - list_splice_init(&l->freeme, &head); - spin_unlock(&l->lock); - - nfsd_file_dispose_list(&head); + nfsd_file_dispose_list(&dispose); } static int nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, void *data) { - struct file_lock *fl = data; + struct file_lease *fl = data; /* Only close files for F_SETLEASE leases */ - if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode(file_inode(fl->fl_file)); + if (fl->c.flc_flags & FL_LEASE) + nfsd_file_close_inode(file_inode(fl->c.flc_file)); return 0; } @@ -718,40 +783,40 @@ nfsd_file_cache_init(void) ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); if (ret) - return ret; - - ret = -ENOMEM; - nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); - if (!nfsd_filecache_wq) goto out; - nfsd_file_slab = kmem_cache_create("nfsd_file", - sizeof(struct nfsd_file), 0, 0, NULL); + ret = -ENOMEM; + nfsd_file_slab = KMEM_CACHE(nfsd_file, 0); if (!nfsd_file_slab) { pr_err("nfsd: unable to create nfsd_file_slab\n"); goto out_err; } - nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", - sizeof(struct nfsd_file_mark), 0, 0, NULL); + nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0); if (!nfsd_file_mark_slab) { pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); goto out_err; } - ret = list_lru_init(&nfsd_file_lru); if (ret) { pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); goto out_err; } - ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache"); - if (ret) { - pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); + nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache"); + if (!nfsd_file_shrinker) { + ret = -ENOMEM; + pr_err("nfsd: failed to allocate nfsd_file_shrinker\n"); goto out_lru; } + nfsd_file_shrinker->count_objects = nfsd_file_lru_count; + nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan; + nfsd_file_shrinker->seeks = 1; + + shrinker_register(nfsd_file_shrinker); + ret = lease_register_notifier(&nfsd_file_lease_notifier); if (ret) { pr_err("nfsd: unable to register lease notifier: %d\n", ret); @@ -759,7 +824,7 @@ nfsd_file_cache_init(void) } nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, - FSNOTIFY_GROUP_NOFS); + 0); if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); @@ -770,11 +835,13 @@ nfsd_file_cache_init(void) INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: + if (ret) + clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags); return ret; out_notifier: lease_unregister_notifier(&nfsd_file_lease_notifier); out_shrinker: - unregister_shrinker(&nfsd_file_shrinker); + shrinker_free(nfsd_file_shrinker); out_lru: list_lru_destroy(&nfsd_file_lru); out_err: @@ -782,8 +849,6 @@ out_err: nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - destroy_workqueue(nfsd_filecache_wq); - nfsd_filecache_wq = NULL; rhltable_destroy(&nfsd_file_rhltable); goto out; } @@ -803,6 +868,14 @@ __nfsd_file_cache_purge(struct net *net) struct nfsd_file *nf; LIST_HEAD(dispose); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + if (net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); + } +#endif + rhltable_walk_enter(&nfsd_file_rhltable, &iter); do { rhashtable_walk_start(&iter); @@ -829,7 +902,6 @@ nfsd_alloc_fcache_disposal(void) l = kmalloc(sizeof(*l), GFP_KERNEL); if (!l) return NULL; - INIT_WORK(&l->work, nfsd_file_delayed_close); spin_lock_init(&l->lock); INIT_LIST_HEAD(&l->freeme); return l; @@ -838,7 +910,6 @@ nfsd_alloc_fcache_disposal(void) static void nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) { - cancel_work_sync(&l->work); nfsd_file_dispose_list(&l->freeme); kfree(l); } @@ -891,7 +962,7 @@ nfsd_file_cache_shutdown(void) return; lease_unregister_notifier(&nfsd_file_lease_notifier); - unregister_shrinker(&nfsd_file_shrinker); + shrinker_free(nfsd_file_shrinker); /* * make sure all callers of nfsd_file_lru_cb are done before * calling nfsd_file_cache_purge @@ -907,13 +978,12 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - destroy_workqueue(nfsd_filecache_wq); - nfsd_filecache_wq = NULL; rhltable_destroy(&nfsd_file_rhltable); for_each_possible_cpu(i) { per_cpu(nfsd_file_cache_hits, i) = 0; per_cpu(nfsd_file_acquisitions, i) = 0; + per_cpu(nfsd_file_allocations, i) = 0; per_cpu(nfsd_file_releases, i) = 0; per_cpu(nfsd_file_total_age, i) = 0; per_cpu(nfsd_file_evictions, i) = 0; @@ -982,41 +1052,68 @@ nfsd_file_is_cached(struct inode *inode) } static __be32 -nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, +nfsd_file_get_dio_attrs(const struct svc_fh *fhp, struct nfsd_file *nf) +{ + struct inode *inode = file_inode(nf->nf_file); + struct kstat stat; + __be32 status; + + /* Currently only need to get DIO alignment info for regular files */ + if (!S_ISREG(inode->i_mode)) + return nfs_ok; + + status = fh_getattr(fhp, &stat); + if (status != nfs_ok) + return status; + + trace_nfsd_file_get_dio_attrs(inode, &stat); + + if (stat.result_mask & STATX_DIOALIGN) { + nf->nf_dio_mem_align = stat.dio_mem_align; + nf->nf_dio_offset_align = stat.dio_offset_align; + } + if (stat.result_mask & STATX_DIO_READ_ALIGN) + nf->nf_dio_read_offset_align = stat.dio_read_offset_align; + else + nf->nf_dio_read_offset_align = nf->nf_dio_offset_align; + + return nfs_ok; +} + +static __be32 +nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, + struct svc_cred *cred, + struct auth_domain *client, + struct svc_fh *fhp, unsigned int may_flags, struct file *file, - struct nfsd_file **pnf, bool want_gc) + umode_t type, bool want_gc, struct nfsd_file **pnf) { unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - struct net *net = SVC_NET(rqstp); struct nfsd_file *new, *nf; - const struct cred *cred; + bool stale_retry = true; bool open_retry = true; struct inode *inode; __be32 status; int ret; - status = fh_verify(rqstp, fhp, S_IFREG, - may_flags|NFSD_MAY_OWNER_OVERRIDE); +retry: + if (rqstp) + status = fh_verify(rqstp, fhp, type, + may_flags|NFSD_MAY_OWNER_OVERRIDE); + else + status = fh_verify_local(net, cred, client, fhp, type, + may_flags|NFSD_MAY_OWNER_OVERRIDE); + if (status != nfs_ok) return status; inode = d_inode(fhp->fh_dentry); - cred = get_current_cred(); -retry: rcu_read_lock(); - nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); + nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc); rcu_read_unlock(); - if (nf) { - /* - * If the nf is on the LRU then it holds an extra reference - * that must be put if it's removed. It had better not be - * the last one however, since we should hold another. - */ - if (nfsd_file_lru_remove(nf)) - WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); + if (nf) goto wait_for_construction; - } new = nfsd_file_alloc(net, inode, need, want_gc); if (!new) { @@ -1026,11 +1123,11 @@ retry: rcu_read_lock(); spin_lock(&inode->i_lock); - nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); + nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc); if (unlikely(nf)) { spin_unlock(&inode->i_lock); rcu_read_unlock(); - nfsd_file_slab_free(&new->nf_rcu); + nfsd_file_free(new); goto wait_for_construction; } nf = new; @@ -1041,8 +1138,6 @@ retry: if (likely(ret == 0)) goto open_file; - if (ret == -EEXIST) - goto retry; trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); status = nfserr_jukebox; goto construction_err; @@ -1057,7 +1152,9 @@ wait_for_construction: status = nfserr_jukebox; goto construction_err; } + nfsd_file_put(nf); open_retry = false; + fh_put(fhp); goto retry; } this_cpu_inc(nfsd_file_cache_hits); @@ -1074,23 +1171,38 @@ out: nfsd_file_check_write_error(nf); *pnf = nf; } - put_cred(cred); trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); - if (nf->nf_mark) { + + if (type == S_IFREG) + nf->nf_mark = nfsd_file_mark_find_or_create(inode); + + if (type != S_IFREG || nf->nf_mark) { if (file) { get_file(file); nf->nf_file = file; status = nfs_ok; trace_nfsd_file_opened(nf, status); } else { - status = nfsd_open_verified(rqstp, fhp, may_flags, - &nf->nf_file); + ret = nfsd_open_verified(fhp, type, may_flags, &nf->nf_file); + if (ret == -EOPENSTALE && stale_retry) { + stale_retry = false; + nfsd_file_unhash(nf); + clear_and_wake_up_bit(NFSD_FILE_PENDING, + &nf->nf_flags); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); + nf = NULL; + fh_put(fhp); + goto retry; + } + status = nfserrno(ret); trace_nfsd_file_open(nf, status); + if (status == nfs_ok) + status = nfsd_file_get_dio_attrs(fhp, nf); } } else status = nfserr_jukebox; @@ -1100,6 +1212,9 @@ open_file: */ if (status != nfs_ok || inode->i_nlink == 0) nfsd_file_unhash(nf); + else if (want_gc) + nfsd_file_lru_add(nf); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); if (status == nfs_ok) goto out; @@ -1133,7 +1248,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, NULL, S_IFREG, true, pnf); } /** @@ -1157,7 +1273,54 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, NULL, S_IFREG, false, pnf); +} + +/** + * nfsd_file_acquire_local - Get a struct nfsd_file with an open file for localio + * @net: The network namespace in which to perform a lookup + * @cred: the user credential with which to validate access + * @client: the auth_domain for LOCALIO lookup + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * This file lookup interface provide access to a file given the + * filehandle and credential. No connection-based authorisation + * is performed and in that way it is quite different to other + * file access mediated by nfsd. It allows a kernel module such as the NFS + * client to reach across network and filesystem namespaces to access + * a file. The security implications of this should be carefully + * considered before use. + * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is unhashed after the + * final nfsd_file_put(). + * + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. + */ +__be32 +nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, + struct auth_domain *client, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + /* + * Save creds before calling nfsd_file_do_acquire() (which calls + * nfsd_setuser). Important because caller (LOCALIO) is from + * client context. + */ + const struct cred *save_cred = get_current_cred(); + __be32 beres; + + beres = nfsd_file_do_acquire(NULL, net, cred, client, fhp, may_flags, + NULL, S_IFREG, false, pnf); + put_cred(revert_creds(save_cred)); + return beres; } /** @@ -1183,7 +1346,34 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, file, S_IFREG, false, pnf); +} + +/** + * nfsd_file_acquire_dir - Get a struct nfsd_file with an open directory + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file to be opened + * @pnf: OUT: new or found "struct nfsd_file" object + * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is unhashed after the + * final nfsd_file_put(). This opens directories only, and only + * in O_RDONLY mode. + * + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. + */ +__be32 +nfsd_file_acquire_dir(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, fhp, + NFSD_MAY_READ|NFSD_MAY_64BIT_COOKIE, + NULL, S_IFDIR, false, pnf); } /* @@ -1193,7 +1383,7 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, */ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long releases = 0, evictions = 0; + unsigned long allocations = 0, releases = 0, evictions = 0; unsigned long hits = 0, acquisitions = 0; unsigned int i, count = 0, buckets = 0; unsigned long lru = 0, total_age = 0; @@ -1218,6 +1408,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) for_each_possible_cpu(i) { hits += per_cpu(nfsd_file_cache_hits, i); acquisitions += per_cpu(nfsd_file_acquisitions, i); + allocations += per_cpu(nfsd_file_allocations, i); releases += per_cpu(nfsd_file_releases, i); total_age += per_cpu(nfsd_file_total_age, i); evictions += per_cpu(nfsd_file_evictions, i); @@ -1228,6 +1419,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); + seq_printf(m, "allocations: %lu\n", allocations); seq_printf(m, "releases: %lu\n", releases); seq_printf(m, "evictions: %lu\n", evictions); if (releases) |
