diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/caps.c | 9 | ||||
-rw-r--r-- | fs/ceph/inode.c | 9 | ||||
-rw-r--r-- | fs/ceph/locks.c | 177 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 96 | ||||
-rw-r--r-- | fs/ceph/super.c | 5 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 | ||||
-rw-r--r-- | fs/lockd/svc.c | 20 | ||||
-rw-r--r-- | fs/nfs_common/grace.c | 24 | ||||
-rw-r--r-- | fs/nfsd/fault_inject.c | 5 | ||||
-rw-r--r-- | fs/nfsd/netns.h | 2 | ||||
-rw-r--r-- | fs/nfsd/nfs3xdr.c | 10 | ||||
-rw-r--r-- | fs/nfsd/nfs4layouts.c | 4 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 19 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 127 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 4 | ||||
-rw-r--r-- | fs/nfsd/state.h | 11 | ||||
-rw-r--r-- | fs/nfsd/xdr4.h | 13 | ||||
-rw-r--r-- | fs/orangefs/acl.c | 10 | ||||
-rw-r--r-- | fs/orangefs/dir.c | 1 | ||||
-rw-r--r-- | fs/orangefs/file.c | 16 | ||||
-rw-r--r-- | fs/orangefs/inode.c | 17 | ||||
-rw-r--r-- | fs/orangefs/namei.c | 45 | ||||
-rw-r--r-- | fs/orangefs/orangefs-debug.h | 4 | ||||
-rw-r--r-- | fs/orangefs/orangefs-kernel.h | 31 | ||||
-rw-r--r-- | fs/orangefs/orangefs-utils.c | 86 | ||||
-rw-r--r-- | fs/orangefs/super.c | 15 | ||||
-rw-r--r-- | fs/orangefs/symlink.c | 1 |
27 files changed, 435 insertions, 330 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ff5d32cf9578..a14b2c974c9e 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1160,7 +1160,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, struct ceph_inode_info *ci = cap->ci; struct inode *inode = &ci->vfs_inode; struct cap_msg_args arg; - int held, revoking, dropping; + int held, revoking; int wake = 0; int delayed = 0; int ret; @@ -1168,7 +1168,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, held = cap->issued | cap->implemented; revoking = cap->implemented & ~cap->issued; retain &= ~revoking; - dropping = cap->issued & ~retain; dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n", inode, cap, cap->session, @@ -1712,7 +1711,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* if we are unmounting, flush any unused caps immediately. */ if (mdsc->stopping) - is_delayed = 1; + is_delayed = true; spin_lock(&ci->i_ceph_lock); @@ -3189,8 +3188,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, int dirty = le32_to_cpu(m->dirty); int cleaned = 0; bool drop = false; - bool wake_ci = 0; - bool wake_mdsc = 0; + bool wake_ci = false; + bool wake_mdsc = false; list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) { if (cf->tid == flush_tid) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f2550a076edc..ab81652198c4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -493,6 +493,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_wb_ref = 0; ci->i_wrbuffer_ref = 0; ci->i_wrbuffer_ref_head = 0; + atomic_set(&ci->i_filelock_ref, 0); ci->i_shared_gen = 0; ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; @@ -786,7 +787,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, /* update inode */ ci->i_version = le64_to_cpu(info->version); - inode->i_version++; inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; @@ -1185,6 +1185,7 @@ retry_lookup: ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); + ceph_dir_clear_ordered(dir); d_delete(dn); dput(dn); goto retry_lookup; @@ -1322,6 +1323,7 @@ retry_lookup: dout(" %p links to %p %llx.%llx, not %llx.%llx\n", dn, d_inode(dn), ceph_vinop(d_inode(dn)), ceph_vinop(in)); + ceph_dir_clear_ordered(dir); d_invalidate(dn); have_lease = false; } @@ -1573,6 +1575,7 @@ retry_lookup: ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); + __ceph_dir_clear_ordered(ci); d_delete(dn); dput(dn); goto retry_lookup; @@ -1597,7 +1600,9 @@ retry_lookup: &req->r_caps_reservation); if (ret < 0) { pr_err("fill_inode badness on %p\n", in); - if (d_really_is_negative(dn)) + if (d_really_is_positive(dn)) + __ceph_dir_clear_ordered(ci); + else iput(in); d_drop(dn); err = ret; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index e7cce412f2cf..9e66f69ee8a5 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -30,19 +30,52 @@ void __init ceph_flock_init(void) get_random_bytes(&lock_secret, sizeof(lock_secret)); } +static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) +{ + struct inode *inode = file_inode(src->fl_file); + atomic_inc(&ceph_inode(inode)->i_filelock_ref); +} + +static void ceph_fl_release_lock(struct file_lock *fl) +{ + struct inode *inode = file_inode(fl->fl_file); + struct ceph_inode_info *ci = ceph_inode(inode); + if (atomic_dec_and_test(&ci->i_filelock_ref)) { + /* clear error when all locks are released */ + spin_lock(&ci->i_ceph_lock); + ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; + spin_unlock(&ci->i_ceph_lock); + } +} + +static const struct file_lock_operations ceph_fl_lock_ops = { + .fl_copy_lock = ceph_fl_copy_lock, + .fl_release_private = ceph_fl_release_lock, +}; + /** * Implement fcntl and flock locking functions. */ -static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, +static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, int cmd, u8 wait, struct file_lock *fl) { - struct inode *inode = file_inode(file); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; int err; u64 length = 0; u64 owner; + if (operation == CEPH_MDS_OP_SETFILELOCK) { + /* + * increasing i_filelock_ref closes race window between + * handling request reply and adding file_lock struct to + * inode. Otherwise, auth caps may get trimmed in the + * window. Caller function will decrease the counter. + */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ceph_inode(inode)->i_filelock_ref); + } + if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) wait = 0; @@ -180,10 +213,12 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, */ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { - u8 lock_cmd; - int err; - u8 wait = 0; + struct inode *inode = file_inode(file); + struct ceph_inode_info *ci = ceph_inode(inode); + int err = 0; u16 op = CEPH_MDS_OP_SETFILELOCK; + u8 wait = 0; + u8 lock_cmd; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; @@ -199,6 +234,26 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else if (IS_SETLKW(cmd)) wait = 1; + spin_lock(&ci->i_ceph_lock); + if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + err = -EIO; + } else if (op == CEPH_MDS_OP_SETFILELOCK) { + /* + * increasing i_filelock_ref closes race window between + * handling request reply and adding file_lock struct to + * inode. Otherwise, i_auth_cap may get trimmed in the + * window. Caller function will decrease the counter. + */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ci->i_filelock_ref); + } + spin_unlock(&ci->i_ceph_lock); + if (err < 0) { + if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) + posix_lock_file(file, fl, NULL); + return err; + } + if (F_RDLCK == fl->fl_type) lock_cmd = CEPH_LOCK_SHARED; else if (F_WRLCK == fl->fl_type) @@ -206,16 +261,16 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else lock_cmd = CEPH_LOCK_UNLOCK; - err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); + err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { - if (op != CEPH_MDS_OP_GETFILELOCK) { + if (op == CEPH_MDS_OP_SETFILELOCK) { dout("mds locked, locking locally"); err = posix_lock_file(file, fl, NULL); - if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { + if (err) { /* undo! This should only happen if * the kernel detects local * deadlock. */ - ceph_lock_message(CEPH_LOCK_FCNTL, op, file, + ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on posix_lock_file, undid lock", err); @@ -227,9 +282,11 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { - u8 lock_cmd; - int err; + struct inode *inode = file_inode(file); + struct ceph_inode_info *ci = ceph_inode(inode); + int err = 0; u8 wait = 0; + u8 lock_cmd; if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; @@ -239,6 +296,21 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) dout("ceph_flock, fl_file: %p", fl->fl_file); + spin_lock(&ci->i_ceph_lock); + if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + err = -EIO; + } else { + /* see comment in ceph_lock */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ci->i_filelock_ref); + } + spin_unlock(&ci->i_ceph_lock); + if (err < 0) { + if (F_UNLCK == fl->fl_type) + locks_lock_file_wait(file, fl); + return err; + } + if (IS_SETLKW(cmd)) wait = 1; @@ -250,13 +322,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) lock_cmd = CEPH_LOCK_UNLOCK; err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, lock_cmd, wait, fl); + inode, lock_cmd, wait, fl); if (!err) { err = locks_lock_file_wait(file, fl); if (err) { ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, CEPH_LOCK_UNLOCK, 0, fl); + inode, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on locks_lock_file_wait, undid lock", err); } } @@ -288,6 +360,37 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) *flock_count, *fcntl_count); } +/* + * Given a pointer to a lock, convert it to a ceph filelock + */ +static int lock_to_ceph_filelock(struct file_lock *lock, + struct ceph_filelock *cephlock) +{ + int err = 0; + cephlock->start = cpu_to_le64(lock->fl_start); + cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); + cephlock->client = cpu_to_le64(0); + cephlock->pid = cpu_to_le64((u64)lock->fl_pid); + cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); + + switch (lock->fl_type) { + case F_RDLCK: + cephlock->type = CEPH_LOCK_SHARED; + break; + case F_WRLCK: + cephlock->type = CEPH_LOCK_EXCL; + break; + case F_UNLCK: + cephlock->type = CEPH_LOCK_UNLOCK; + break; + default: + dout("Have unknown lock type %d", lock->fl_type); + err = -EINVAL; + } + + return err; +} + /** * Encode the flock and fcntl locks for the given inode into the ceph_filelock * array. Must be called with inode->i_lock already held. @@ -356,50 +459,22 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks, if (err) goto out_fail; - err = ceph_pagelist_append(pagelist, flocks, - num_fcntl_locks * sizeof(*flocks)); - if (err) - goto out_fail; + if (num_fcntl_locks > 0) { + err = ceph_pagelist_append(pagelist, flocks, + num_fcntl_locks * sizeof(*flocks)); + if (err) + goto out_fail; + } nlocks = cpu_to_le32(num_flock_locks); err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); if (err) goto out_fail; - err = ceph_pagelist_append(pagelist, - &flocks[num_fcntl_locks], - num_flock_locks * sizeof(*flocks)); -out_fail: - return err; -} - -/* - * Given a pointer to a lock, convert it to a ceph filelock - */ -int lock_to_ceph_filelock(struct file_lock *lock, - struct ceph_filelock *cephlock) -{ - int err = 0; - cephlock->start = cpu_to_le64(lock->fl_start); - cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); - cephlock->client = cpu_to_le64(0); - cephlock->pid = cpu_to_le64((u64)lock->fl_pid); - cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); - - switch (lock->fl_type) { - case F_RDLCK: - cephlock->type = CEPH_LOCK_SHARED; - break; - case F_WRLCK: - cephlock->type = CEPH_LOCK_EXCL; - break; - case F_UNLCK: - cephlock->type = CEPH_LOCK_UNLOCK; - break; - default: - dout("Have unknown lock type %d", lock->fl_type); - err = -EINVAL; + if (num_flock_locks > 0) { + err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks], + num_flock_locks * sizeof(*flocks)); } - +out_fail: return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0687ab3c3267..ab69dcb70e8a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1039,22 +1039,23 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, * session caps */ -/* caller holds s_cap_lock, we drop it */ -static void cleanup_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) - __releases(session->s_cap_lock) +static void detach_cap_releases(struct ceph_mds_session *session, + struct list_head *target) { - LIST_HEAD(tmp_list); - list_splice_init(&session->s_cap_releases, &tmp_list); + lockdep_assert_held(&session->s_cap_lock); + + list_splice_init(&session->s_cap_releases, target); session->s_num_cap_releases = 0; - spin_unlock(&session->s_cap_lock); + dout("dispose_cap_releases mds%d\n", session->s_mds); +} - dout("cleanup_cap_releases mds%d\n", session->s_mds); - while (!list_empty(&tmp_list)) { +static void dispose_cap_releases(struct ceph_mds_client *mdsc, + struct list_head *dispose) +{ + while (!list_empty(dispose)) { struct ceph_cap *cap; /* zero out the in-progress message */ - cap = list_first_entry(&tmp_list, - struct ceph_cap, session_caps); + cap = list_first_entry(dispose, struct ceph_cap, session_caps); list_del(&cap->session_caps); ceph_put_cap(mdsc, cap); } @@ -1215,6 +1216,13 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, } spin_unlock(&mdsc->cap_dirty_lock); + if (atomic_read(&ci->i_filelock_ref) > 0) { + /* make further file lock syscall return -EIO */ + ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK; + pr_warn_ratelimited(" dropping file locks for %p %lld\n", + inode, ceph_ino(inode)); + } + if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) { list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove); ci->i_prealloc_cap_flush = NULL; @@ -1244,6 +1252,8 @@ static void remove_session_caps(struct ceph_mds_session *session) { struct ceph_fs_client *fsc = session->s_mdsc->fsc; struct super_block *sb = fsc->sb; + LIST_HEAD(dispose); + dout("remove_session_caps on %p\n", session); iterate_session_caps(session, remove_session_caps_cb, fsc); @@ -1278,10 +1288,12 @@ static void remove_session_caps(struct ceph_mds_session *session) } // drop cap expires and unlock s_cap_lock - cleanup_cap_releases(session->s_mdsc, session); + detach_cap_releases(session, &dispose); BUG_ON(session->s_nr_caps > 0); BUG_ON(!list_empty(&session->s_cap_flushing)); + spin_unlock(&session->s_cap_lock); + dispose_cap_releases(session->s_mdsc, &dispose); } /* @@ -1462,6 +1474,11 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) goto out; if ((used | wanted) & CEPH_CAP_ANY_WR) goto out; + /* Note: it's possible that i_filelock_ref becomes non-zero + * after dropping auth caps. It doesn't hurt because reply + * of lock mds request will re-add auth caps. */ + if (atomic_read(&ci->i_filelock_ref) > 0) + goto out; } /* The inode has cached pages, but it's no longer used. * we can safely drop it */ @@ -2827,7 +2844,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect_v1 v1; } rec; - struct ceph_inode_info *ci; + struct ceph_inode_info *ci = cap->ci; struct ceph_reconnect_state *recon_state = arg; struct ceph_pagelist *pagelist = recon_state->pagelist; char *path; @@ -2836,8 +2853,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, u64 snap_follows; struct dentry *dentry; - ci = cap->ci; - dout(" adding %p ino %llx.%llx cap %p %lld %s\n", inode, ceph_vinop(inode), cap, cap->cap_id, ceph_cap_string(cap->issued)); @@ -2870,7 +2885,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); rec.v2.pathbase = cpu_to_le64(pathbase); - rec.v2.flock_len = 0; + rec.v2.flock_len = (__force __le32) + ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { rec.v1.cap_id = cpu_to_le64(cap->cap_id); rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); @@ -2894,26 +2910,37 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, if (recon_state->msg_version >= 2) { int num_fcntl_locks, num_flock_locks; - struct ceph_filelock *flocks; + struct ceph_filelock *flocks = NULL; size_t struct_len, total_len = 0; u8 struct_v = 0; encode_again: - ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); - flocks = kmalloc((num_fcntl_locks+num_flock_locks) * - sizeof(struct ceph_filelock), GFP_NOFS); - if (!flocks) { - err = -ENOMEM; - goto out_free; + if (rec.v2.flock_len) { + ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); + } else { + num_fcntl_locks = 0; + num_flock_locks = 0; } - err = ceph_encode_locks_to_buffer(inode, flocks, - num_fcntl_locks, - num_flock_locks); - if (err) { + if (num_fcntl_locks + num_flock_locks > 0) { + flocks = kmalloc((num_fcntl_locks + num_flock_locks) * + sizeof(struct ceph_filelock), GFP_NOFS); + if (!flocks) { + err = -ENOMEM; + goto out_free; + } + err = ceph_encode_locks_to_buffer(inode, flocks, + num_fcntl_locks, + num_flock_locks); + if (err) { + kfree(flocks); + flocks = NULL; + if (err == -ENOSPC) + goto encode_again; + goto out_free; + } + } else { kfree(flocks); - if (err == -ENOSPC) - goto encode_again; - goto out_free; + flocks = NULL; } if (recon_state->msg_version >= 3) { @@ -2993,6 +3020,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int s_nr_caps; struct ceph_pagelist *pagelist; struct ceph_reconnect_state recon_state; + LIST_HEAD(dispose); pr_info("mds%d reconnect start\n", mds); @@ -3026,7 +3054,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, */ session->s_cap_reconnect = 1; /* drop old cap expires; we're about to reestablish that state */ - cleanup_cap_releases(mdsc, session); + detach_cap_releases(session, &dispose); + spin_unlock(&session->s_cap_lock); + dispose_cap_releases(mdsc, &dispose); /* trim unused caps to reduce MDS's cache rejoin time */ if (mdsc->fsc->sb->s_root) @@ -3857,14 +3887,14 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) goto err_out; } return; + bad: pr_err("error decoding fsmap\n"); err_out: mutex_lock(&mdsc->mutex); - mdsc->mdsmap_err = -ENOENT; + mdsc->mdsmap_err = err; __wake_requests(mdsc, &mdsc->waiting_for_map); mutex_unlock(&mdsc->mutex); - return; } /* diff --git a/fs/ceph/super.c b/fs/ceph/super.c index e4082afedcb1..fe9fbb3f13f7 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -84,8 +84,9 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - /* leave fsid little-endian, regardless of host endianness */ - fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); + /* Must convert the fsid, for consistent values across arches */ + fsid = le64_to_cpu(*(__le64 *)(&monmap->fsid)) ^ + le64_to_cpu(*((__le64 *)&monmap->fsid + 1)); buf->f_fsid.val[0] = fsid & 0xffffffff; buf->f_fsid.val[1] = fsid >> 32; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3e27a28aa44a..2beeec07fa76 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -352,6 +352,7 @@ struct ceph_inode_info { int i_pin_ref; int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; int i_wrbuffer_ref, i_wrbuffer_ref_head; + atomic_t i_filelock_ref; u32 i_shared_gen; /* increment each time we get FILE_SHARED */ u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ @@ -487,6 +488,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, #define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */ #define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */ #define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */ +#define CEPH_I_ERROR_FILELOCK (1 << 12) /* have seen file lock errors */ + /* * We set the ERROR_WRITE bit when we start seeing write errors on an inode @@ -1011,7 +1014,6 @@ extern int ceph_encode_locks_to_buffer(struct inode *inode, extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, struct ceph_pagelist *pagelist, int num_fcntl_locks, int num_flock_locks); -extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); /* debugfs.c */ extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index b837fb7e290a..a8e3777c94dc 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -369,6 +369,7 @@ static int lockd_start_svc(struct svc_serv *serv) printk(KERN_WARNING "lockd_up: svc_rqst allocation failed, error=%d\n", error); + lockd_unregister_notifiers(); goto out_rqst; } @@ -459,13 +460,16 @@ int lockd_up(struct net *net) } error = lockd_up_net(serv, net); - if (error < 0) - goto err_net; + if (error < 0) { + lockd_unregister_notifiers(); + goto err_put; + } error = lockd_start_svc(serv); - if (error < 0) - goto err_start; - + if (error < 0) { + lockd_down_net(serv, net); + goto err_put; + } nlmsvc_users++; /* * Note: svc_serv structures have an initial use count of 1, @@ -476,12 +480,6 @@ err_put: err_create: mutex_unlock(&nlmsvc_mutex); return error; - -err_start: - lockd_down_net(serv, net); -err_net: - lockd_unregister_notifiers(); - goto err_put; } EXPORT_SYMBOL_GPL(lockd_up); diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 420d3a0ab258..897b299db55e 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -55,14 +55,7 @@ locks_end_grace(struct lock_manager *lm) } EXPORT_SYMBOL_GPL(locks_end_grace); -/** - * locks_in_grace - * - * Lock managers call this function to determine when it is OK for them - * to answer ordinary lock requests, and when they should accept only - * lock reclaims. - */ -int +static bool __state_in_grace(struct net *net, bool open) { struct list_head *grace_list = net_generic(net, grace_net_id); @@ -78,15 +71,22 @@ __state_in_grace(struct net *net, bool open) return false; } -int locks_in_grace(struct net *net) +/** + * locks_in_grace + * + * Lock managers call this function to determine when it is OK for them + * to answer ordinary lock requests, and when they should accept only + * lock reclaims. + */ +bool locks_in_grace(struct net *net) { - return __state_in_grace(net, 0); + return __state_in_grace(net, false); } EXPORT_SYMBOL_GPL(locks_in_grace); -int opens_in_grace(struct net *net) +bool opens_in_grace(struct net *net) { - return __state_in_grace(net, 1); + return __state_in_grace(net, true); } EXPORT_SYMBOL_GPL(opens_in_grace); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 6dfede6d172a..84831253203d 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -12,6 +12,7 @@ #include <linux/nsproxy.h> #include <linux/sunrpc/addr.h> #include <linux/uaccess.h> +#include <linux/kernel.h> #include "state.h" #include "netns.h" @@ -126,8 +127,6 @@ static struct nfsd_fault_inject_op inject_ops[] = { }, }; -#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op)) - int nfsd_fault_inject_init(void) { unsigned int i; @@ -138,7 +137,7 @@ int nfsd_fault_inject_init(void) if (!debug_dir) goto fail; - for (i = 0; i < NUM_INJECT_OPS; i++) { + for (i = 0; i < ARRAY_SIZE(inject_ops); i++) { op = &inject_ops[i]; if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) goto fail; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 3714231a9d0f..1c91391f4805 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -107,7 +107,7 @@ struct nfsd_net { bool lockd_up; /* Time of server startup */ - struct timeval nfssvc_boot; + struct timespec64 nfssvc_boot; /* * Max number of connections this nfsd container will allow. Defaults diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f38acd905441..2758480555fa 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -748,8 +748,9 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - *p++ = htonl(nn->nfssvc_boot.tv_sec); - *p++ = htonl(nn->nfssvc_boot.tv_usec); + /* unique identifier, y2038 overflow can be ignored */ + *p++ = htonl((u32)nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_nsec); } return xdr_ressize_check(rqstp, p); } @@ -1119,8 +1120,9 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - *p++ = htonl(nn->nfssvc_boot.tv_sec); - *p++ = htonl(nn->nfssvc_boot.tv_usec); + /* unique identifier, y2038 overflow can be ignored */ + *p++ = htonl((u32)nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_nsec); } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ea45d954e8d7..7d888369f85a 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -336,7 +336,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) trace_layout_recall(&ls->ls_stid.sc_stateid); - atomic_inc(&ls->ls_stid.sc_count); + refcount_inc(&ls->ls_stid.sc_count); nfsd4_run_cb(&ls->ls_recall); out_unlock: @@ -441,7 +441,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) goto done; } - atomic_inc(&ls->ls_stid.sc_count); + refcount_inc(&ls->ls_stid.sc_count); list_add_tail(&new->lo_perstate, &ls->ls_layouts); new = NULL; done: diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8487486ec496..008ea0b627d0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -485,9 +485,6 @@ static __be32 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - if (!cstate->current_fh.fh_dentry) - return nfserr_nofilehandle; - u->getfh = &cstate->current_fh; return nfs_ok; } @@ -535,9 +532,6 @@ static __be32 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - if (!cstate->current_fh.fh_dentry) - return nfserr_nofilehandle; - fh_dup2(&cstate->save_fh, &cstate->current_fh); if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); @@ -570,10 +564,11 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) /* * This is opaque to client, so no need to byte-swap. Use - * __force to keep sparse happy + * __force to keep sparse happy. y2038 time_t overflow is + * irrelevant in this usage. */ verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec; + verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec; memcpy(verifier->data, verf, sizeof(verifier->data)); } @@ -703,10 +698,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_link *link = &u->link; - __be32 status = nfserr_nofilehandle; + __be32 status; - if (!cstate->save_fh.fh_dentry) - return status; status = nfsd_link(rqstp, &cstate->current_fh, link->li_name, link->li_namelen, &cstate->save_fh); if (!status) @@ -850,10 +843,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_rename *rename = &u->rename; - __be32 status = nfserr_nofilehandle; + __be32 status; - if (!cstate->save_fh.fh_dentry) - return status; if (opens_in_grace(SVC_NET(rqstp)) && !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0c04f81aa63b..b82817767b9d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -359,7 +359,7 @@ put_nfs4_file(struct nfs4_file *fi) { might_lock(&state_lock); - if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { + if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del_rcu(&fi->fi_hash); spin_unlock(&state_lock); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); @@ -568,7 +568,7 @@ alloc_clnt_odstate(struct nfs4_client *clp) co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL); if (co) { co->co_client = clp; - atomic_set(&co->co_odcount, 1); + refcount_set(&co->co_odcount, 1); } return co; } @@ -586,7 +586,7 @@ static inline void get_clnt_odstate(struct nfs4_clnt_odstate *co) { if (co) - atomic_inc(&co->co_odcount); + refcount_inc(&co->co_odcount); } static void @@ -598,7 +598,7 @@ put_clnt_odstate(struct nfs4_clnt_odstate *co) return; fp = co->co_file; - if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { + if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { list_del(&co->co_perfile); spin_unlock(&fp->fi_lock); @@ -656,7 +656,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - atomic_set(&stid->sc_count, 1); + refcount_set(&stid->sc_count, 1); spin_lock_init(&stid->sc_lock); /* @@ -813,7 +813,7 @@ nfs4_put_stid(struct nfs4_stid *s) might_lock(&clp->cl_lock); - if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) { + if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) { wake_up_all(&close_wq); return; } @@ -913,7 +913,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) if (status) return status; ++fp->fi_delegees; - atomic_inc(&dp->dl_stid.sc_count); + refcount_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &clp->cl_delegations); @@ -1214,7 +1214,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, WARN_ON_ONCE(!list_empty(&stp->st_locks)); - if (!atomic_dec_and_test(&s->sc_count)) { + if (!refcount_dec_and_test(&s->sc_count)) { wake_up_all(&close_wq); return; } @@ -1439,8 +1439,10 @@ free_session_slots(struct nfsd4_session *ses) { int i; - for (i = 0; i < ses->se_fchannel.maxreqs; i++) + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { + free_svc_cred(&ses->se_slots[i]->sl_cred); kfree(ses->se_slots[i]); + } } /* @@ -1472,6 +1474,11 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) spin_lock(&nfsd_drc_lock); avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, nfsd_drc_max_mem - nfsd_drc_mem_used); + /* + * Never use more than a third of the remaining memory, + * unless it's the only way to give this client a slot: + */ + avail = clamp_t(int, avail, slotsize, avail/3); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); @@ -2072,7 +2079,7 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) s = find_stateid_locked(cl, t); if (s != NULL) { if (typemask & s->sc_type) - atomic_inc(&s->sc_count); + refcount_inc(&s->sc_count); else s = NULL; } @@ -2287,14 +2294,18 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) dprintk("--> %s slot %p\n", __func__, slot); + slot->sl_flags |= NFSD4_SLOT_INITIALIZED; slot->sl_opcnt = resp->opcnt; slot->sl_status = resp->cstate.status; + free_svc_cred(&slot->sl_cred); + copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred); - slot->sl_flags |= NFSD4_SLOT_INITIALIZED; - if (nfsd4_not_cached(resp)) { - slot->sl_datalen = 0; + if (!nfsd4_cache_this(resp)) { + slot->sl_flags &= ~NFSD4_SLOT_CACHED; return; } + slot->sl_flags |= NFSD4_SLOT_CACHED; + base = resp->cstate.data_offset; slot->sl_datalen = buf->len - base; if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) @@ -2321,8 +2332,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, op = &args->ops[resp->opcnt - 1]; nfsd4_encode_operation(resp, op); - /* Return nfserr_retry_uncached_rep in next operation. */ - if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) { + if (slot->sl_flags & NFSD4_SLOT_CACHED) + return op->status; + if (args->opcnt == 1) { + /* + * The original operation wasn't a solo sequence--we + * always cache those--so this retry must not match the + * original: + */ + op->status = nfserr_seq_false_retry; + } else { op = &args->ops[resp->opcnt++]; op->status = nfserr_retry_uncached_rep; nfsd4_encode_operation(resp, op); @@ -2986,6 +3005,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp, return xb->len > session->se_fchannel.maxreq_sz; } +static bool replay_matches_cache(struct svc_rqst *rqstp, + struct nfsd4_sequence *seq, struct nfsd4_slot *slot) +{ + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) != + (bool)seq->cachethis) + return false; + /* + * If there's an error than the reply can have fewer ops than + * the call. But if we cached a reply with *more* ops than the + * call you're sending us now, then this new call is clearly not + * really a replay of the old one: + */ + if (slot->sl_opcnt < argp->opcnt) + return false; + /* This is the only check explicitly called by spec: */ + if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) + return false; + /* + * There may be more comparisons we could actually do, but the + * spec doesn't require us to catch every case where the calls + * don't match (that would require caching the call as well as + * the reply), so we don't bother. + */ + return true; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -3045,6 +3092,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) goto out_put_session; + status = nfserr_seq_false_retry; + if (!replay_matches_cache(rqstp, seq, slot)) + goto out_put_session; cstate->slot = slot; cstate->session = session; cstate->clp = clp; @@ -3351,7 +3401,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, { lockdep_assert_held(&state_lock); - atomic_set(&fp->fi_ref, 1); + refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); @@ -3514,7 +3564,7 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) continue; if (local->st_stateowner == &oo->oo_owner) { ret = local; - atomic_inc(&ret->st_stid.sc_count); + refcount_inc(&ret->st_stid.sc_count); break; } } @@ -3573,7 +3623,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) goto out_unlock; open->op_stp = NULL; - atomic_inc(&stp->st_stid.sc_count); + refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner); @@ -3621,7 +3671,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) * there should be no danger of the refcount going back up again at * this point. */ - wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2); + wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2); release_all_access(s); if (s->st_stid.sc_file) { @@ -3647,7 +3697,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval) hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { if (fh_match(&fp->fi_fhandle, fh)) { - if (atomic_inc_not_zero(&fp->fi_ref)) + if (refcount_inc_not_zero(&fp->fi_ref)) return fp; } } @@ -3783,7 +3833,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * lock) we know the server hasn't removed the lease yet, we know * it's safe to take a reference. */ - atomic_inc(&dp->dl_stid.sc_count); + refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&dp->dl_recall); } @@ -3966,7 +4016,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei { struct nfs4_stid *ret; - ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); + ret = find_stateid_by_type(cl, s, + NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID); if (!ret) return NULL; return delegstateid(ret); @@ -3989,6 +4040,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); if (deleg == NULL) goto out; + if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) { + nfs4_put_stid(&deleg->dl_stid); + if (cl->cl_minorversion) + status = nfserr_deleg_revoked; + goto out; + } flags = share_access_to_flags(open->op_share_access); status = nfs4_check_delegmode(deleg, flags); if (status) { @@ -4858,6 +4915,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; + bool return_revoked = false; + + /* + * only return revoked delegations if explicitly asked. + * otherwise we report revoked or bad_stateid status. + */ + if (typemask & NFS4_REVOKED_DELEG_STID) + return_revoked = true; + else if (typemask & NFS4_DELEG_STID) + typemask |= NFS4_REVOKED_DELEG_STID; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; @@ -4872,6 +4939,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, *s = find_stateid_by_type(cstate->clp, stateid, typemask); if (!*s) return nfserr_bad_stateid; + if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { + nfs4_put_stid(*s); + if (cstate->minorversion) + return nfserr_deleg_revoked; + return nfserr_bad_stateid; + } return nfs_ok; } @@ -5071,7 +5144,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ret = nfserr_locks_held; break; case NFS4_LOCK_STID: - atomic_inc(&s->sc_count); + refcount_inc(&s->sc_count); spin_unlock(&cl->cl_lock); ret = nfsd4_free_lock_stateid(stateid, s); goto out; @@ -5578,7 +5651,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, lockdep_assert_held(&clp->cl_lock); - atomic_inc(&stp->st_stid.sc_count); + refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); @@ -5604,7 +5677,7 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { if (lst->st_stid.sc_file == fp) { - atomic_inc(&lst->st_stid.sc_count); + refcount_inc(&lst->st_stid.sc_count); return lst; } } @@ -7006,8 +7079,8 @@ nfs4_state_start_net(struct net *net) nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); - printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", - nn->nfsd4_grace, net); + printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", + nn->nfsd4_grace, net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e02bd2783124..33117d4ffce0 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -447,7 +447,7 @@ void nfsd_reset_versions(void) */ static void set_max_drc(void) { - #define NFSD_DRC_SIZE_SHIFT 10 + #define NFSD_DRC_SIZE_SHIFT 7 nfsd_drc_max_mem = (nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; @@ -517,7 +517,7 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ + ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */ return 0; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 005c911b34ac..f3772ea8ba0d 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -36,6 +36,7 @@ #define _NFSD4_STATE_H #include <linux/idr.h> +#include <linux/refcount.h> #include <linux/sunrpc/svc_xprt.h> #include "nfsfh.h" @@ -83,7 +84,7 @@ struct nfsd4_callback_ops { * fields that are of general use to any stateid. */ struct nfs4_stid { - atomic_t sc_count; + refcount_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -169,11 +170,13 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) struct nfsd4_slot { u32 sl_seqid; __be32 sl_status; + struct svc_cred sl_cred; u32 sl_datalen; u16 sl_opcnt; #define NFSD4_SLOT_INUSE (1 << 0) #define NFSD4_SLOT_CACHETHIS (1 << 1) #define NFSD4_SLOT_INITIALIZED (1 << 2) +#define NFSD4_SLOT_CACHED (1 << 3) u8 sl_flags; char sl_data[]; }; @@ -465,7 +468,7 @@ struct nfs4_clnt_odstate { struct nfs4_client *co_client; struct nfs4_file *co_file; struct list_head co_perfile; - atomic_t co_odcount; + refcount_t co_odcount; }; /* @@ -481,7 +484,7 @@ struct nfs4_clnt_odstate { * the global state_lock spinlock. */ struct nfs4_file { - atomic_t fi_ref; + refcount_t fi_ref; spinlock_t fi_lock; struct hlist_node fi_hash; /* hash on fi_fhandle */ struct list_head fi_stateids; @@ -634,7 +637,7 @@ struct nfs4_file *find_file(struct knfsd_fh *fh); void put_nfs4_file(struct nfs4_file *fi); static inline void get_nfs4_file(struct nfs4_file *fi) { - atomic_inc(&fi->fi_ref); + refcount_inc(&fi->fi_ref); } struct file *find_any_file(struct nfs4_file *f); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 1e4edbf70052..bc29511b6405 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -649,9 +649,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; } -static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) +/* + * The session reply cache only needs to cache replies that the client + * actually asked us to. But it's almost free for us to cache compounds + * consisting of only a SEQUENCE op, so we may as well cache those too. + * Also, the protocol doesn't give us a convenient response in the case + * of a replay of a solo SEQUENCE op that wasn't cached + * (RETRY_UNCACHED_REP can only be returned in the second op of a + * compound). + */ +static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp) { - return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) + return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) || nfsd4_is_solo_sequence(resp); } diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index c2d8233b1e82..480ea059a680 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -155,13 +155,11 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) int orangefs_init_acl(struct inode *inode, struct inode *dir) { - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct posix_acl *default_acl, *acl; umode_t mode = inode->i_mode; + struct iattr iattr; int error = 0; - ClearModeFlag(orangefs_inode); - error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) return error; @@ -180,9 +178,11 @@ int orangefs_init_acl(struct inode *inode, struct inode *dir) /* If mode of the inode was changed, then do a forcible ->setattr */ if (mode != inode->i_mode) { - SetModeFlag(orangefs_inode); + memset(&iattr, 0, sizeof iattr); inode->i_mode = mode; - orangefs_flush_inode(inode); + iattr.ia_mode = mode; + iattr.ia_valid |= ATTR_MODE; + orangefs_inode_setattr(inode, &iattr); } return error; diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index a8cc588d6224..e2c2699d8016 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -386,7 +386,6 @@ static int orangefs_dir_release(struct inode *inode, struct file *file) { struct orangefs_dir *od = file->private_data; struct orangefs_dir_part *part = od->part; - orangefs_flush_inode(inode); while (part) { struct orangefs_dir_part *next = part->next; vfree(part); diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index e4a8e6a7eb17..1668fd645c45 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -383,9 +383,15 @@ out: if (type == ORANGEFS_IO_READ) { file_accessed(file); } else { - SetMtimeFlag(orangefs_inode); - inode->i_mtime = current_time(inode); - mark_inode_dirty_sync(inode); + file_update_time(file); + /* + * Must invalidate to ensure write loop doesn't + * prevent kernel from reading updated + * attribute. Size probably changed because of + * the write, and other clients could update + * any other attribute. + */ + orangefs_inode->getattr_time = jiffies - 1; } } @@ -615,8 +621,6 @@ static int orangefs_file_release(struct inode *inode, struct file *file) "orangefs_file_release: called on %pD\n", file); - orangefs_flush_inode(inode); - /* * remove all associated inode pages from the page cache and * readahead cache (if any); this forces an expensive refresh of @@ -666,8 +670,6 @@ static int orangefs_fsync(struct file *file, ret); op_release(new_op); - - orangefs_flush_inode(file_inode(file)); return ret; } diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 28825a5b6d09..fe1d705ad91f 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -290,6 +290,22 @@ int orangefs_permission(struct inode *inode, int mask) return generic_permission(inode, mask); } +int orangefs_update_time(struct inode *inode, struct timespec *time, int flags) +{ + struct iattr iattr; + gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n", + get_khandle_from_ino(inode)); + generic_update_time(inode, time, flags); + memset(&iattr, 0, sizeof iattr); + if (flags & S_ATIME) + iattr.ia_valid |= ATTR_ATIME; + if (flags & S_CTIME) + iattr.ia_valid |= ATTR_CTIME; + if (flags & S_MTIME) + iattr.ia_valid |= ATTR_MTIME; + return orangefs_inode_setattr(inode, &iattr); +} + /* ORANGEDS2 implementation of VFS inode operations for files */ const struct inode_operations orangefs_file_inode_operations = { .get_acl = orangefs_get_acl, @@ -298,6 +314,7 @@ const struct inode_operations orangefs_file_inode_operations = { .getattr = orangefs_getattr, .listxattr = orangefs_listxattr, .permission = orangefs_permission, + .update_time = orangefs_update_time, }; static int orangefs_init_iops(struct inode *inode) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 7e9e5d0ea3bc..c98bba2dbc94 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -22,7 +22,9 @@ static int orangefs_create(struct inode *dir, { struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; + struct orangefs_object_kref ref; struct inode *inode; + struct iattr iattr; int ret; gossip_debug(GOSSIP_NAME_DEBUG, "%s: %pd\n", @@ -55,8 +57,10 @@ static int orangefs_create(struct inode *dir, if (ret < 0) goto out; - inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0, - &new_op->downcall.resp.create.refn); + ref = new_op->downcall.resp.create.refn; + op_release(new_op); + + inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0, &ref); if (IS_ERR(inode)) { gossip_err("%s: Failed to allocate inode for file :%pd:\n", __func__, @@ -82,12 +86,13 @@ static int orangefs_create(struct inode *dir, __func__, dentry); - SetMtimeFlag(parent); dir->i_mtime = dir->i_ctime = current_time(dir); + memset(&iattr, 0, sizeof iattr); + iattr.ia_valid |= ATTR_MTIME; + orangefs_inode_setattr(dir, &iattr); mark_inode_dirty_sync(dir); ret = 0; out: - op_release(new_op); gossip_debug(GOSSIP_NAME_DEBUG, "%s: %pd: returning %d\n", __func__, @@ -221,6 +226,7 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; + struct iattr iattr; int ret; gossip_debug(GOSSIP_NAME_DEBUG, @@ -253,8 +259,10 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) if (!ret) { drop_nlink(inode); - SetMtimeFlag(parent); dir->i_mtime = dir->i_ctime = current_time(dir); + memset(&iattr, 0, sizeof iattr); + iattr.ia_valid |= ATTR_MTIME; + orangefs_inode_setattr(dir, &iattr); mark_inode_dirty_sync(dir); } return ret; @@ -266,7 +274,9 @@ static int orangefs_symlink(struct inode *dir, { struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; + struct orangefs_object_kref ref; struct inode *inode; + struct iattr iattr; int mode = 755; int ret; @@ -307,8 +317,10 @@ static int orangefs_symlink(struct inode *dir, goto out; } - inode = orangefs_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0, - &new_op->downcall.resp.sym.refn); + ref = new_op->downcall.resp.sym.refn; + op_release(new_op); + + inode = orangefs_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0, &ref); if (IS_ERR(inode)) { gossip_err ("*** Failed to allocate orangefs symlink inode\n"); @@ -331,12 +343,13 @@ static int orangefs_symlink(struct inode *dir, get_khandle_from_ino(inode), dentry); - SetMtimeFlag(parent); dir->i_mtime = dir->i_ctime = current_time(dir); + memset(&iattr, 0, sizeof iattr); + iattr.ia_valid |= ATTR_MTIME; + orangefs_inode_setattr(dir, &iattr); mark_inode_dirty_sync(dir); ret = 0; out: - op_release(new_op); return ret; } @@ -344,7 +357,9 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode { struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; + struct orangefs_object_kref ref; struct inode *inode; + struct iattr iattr; int ret; new_op = op_alloc(ORANGEFS_VFS_OP_MKDIR); @@ -373,8 +388,10 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode goto out; } - inode = orangefs_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0, - &new_op->downcall.resp.mkdir.refn); + ref = new_op->downcall.resp.mkdir.refn; + op_release(new_op); + + inode = orangefs_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0, &ref); if (IS_ERR(inode)) { gossip_err("*** Failed to allocate orangefs dir inode\n"); ret = PTR_ERR(inode); @@ -400,11 +417,12 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode * NOTE: we have no good way to keep nlink consistent for directories * across clients; keep constant at 1. */ - SetMtimeFlag(parent); dir->i_mtime = dir->i_ctime = current_time(dir); + memset(&iattr, 0, sizeof iattr); + iattr.ia_valid |= ATTR_MTIME; + orangefs_inode_setattr(dir, &iattr); mark_inode_dirty_sync(dir); out: - op_release(new_op); return ret; } @@ -470,4 +488,5 @@ const struct inode_operations orangefs_dir_inode_operations = { .getattr = orangefs_getattr, .listxattr = orangefs_listxattr, .permission = orangefs_permission, + .update_time = orangefs_update_time, }; diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h index b6001bb28f5a..c7db56a31b92 100644 --- a/fs/orangefs/orangefs-debug.h +++ b/fs/orangefs/orangefs-debug.h @@ -15,8 +15,10 @@ #ifdef __KERNEL__ #include <linux/types.h> +#include <linux/kernel.h> #else #include <stdint.h> +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif #define GOSSIP_NO_DEBUG (__u64)0 @@ -88,6 +90,6 @@ static struct __keyword_mask_s s_kmod_keyword_mask_map[] = { }; static const int num_kmod_keyword_mask_map = (int) - (sizeof(s_kmod_keyword_mask_map) / sizeof(struct __keyword_mask_s)); + (ARRAY_SIZE(s_kmod_keyword_mask_map)); #endif /* __ORANGEFS_DEBUG_H */ diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index f44d5eb74fcc..97adf7d100b5 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -209,37 +209,10 @@ struct orangefs_inode_s { struct inode vfs_inode; sector_t last_failed_block_index_read; - /* - * State of in-memory attributes not yet flushed to disk associated - * with this object - */ - unsigned long pinode_flags; - unsigned long getattr_time; u32 getattr_mask; }; -#define P_ATIME_FLAG 0 -#define P_MTIME_FLAG 1 -#define P_CTIME_FLAG 2 -#define P_MODE_FLAG 3 - -#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) -#define SetAtimeFlag(pinode) set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) -#define AtimeFlag(pinode) test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags) - -#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) -#define SetMtimeFlag(pinode) set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) -#define MtimeFlag(pinode) test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags) - -#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) -#define SetCtimeFlag(pinode) set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) -#define CtimeFlag(pinode) test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags) - -#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags) -#define SetModeFlag(pinode) set_bit(P_MODE_FLAG, &(pinode)->pinode_flags) -#define ModeFlag(pinode) test_bit(P_MODE_FLAG, &(pinode)->pinode_flags) - /* per superblock private orangefs info */ struct orangefs_sb_info_s { struct orangefs_khandle root_khandle; @@ -436,6 +409,8 @@ int orangefs_getattr(const struct path *path, struct kstat *stat, int orangefs_permission(struct inode *inode, int mask); +int orangefs_update_time(struct inode *, struct timespec *, int); + /* * defined in xattr.c */ @@ -478,8 +453,6 @@ bool __is_daemon_in_service(void); */ __s32 fsid_of_op(struct orangefs_kernel_op_s *op); -int orangefs_flush_inode(struct inode *inode); - ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, void *buffer, diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index f82336496311..97fe93129f38 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -4,6 +4,7 @@ * * See COPYING in top-level directory. */ +#include <linux/kernel.h> #include "protocol.h" #include "orangefs-kernel.h" #include "orangefs-dev-proto.h" @@ -437,89 +438,8 @@ int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) op_release(new_op); - /* - * successful setattr should clear the atime, mtime and - * ctime flags. - */ - if (ret == 0) { - ClearAtimeFlag(orangefs_inode); - ClearMtimeFlag(orangefs_inode); - ClearCtimeFlag(orangefs_inode); - ClearModeFlag(orangefs_inode); + if (ret == 0) orangefs_inode->getattr_time = jiffies - 1; - } - - return ret; -} - -int orangefs_flush_inode(struct inode *inode) -{ - /* - * If it is a dirty inode, this function gets called. - * Gather all the information that needs to be setattr'ed - * Right now, this will only be used for mode, atime, mtime - * and/or ctime. - */ - struct iattr wbattr; - int ret; - int mtime_flag; - int ctime_flag; - int atime_flag; - int mode_flag; - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); - - memset(&wbattr, 0, sizeof(wbattr)); - - /* - * check inode flags up front, and clear them if they are set. This - * will prevent multiple processes from all trying to flush the same - * inode if they call close() simultaneously - */ - mtime_flag = MtimeFlag(orangefs_inode); - ClearMtimeFlag(orangefs_inode); - ctime_flag = CtimeFlag(orangefs_inode); - ClearCtimeFlag(orangefs_inode); - atime_flag = AtimeFlag(orangefs_inode); - ClearAtimeFlag(orangefs_inode); - mode_flag = ModeFlag(orangefs_inode); - ClearModeFlag(orangefs_inode); - - /* -- Lazy atime,mtime and ctime update -- - * Note: all times are dictated by server in the new scheme - * and not by the clients - * - * Also mode updates are being handled now.. - */ - - if (mtime_flag) - wbattr.ia_valid |= ATTR_MTIME; - if (ctime_flag) - wbattr.ia_valid |= ATTR_CTIME; - if (atime_flag) - wbattr.ia_valid |= ATTR_ATIME; - - if (mode_flag) { - wbattr.ia_mode = inode->i_mode; - wbattr.ia_valid |= ATTR_MODE; - } - - gossip_debug(GOSSIP_UTILS_DEBUG, - "*********** orangefs_flush_inode: %pU " - "(ia_valid %d)\n", - get_khandle_from_ino(inode), - wbattr.ia_valid); - if (wbattr.ia_valid == 0) { - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_flush_inode skipping setattr()\n"); - return 0; - } - - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_flush_inode (%pU) writing mode %o\n", - get_khandle_from_ino(inode), - inode->i_mode); - - ret = orangefs_inode_setattr(inode, &wbattr); return ret; } @@ -606,7 +526,7 @@ int orangefs_normalize_to_errno(__s32 error_code) /* Convert ORANGEFS encoded errno values into regular errno values. */ } else if ((-error_code) & ORANGEFS_ERROR_BIT) { i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS); - if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping)) + if (i < ARRAY_SIZE(PINT_errno_mapping)) error_code = -PINT_errno_mapping[i]; else error_code = -EINVAL; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 47ebd9bfd1a1..366750eef201 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -99,8 +99,6 @@ static void orangefs_inode_cache_ctor(void *req) inode_init_once(&orangefs_inode->vfs_inode); init_rwsem(&orangefs_inode->xattr_sem); - - orangefs_inode->vfs_inode.i_version = 1; } static struct inode *orangefs_alloc_inode(struct super_block *sb) @@ -119,7 +117,6 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) orangefs_inode->refn.fs_id = ORANGEFS_FS_ID_NULL; orangefs_inode->last_failed_block_index_read = 0; memset(orangefs_inode->link_target, 0, sizeof(orangefs_inode->link_target)); - orangefs_inode->pinode_flags = 0; gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_alloc_inode: allocated %p\n", @@ -299,21 +296,9 @@ void fsid_key_table_finalize(void) { } -/* Called whenever the VFS dirties the inode in response to atime updates */ -static void orangefs_dirty_inode(struct inode *inode, int flags) -{ - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); - - gossip_debug(GOSSIP_SUPER_DEBUG, - "orangefs_dirty_inode: %pU\n", - get_khandle_from_ino(inode)); - SetAtimeFlag(orangefs_inode); -} - static const struct super_operations orangefs_s_ops = { .alloc_inode = orangefs_alloc_inode, .destroy_inode = orangefs_destroy_inode, - .dirty_inode = orangefs_dirty_inode, .drop_inode = generic_delete_inode, .statfs = orangefs_statfs, .remount_fs = orangefs_remount_fs, diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c index d856cdf91763..db107fe91ab3 100644 --- a/fs/orangefs/symlink.c +++ b/fs/orangefs/symlink.c @@ -15,4 +15,5 @@ const struct inode_operations orangefs_symlink_inode_operations = { .getattr = orangefs_getattr, .listxattr = orangefs_listxattr, .permission = orangefs_permission, + .update_time = orangefs_update_time, }; |