From 89aa593010135660991d05c92528c2c9163d5900 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 8 Sep 2017 15:23:18 +0800 Subject: ceph: keep auth cap when inode has flocks or posix locks file locks are tracked by inode's auth mds. dropping auth caps is equivalent to releasing all file locks. Signed-off-by: "Yan, Zheng" Acked-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 9 deletions(-) (limited to 'fs/ceph/locks.c') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index e7cce412f2cf..316d550b9603 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -30,19 +30,46 @@ void __init ceph_flock_init(void) get_random_bytes(&lock_secret, sizeof(lock_secret)); } +static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) +{ + struct inode *inode = file_inode(src->fl_file); + atomic_inc(&ceph_inode(inode)->i_filelock_ref); +} + +static void ceph_fl_release_lock(struct file_lock *fl) +{ + struct inode *inode = file_inode(fl->fl_file); + atomic_dec(&ceph_inode(inode)->i_filelock_ref); +} + +static const struct file_lock_operations ceph_fl_lock_ops = { + .fl_copy_lock = ceph_fl_copy_lock, + .fl_release_private = ceph_fl_release_lock, +}; + /** * Implement fcntl and flock locking functions. */ -static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, +static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, int cmd, u8 wait, struct file_lock *fl) { - struct inode *inode = file_inode(file); struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; int err; u64 length = 0; u64 owner; + if (operation == CEPH_MDS_OP_SETFILELOCK) { + /* + * increasing i_filelock_ref closes race window between + * handling request reply and adding file_lock struct to + * inode. Otherwise, auth caps may get trimmed in the + * window. Caller function will decrease the counter. + */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ceph_inode(inode)->i_filelock_ref); + } + if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK) wait = 0; @@ -180,10 +207,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, */ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { - u8 lock_cmd; + struct inode *inode = file_inode(file); int err; - u8 wait = 0; u16 op = CEPH_MDS_OP_SETFILELOCK; + u8 lock_cmd; + u8 wait = 0; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; @@ -199,6 +227,17 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else if (IS_SETLKW(cmd)) wait = 1; + if (op == CEPH_MDS_OP_SETFILELOCK) { + /* + * increasing i_filelock_ref closes race window between + * handling request reply and adding file_lock struct to + * inode. Otherwise, i_auth_cap may get trimmed in the + * window. Caller function will decrease the counter. + */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ceph_inode(inode)->i_filelock_ref); + } + if (F_RDLCK == fl->fl_type) lock_cmd = CEPH_LOCK_SHARED; else if (F_WRLCK == fl->fl_type) @@ -206,7 +245,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else lock_cmd = CEPH_LOCK_UNLOCK; - err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); + err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { if (op != CEPH_MDS_OP_GETFILELOCK) { dout("mds locked, locking locally"); @@ -215,7 +254,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) /* undo! This should only happen if * the kernel detects local * deadlock. */ - ceph_lock_message(CEPH_LOCK_FCNTL, op, file, + ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on posix_lock_file, undid lock", err); @@ -227,8 +266,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { - u8 lock_cmd; + struct inode *inode = file_inode(file); int err; + u8 lock_cmd; u8 wait = 0; if (!(fl->fl_flags & FL_FLOCK)) @@ -239,6 +279,10 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) dout("ceph_flock, fl_file: %p", fl->fl_file); + /* see comment in ceph_lock */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ceph_inode(inode)->i_filelock_ref); + if (IS_SETLKW(cmd)) wait = 1; @@ -250,13 +294,13 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) lock_cmd = CEPH_LOCK_UNLOCK; err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, lock_cmd, wait, fl); + inode, lock_cmd, wait, fl); if (!err) { err = locks_lock_file_wait(file, fl); if (err) { ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, - file, CEPH_LOCK_UNLOCK, 0, fl); + inode, CEPH_LOCK_UNLOCK, 0, fl); dout("got %d on locks_lock_file_wait, undid lock", err); } } -- cgit From c6db84723363790160a89dee4554ad2f0687a0c5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Sep 2017 09:58:56 +0800 Subject: ceph: make lock_to_ceph_filelock() static Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 62 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'fs/ceph/locks.c') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 316d550b9603..2927f3bc2fc9 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -332,6 +332,37 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) *flock_count, *fcntl_count); } +/* + * Given a pointer to a lock, convert it to a ceph filelock + */ +static int lock_to_ceph_filelock(struct file_lock *lock, + struct ceph_filelock *cephlock) +{ + int err = 0; + cephlock->start = cpu_to_le64(lock->fl_start); + cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); + cephlock->client = cpu_to_le64(0); + cephlock->pid = cpu_to_le64((u64)lock->fl_pid); + cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); + + switch (lock->fl_type) { + case F_RDLCK: + cephlock->type = CEPH_LOCK_SHARED; + break; + case F_WRLCK: + cephlock->type = CEPH_LOCK_EXCL; + break; + case F_UNLCK: + cephlock->type = CEPH_LOCK_UNLOCK; + break; + default: + dout("Have unknown lock type %d", lock->fl_type); + err = -EINVAL; + } + + return err; +} + /** * Encode the flock and fcntl locks for the given inode into the ceph_filelock * array. Must be called with inode->i_lock already held. @@ -416,34 +447,3 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks, out_fail: return err; } - -/* - * Given a pointer to a lock, convert it to a ceph filelock - */ -int lock_to_ceph_filelock(struct file_lock *lock, - struct ceph_filelock *cephlock) -{ - int err = 0; - cephlock->start = cpu_to_le64(lock->fl_start); - cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); - cephlock->client = cpu_to_le64(0); - cephlock->pid = cpu_to_le64((u64)lock->fl_pid); - cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); - - switch (lock->fl_type) { - case F_RDLCK: - cephlock->type = CEPH_LOCK_SHARED; - break; - case F_WRLCK: - cephlock->type = CEPH_LOCK_EXCL; - break; - case F_UNLCK: - cephlock->type = CEPH_LOCK_UNLOCK; - break; - default: - dout("Have unknown lock type %d", lock->fl_type); - err = -EINVAL; - } - - return err; -} -- cgit From 4deb14a2593dfade102dd94a803a63cf620cfd56 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Sep 2017 10:36:28 +0800 Subject: ceph: optimize flock encoding during reconnect Don't malloc if there is no flock. Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'fs/ceph/locks.c') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 2927f3bc2fc9..aaea82076849 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -431,19 +431,22 @@ int ceph_locks_to_pagelist(struct ceph_filelock *flocks, if (err) goto out_fail; - err = ceph_pagelist_append(pagelist, flocks, - num_fcntl_locks * sizeof(*flocks)); - if (err) - goto out_fail; + if (num_fcntl_locks > 0) { + err = ceph_pagelist_append(pagelist, flocks, + num_fcntl_locks * sizeof(*flocks)); + if (err) + goto out_fail; + } nlocks = cpu_to_le32(num_flock_locks); err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); if (err) goto out_fail; - err = ceph_pagelist_append(pagelist, - &flocks[num_fcntl_locks], - num_flock_locks * sizeof(*flocks)); + if (num_flock_locks > 0) { + err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks], + num_flock_locks * sizeof(*flocks)); + } out_fail: return err; } -- cgit From b3f8d68f38a879daed1eab66c0e19bc293096d34 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Sep 2017 10:58:55 +0800 Subject: ceph: handle 'session get evicted while there are file locks' When session get evicted, all file locks associated with the session get released remotely by mds. File locks tracked by kernel become stale. In this situation, set an error flag on inode. The flag makes further file locks return -EIO. Another option to handle this situation is cleanup file locks tracked kernel. I do not choose it because it is inconvenient to notify user program about the error. Signed-off-by: "Yan, Zheng" Acked-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/locks.c | 52 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 12 deletions(-) (limited to 'fs/ceph/locks.c') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index aaea82076849..9e66f69ee8a5 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -39,7 +39,13 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) static void ceph_fl_release_lock(struct file_lock *fl) { struct inode *inode = file_inode(fl->fl_file); - atomic_dec(&ceph_inode(inode)->i_filelock_ref); + struct ceph_inode_info *ci = ceph_inode(inode); + if (atomic_dec_and_test(&ci->i_filelock_ref)) { + /* clear error when all locks are released */ + spin_lock(&ci->i_ceph_lock); + ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; + spin_unlock(&ci->i_ceph_lock); + } } static const struct file_lock_operations ceph_fl_lock_ops = { @@ -208,10 +214,11 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc, int ceph_lock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); - int err; + struct ceph_inode_info *ci = ceph_inode(inode); + int err = 0; u16 op = CEPH_MDS_OP_SETFILELOCK; - u8 lock_cmd; u8 wait = 0; + u8 lock_cmd; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; @@ -227,7 +234,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) else if (IS_SETLKW(cmd)) wait = 1; - if (op == CEPH_MDS_OP_SETFILELOCK) { + spin_lock(&ci->i_ceph_lock); + if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + err = -EIO; + } else if (op == CEPH_MDS_OP_SETFILELOCK) { /* * increasing i_filelock_ref closes race window between * handling request reply and adding file_lock struct to @@ -235,7 +245,13 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) * window. Caller function will decrease the counter. */ fl->fl_ops = &ceph_fl_lock_ops; - atomic_inc(&ceph_inode(inode)->i_filelock_ref); + atomic_inc(&ci->i_filelock_ref); + } + spin_unlock(&ci->i_ceph_lock); + if (err < 0) { + if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) + posix_lock_file(file, fl, NULL); + return err; } if (F_RDLCK == fl->fl_type) @@ -247,10 +263,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { - if (op != CEPH_MDS_OP_GETFILELOCK) { + if (op == CEPH_MDS_OP_SETFILELOCK) { dout("mds locked, locking locally"); err = posix_lock_file(file, fl, NULL); - if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { + if (err) { /* undo! This should only happen if * the kernel detects local * deadlock. */ @@ -267,9 +283,10 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) int ceph_flock(struct file *file, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(file); - int err; - u8 lock_cmd; + struct ceph_inode_info *ci = ceph_inode(inode); + int err = 0; u8 wait = 0; + u8 lock_cmd; if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; @@ -279,9 +296,20 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) dout("ceph_flock, fl_file: %p", fl->fl_file); - /* see comment in ceph_lock */ - fl->fl_ops = &ceph_fl_lock_ops; - atomic_inc(&ceph_inode(inode)->i_filelock_ref); + spin_lock(&ci->i_ceph_lock); + if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { + err = -EIO; + } else { + /* see comment in ceph_lock */ + fl->fl_ops = &ceph_fl_lock_ops; + atomic_inc(&ci->i_filelock_ref); + } + spin_unlock(&ci->i_ceph_lock); + if (err < 0) { + if (F_UNLCK == fl->fl_type) + locks_lock_file_wait(file, fl); + return err; + } if (IS_SETLKW(cmd)) wait = 1; -- cgit