summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/scrub.c33
-rw-r--r--fs/ceph/mds_client.c8
-rw-r--r--fs/io_uring.c4
-rw-r--r--fs/readdir.c79
-rw-r--r--fs/reiserfs/xattr.c8
6 files changed, 84 insertions, 53 deletions
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index f639dde2a679..ba4d8f375b3c 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -500,11 +500,8 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
&dev_replace->scrub_progress, 0, 1);
ret = btrfs_dev_replace_finishing(fs_info, ret);
- if (ret == -EINPROGRESS) {
+ if (ret == -EINPROGRESS)
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS;
- } else if (ret != -ECANCELED) {
- WARN_ON(ret);
- }
return ret;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 21de630b0730..fd266a2d15ec 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3577,17 +3577,27 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* This can easily boost the amount of SYSTEM chunks if cleaner
* thread can't be triggered fast enough, and use up all space
* of btrfs_super_block::sys_chunk_array
+ *
+ * While for dev replace, we need to try our best to mark block
+ * group RO, to prevent race between:
+ * - Write duplication
+ * Contains latest data
+ * - Scrub copy
+ * Contains data from commit tree
+ *
+ * If target block group is not marked RO, nocow writes can
+ * be overwritten by scrub copy, causing data corruption.
+ * So for dev-replace, it's not allowed to continue if a block
+ * group is not RO.
*/
- ret = btrfs_inc_block_group_ro(cache, false);
- scrub_pause_off(fs_info);
-
+ ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
if (ret == 0) {
ro_set = 1;
- } else if (ret == -ENOSPC) {
+ } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
/*
* btrfs_inc_block_group_ro return -ENOSPC when it
* failed in creating new chunk for metadata.
- * It is not a problem for scrub/replace, because
+ * It is not a problem for scrub, because
* metadata are always cowed, and our scrub paused
* commit_transactions.
*/
@@ -3596,9 +3606,22 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
btrfs_warn(fs_info,
"failed setting block group ro: %d", ret);
btrfs_put_block_group(cache);
+ scrub_pause_off(fs_info);
break;
}
+ /*
+ * Now the target block is marked RO, wait for nocow writes to
+ * finish before dev-replace.
+ * COW is fine, as COW never overwrites extents in commit tree.
+ */
+ if (sctx->is_dev_replace) {
+ btrfs_wait_nocow_writers(cache);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
+ cache->length);
+ }
+
+ scrub_pause_off(fs_info);
down_write(&dev_replace->rwsem);
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 374db1bd57d1..145d46ba25ae 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -708,8 +708,10 @@ void ceph_mdsc_release_request(struct kref *kref)
/* avoid calling iput_final() in mds dispatch threads */
ceph_async_iput(req->r_inode);
}
- if (req->r_parent)
+ if (req->r_parent) {
ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
+ ceph_async_iput(req->r_parent);
+ }
ceph_async_iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
@@ -2676,8 +2678,10 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir,
/* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
if (req->r_inode)
ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
- if (req->r_parent)
+ if (req->r_parent) {
ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
+ ihold(req->r_parent);
+ }
if (req->r_old_dentry_dir)
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 187dd94fd6b1..5953d7f13690 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4463,13 +4463,15 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL;
if (copy_from_user(&up, arg, sizeof(up)))
return -EFAULT;
+ if (up.resv)
+ return -EINVAL;
if (check_add_overflow(up.offset, nr_args, &done))
return -EOVERFLOW;
if (done > ctx->nr_user_files)
return -EINVAL;
done = 0;
- fds = (__s32 __user *) up.fds;
+ fds = u64_to_user_ptr(up.fds);
while (nr_args) {
struct fixed_file_table *table;
unsigned index;
diff --git a/fs/readdir.c b/fs/readdir.c
index d26d5ea4de7b..de2eceffdee8 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -102,10 +102,14 @@ EXPORT_SYMBOL(iterate_dir);
* filename length, and the above "soft error" worry means
* that it's probably better left alone until we have that
* issue clarified.
+ *
+ * Note the PATH_MAX check - it's arbitrary but the real
+ * kernel limit on a possible path component, not NAME_MAX,
+ * which is the technical standard limit.
*/
static int verify_dirent_name(const char *name, int len)
{
- if (!len)
+ if (len <= 0 || len >= PATH_MAX)
return -EIO;
if (memchr(name, '/', len))
return -EIO;
@@ -206,7 +210,7 @@ struct linux_dirent {
struct getdents_callback {
struct dir_context ctx;
struct linux_dirent __user * current_dir;
- struct linux_dirent __user * previous;
+ int prev_reclen;
int count;
int error;
};
@@ -214,12 +218,13 @@ struct getdents_callback {
static int filldir(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
- struct linux_dirent __user * dirent;
+ struct linux_dirent __user *dirent, *prev;
struct getdents_callback *buf =
container_of(ctx, struct getdents_callback, ctx);
unsigned long d_ino;
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
sizeof(long));
+ int prev_reclen;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
@@ -232,28 +237,24 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
buf->error = -EOVERFLOW;
return -EOVERFLOW;
}
- dirent = buf->previous;
- if (dirent && signal_pending(current))
+ prev_reclen = buf->prev_reclen;
+ if (prev_reclen && signal_pending(current))
return -EINTR;
-
- /*
- * Note! This range-checks 'previous' (which may be NULL).
- * The real range was checked in getdents
- */
- if (!user_access_begin(dirent, sizeof(*dirent)))
- goto efault;
- if (dirent)
- unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
+ prev = (void __user *) dirent - prev_reclen;
+ if (!user_access_begin(prev, reclen + prev_reclen))
+ goto efault;
+
+ /* This might be 'dirent->d_off', but if so it will get overwritten */
+ unsafe_put_user(offset, &prev->d_off, efault_end);
unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_access_end();
- buf->previous = dirent;
- dirent = (void __user *)dirent + reclen;
- buf->current_dir = dirent;
+ buf->current_dir = (void __user *)dirent + reclen;
+ buf->prev_reclen = reclen;
buf->count -= reclen;
return 0;
efault_end:
@@ -267,7 +268,6 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct linux_dirent __user *, dirent, unsigned int, count)
{
struct fd f;
- struct linux_dirent __user * lastdirent;
struct getdents_callback buf = {
.ctx.actor = filldir,
.count = count,
@@ -285,8 +285,10 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
- lastdirent = buf.previous;
- if (lastdirent) {
+ if (buf.prev_reclen) {
+ struct linux_dirent __user * lastdirent;
+ lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
+
if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
@@ -299,7 +301,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct getdents_callback64 {
struct dir_context ctx;
struct linux_dirent64 __user * current_dir;
- struct linux_dirent64 __user * previous;
+ int prev_reclen;
int count;
int error;
};
@@ -307,11 +309,12 @@ struct getdents_callback64 {
static int filldir64(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
- struct linux_dirent64 __user *dirent;
+ struct linux_dirent64 __user *dirent, *prev;
struct getdents_callback64 *buf =
container_of(ctx, struct getdents_callback64, ctx);
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
sizeof(u64));
+ int prev_reclen;
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
@@ -319,30 +322,27 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
buf->error = -EINVAL; /* only used if we fail.. */
if (reclen > buf->count)
return -EINVAL;
- dirent = buf->previous;
- if (dirent && signal_pending(current))
+ prev_reclen = buf->prev_reclen;
+ if (prev_reclen && signal_pending(current))
return -EINTR;
-
- /*
- * Note! This range-checks 'previous' (which may be NULL).
- * The real range was checked in getdents
- */
- if (!user_access_begin(dirent, sizeof(*dirent)))
- goto efault;
- if (dirent)
- unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
+ prev = (void __user *)dirent - prev_reclen;
+ if (!user_access_begin(prev, reclen + prev_reclen))
+ goto efault;
+
+ /* This might be 'dirent->d_off', but if so it will get overwritten */
+ unsafe_put_user(offset, &prev->d_off, efault_end);
unsafe_put_user(ino, &dirent->d_ino, efault_end);
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
unsafe_put_user(d_type, &dirent->d_type, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_access_end();
- buf->previous = dirent;
- dirent = (void __user *)dirent + reclen;
- buf->current_dir = dirent;
+ buf->prev_reclen = reclen;
+ buf->current_dir = (void __user *)dirent + reclen;
buf->count -= reclen;
return 0;
+
efault_end:
user_access_end();
efault:
@@ -354,7 +354,6 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
unsigned int count)
{
struct fd f;
- struct linux_dirent64 __user * lastdirent;
struct getdents_callback64 buf = {
.ctx.actor = filldir64,
.count = count,
@@ -372,9 +371,11 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
- lastdirent = buf.previous;
- if (lastdirent) {
+ if (buf.prev_reclen) {
+ struct linux_dirent64 __user * lastdirent;
typeof(lastdirent->d_off) d_off = buf.ctx.pos;
+
+ lastdirent = (void __user *) buf.current_dir - buf.prev_reclen;
if (__put_user(d_off, &lastdirent->d_off))
error = -EFAULT;
else
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 62b40df36c98..28b241cd6987 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -319,8 +319,12 @@ static int reiserfs_for_each_xattr(struct inode *inode,
out_dir:
dput(dir);
out:
- /* -ENODATA isn't an error */
- if (err == -ENODATA)
+ /*
+ * -ENODATA: this object doesn't have any xattrs
+ * -EOPNOTSUPP: this file system doesn't have xattrs enabled on disk.
+ * Neither are errors
+ */
+ if (err == -ENODATA || err == -EOPNOTSUPP)
err = 0;
return err;
}