summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/file-item.c15
-rw-r--r--fs/btrfs/ordered-data.c3
-rw-r--r--fs/ceph/dir.c6
-rw-r--r--fs/ceph/inode.c16
-rw-r--r--fs/ceph/mds_client.c70
-rw-r--r--fs/ceph/snap.c7
-rw-r--r--fs/cifs/file.c15
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/misc.c23
-rw-r--r--fs/cifs/smb2pdu.c1
-rw-r--r--fs/io_uring.c42
-rw-r--r--fs/proc/proc_sysctl.c6
-rw-r--r--fs/splice.c4
13 files changed, 158 insertions, 54 deletions
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 920bf3b4b0ef..cccc75d15970 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -427,9 +428,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
unsigned long this_sum_bytes = 0;
int i;
u64 offset;
+ unsigned nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
+ sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
+ GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
- sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
- GFP_NOFS);
if (!sums)
return BLK_STS_RESOURCE;
@@ -472,8 +477,10 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
bytes_left = bio->bi_iter.bi_size - total_bytes;
- sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left),
- GFP_NOFS);
+ nofs_flag = memalloc_nofs_save();
+ sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
+ bytes_left), GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
BUG_ON(!sums); /* -ENOMEM */
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6fde2b2741ef..45e3cfd1198b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
@@ -442,7 +443,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
cur = entry->list.next;
sum = list_entry(cur, struct btrfs_ordered_sum, list);
list_del(&sum->list);
- kfree(sum);
+ kvfree(sum);
}
kmem_cache_free(btrfs_ordered_extent_cache, entry);
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a8f429882249..0637149fb9f9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1766,6 +1766,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
struct ceph_inode_info *dci = ceph_inode(dir);
+ unsigned hash;
switch (dci->i_dir_layout.dl_dir_hash) {
case 0: /* for backward compat */
@@ -1773,8 +1774,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
return dn->d_name.hash;
default:
- return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ spin_lock(&dn->d_lock);
+ hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
dn->d_name.name, dn->d_name.len);
+ spin_unlock(&dn->d_lock);
+ return hash;
}
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2d61ddda9bf5..c2feb310ac1e 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1163,6 +1163,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in)
return 0;
}
+static int d_name_cmp(struct dentry *dentry, const char *name, size_t len)
+{
+ int ret;
+
+ /* take d_lock to ensure dentry->d_name stability */
+ spin_lock(&dentry->d_lock);
+ ret = dentry->d_name.len - len;
+ if (!ret)
+ ret = memcmp(dentry->d_name.name, name, len);
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
+
/*
* Incorporate results into the local cache. This is either just
* one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
@@ -1412,7 +1425,8 @@ retry_lookup:
err = splice_dentry(&req->r_dentry, in);
if (err < 0)
goto done;
- } else if (rinfo->head->is_dentry) {
+ } else if (rinfo->head->is_dentry &&
+ !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) {
struct ceph_vino *ptvino = NULL;
if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 21c33ed048ed..9049c2a3e972 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1414,6 +1414,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
+
+ if (drop &&
+ ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
@@ -2161,10 +2170,39 @@ retry:
return path;
}
+/* Duplicate the dentry->d_name.name safely */
+static int clone_dentry_name(struct dentry *dentry, const char **ppath,
+ int *ppathlen)
+{
+ u32 len;
+ char *name;
+
+retry:
+ len = READ_ONCE(dentry->d_name.len);
+ name = kmalloc(len + 1, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_name.len != len) {
+ spin_unlock(&dentry->d_lock);
+ kfree(name);
+ goto retry;
+ }
+ memcpy(name, dentry->d_name.name, len);
+ spin_unlock(&dentry->d_lock);
+
+ name[len] = '\0';
+ *ppath = name;
+ *ppathlen = len;
+ return 0;
+}
+
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath, bool parent_locked)
{
+ int ret;
char *path;
rcu_read_lock();
@@ -2173,8 +2211,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir);
rcu_read_unlock();
- *ppath = dentry->d_name.name;
- *ppathlen = dentry->d_name.len;
+ if (parent_locked) {
+ *ppath = dentry->d_name.name;
+ *ppathlen = dentry->d_name.len;
+ } else {
+ ret = clone_dentry_name(dentry, ppath, ppathlen);
+ if (ret)
+ return ret;
+ *pfreepath = true;
+ }
return 0;
}
rcu_read_unlock();
@@ -2182,13 +2227,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
static int build_inode_path(struct inode *inode,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath)
{
struct dentry *dentry;
char *path;
@@ -2204,7 +2249,7 @@ static int build_inode_path(struct inode *inode,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
@@ -2215,7 +2260,7 @@ static int build_inode_path(struct inode *inode,
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
struct inode *rdiri, const char *rpath,
u64 rino, const char **ppath, int *pathlen,
- u64 *ino, int *freepath)
+ u64 *ino, bool *freepath, bool parent_locked)
{
int r = 0;
@@ -2225,7 +2270,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
ceph_snap(rinode));
} else if (rdentry) {
r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
- freepath);
+ freepath, parent_locked);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -2251,7 +2296,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
- int freepath1 = 0, freepath2 = 0;
+ bool freepath1 = false, freepath2 = false;
int len;
u16 releases;
void *p, *end;
@@ -2259,16 +2304,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
- &path1, &pathlen1, &ino1, &freepath1);
+ &path1, &pathlen1, &ino1, &freepath1,
+ test_bit(CEPH_MDS_R_PARENT_LOCKED,
+ &req->r_req_flags));
if (ret < 0) {
msg = ERR_PTR(ret);
goto out;
}
+ /* If r_old_dentry is set, then assume that its parent is locked */
ret = set_request_path_attr(NULL, req->r_old_dentry,
req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
- &path2, &pathlen2, &ino2, &freepath2);
+ &path2, &pathlen2, &ino2, &freepath2, true);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out_free1;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 89aa37fa0f84..b26e12cd8ec3 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -572,7 +572,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
old_snapc = NULL;
update_snapc:
- if (ci->i_head_snapc) {
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ci->i_head_snapc = NULL;
+ } else {
ci->i_head_snapc = ceph_get_snap_context(new_snapc);
dout(" new snapc is %p\n", new_snapc);
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9c0ccc06d172..7037a137fa53 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2877,7 +2877,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
struct cifs_tcon *tcon;
struct cifs_sb_info *cifs_sb;
struct dentry *dentry = ctx->cfile->dentry;
- unsigned int i;
int rc;
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -2941,10 +2940,6 @@ restart_loop:
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
- if (!ctx->direct_io)
- for (i = 0; i < ctx->npages; i++)
- put_page(ctx->bv[i].bv_page);
-
cifs_stats_bytes_written(tcon, ctx->total_len);
set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -3582,7 +3577,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
struct iov_iter *to = &ctx->iter;
struct cifs_sb_info *cifs_sb;
struct cifs_tcon *tcon;
- unsigned int i;
int rc;
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -3666,15 +3660,8 @@ again:
kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
- if (!ctx->direct_io) {
- for (i = 0; i < ctx->npages; i++) {
- if (ctx->should_dirty)
- set_page_dirty(ctx->bv[i].bv_page);
- put_page(ctx->bv[i].bv_page);
- }
-
+ if (!ctx->direct_io)
ctx->total_len = ctx->len - iov_iter_count(to);
- }
/* mask nodata case */
if (rc == -ENODATA)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 53fdb5df0d2e..538fd7d807e4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
if (rc == 0 || rc != -EBUSY)
goto do_rename_exit;
+ /* Don't fall back to using SMB on SMB 2+ mount */
+ if (server->vals->protocol_id != 0)
+ goto do_rename_exit;
+
/* open-file renames don't work across directories */
if (to_dentry->d_parent != from_dentry->d_parent)
goto do_rename_exit;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1e1626a2cfc3..0dc6f08020ac 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -789,6 +789,11 @@ cifs_aio_ctx_alloc(void)
{
struct cifs_aio_ctx *ctx;
+ /*
+ * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io
+ * to false so that we know when we have to unreference pages within
+ * cifs_aio_ctx_release()
+ */
ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL);
if (!ctx)
return NULL;
@@ -807,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount)
struct cifs_aio_ctx, refcount);
cifsFileInfo_put(ctx->cfile);
- kvfree(ctx->bv);
+
+ /*
+ * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly
+ * which means that iov_iter_get_pages() was a success and thus that
+ * we have taken reference on pages.
+ */
+ if (ctx->bv) {
+ unsigned i;
+
+ for (i = 0; i < ctx->npages; i++) {
+ if (ctx->should_dirty)
+ set_page_dirty(ctx->bv[i].bv_page);
+ put_page(ctx->bv[i].bv_page);
+ }
+ kvfree(ctx->bv);
+ }
+
kfree(ctx);
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b8f7262ac354..a37774a55f3a 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3466,6 +3466,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
io_parms->tcon->tid, ses->Suid,
io_parms->offset, 0);
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+ cifs_small_buf_release(req);
return rc == -ENODATA ? 0 : rc;
} else
trace_smb3_read_done(xid, req->PersistentFileId,
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f65f85d89217..0e9fb2cb1984 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -740,7 +740,7 @@ static bool io_file_supports_async(struct file *file)
}
static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
const struct io_uring_sqe *sqe = s->sqe;
struct io_ring_ctx *ctx = req->ctx;
@@ -938,7 +938,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
}
static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
@@ -947,7 +947,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
size_t iov_count;
int ret;
- ret = io_prep_rw(req, s, force_nonblock, state);
+ ret = io_prep_rw(req, s, force_nonblock);
if (ret)
return ret;
file = kiocb->ki_filp;
@@ -985,7 +985,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
}
static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
@@ -994,7 +994,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
size_t iov_count;
int ret;
- ret = io_prep_rw(req, s, force_nonblock, state);
+ ret = io_prep_rw(req, s, force_nonblock);
if (ret)
return ret;
@@ -1336,8 +1336,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct sqe_submit *s, bool force_nonblock,
- struct io_submit_state *state)
+ const struct sqe_submit *s, bool force_nonblock)
{
int ret, opcode;
@@ -1353,18 +1352,18 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_READV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
- ret = io_read(req, s, force_nonblock, state);
+ ret = io_read(req, s, force_nonblock);
break;
case IORING_OP_WRITEV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
- ret = io_write(req, s, force_nonblock, state);
+ ret = io_write(req, s, force_nonblock);
break;
case IORING_OP_READ_FIXED:
- ret = io_read(req, s, force_nonblock, state);
+ ret = io_read(req, s, force_nonblock);
break;
case IORING_OP_WRITE_FIXED:
- ret = io_write(req, s, force_nonblock, state);
+ ret = io_write(req, s, force_nonblock);
break;
case IORING_OP_FSYNC:
ret = io_fsync(req, s->sqe, force_nonblock);
@@ -1457,7 +1456,7 @@ restart:
s->has_user = cur_mm != NULL;
s->needs_lock = true;
do {
- ret = __io_submit_sqe(ctx, req, s, false, NULL);
+ ret = __io_submit_sqe(ctx, req, s, false);
/*
* We can get EAGAIN for polled IO even though
* we're forcing a sync submission from here,
@@ -1623,7 +1622,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
if (unlikely(ret))
goto out;
- ret = __io_submit_sqe(ctx, req, s, true, state);
+ ret = __io_submit_sqe(ctx, req, s, true);
if (ret == -EAGAIN) {
struct io_uring_sqe *sqe_copy;
@@ -1739,7 +1738,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
head = ctx->cached_sq_head;
/* See comment at the top of this file */
smp_rmb();
- if (head == READ_ONCE(ring->r.tail))
+ /* make sure SQ entry isn't read before tail */
+ if (head == smp_load_acquire(&ring->r.tail))
return false;
head = READ_ONCE(ring->array[head & ctx->sq_mask]);
@@ -1864,7 +1864,8 @@ static int io_sq_thread(void *data)
/* Tell userspace we may need a wakeup call */
ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP;
- smp_wmb();
+ /* make sure to read SQ tail after writing flags */
+ smp_mb();
if (!io_get_sqring(ctx, &sqes[0])) {
if (kthread_should_stop()) {
@@ -2574,7 +2575,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
poll_wait(file, &ctx->cq_wait, wait);
/* See comment at the top of this file */
smp_rmb();
- if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head)
+ if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head !=
+ ctx->sq_ring->ring_entries)
mask |= EPOLLOUT | EPOLLWRNORM;
if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail)
mask |= EPOLLIN | EPOLLRDNORM;
@@ -2934,6 +2936,14 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
{
int ret;
+ /*
+ * We're inside the ring mutex, if the ref is already dying, then
+ * someone else killed the ctx or is already going through
+ * io_uring_register().
+ */
+ if (percpu_ref_is_dying(&ctx->refs))
+ return -ENXIO;
+
percpu_ref_kill(&ctx->refs);
/*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d65390727541..7325baa8f9d4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header)
if (--header->nreg)
return;
- if (parent)
+ if (parent) {
put_links(header);
- start_unregistering(header);
+ start_unregistering(header);
+ }
+
if (!--header->count)
kfree_rcu(header, rcu);
diff --git a/fs/splice.c b/fs/splice.c
index 98943d9b219c..25212dcca2df 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
return 1;
}