Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/inode.c                          | 47
-rw-r--r--  fs/afs/internal.h                       |  1
-rw-r--r--  fs/afs/write.c                          | 11
-rw-r--r--  fs/autofs/waitq.c                       |  2
-rw-r--r--  fs/btrfs/dev-replace.c                  | 46
-rw-r--r--  fs/btrfs/disk-io.c                      | 11
-rw-r--r--  fs/btrfs/sysfs.c                        | 16
-rw-r--r--  fs/btrfs/volumes.c                      | 13
-rw-r--r--  fs/btrfs/volumes.h                      |  3
-rw-r--r--  fs/cifs/smb2ops.c                       |  2
-rw-r--r--  fs/eventpoll.c                          | 72
-rw-r--r--  fs/exfat/cache.c                        | 11
-rw-r--r--  fs/exfat/exfat_fs.h                     |  3
-rw-r--r--  fs/exfat/inode.c                        |  2
-rw-r--r--  fs/exfat/namei.c                        | 13
-rw-r--r--  fs/exfat/super.c                        |  5
-rw-r--r--  fs/fuse/file.c                          | 25
-rw-r--r--  fs/io_uring.c                           | 86
-rw-r--r--  fs/nfs/dir.c                            |  3
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayout.c  | 43
-rw-r--r--  fs/nfs/nfs42proc.c                      | 10
-rw-r--r--  fs/pipe.c                               | 62
-rw-r--r--  fs/read_write.c                         |  8
-rw-r--r--  fs/splice.c                             | 28
-rw-r--r--  fs/vboxsf/super.c                       |  2
25 files changed, 345 insertions, 180 deletions
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1d13d2e882ad..0fe8844b4bee 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -810,14 +810,32 @@ void afs_evict_inode(struct inode *inode)
static void afs_setattr_success(struct afs_operation *op)
{
- struct inode *inode = &op->file[0].vnode->vfs_inode;
+ struct afs_vnode_param *vp = &op->file[0];
+ struct inode *inode = &vp->vnode->vfs_inode;
+ loff_t old_i_size = i_size_read(inode);
+
+ op->setattr.old_i_size = old_i_size;
+ afs_vnode_commit_status(op, vp);
+ /* inode->i_size has now been changed. */
+
+ if (op->setattr.attr->ia_valid & ATTR_SIZE) {
+ loff_t size = op->setattr.attr->ia_size;
+ if (size > old_i_size)
+ pagecache_isize_extended(inode, old_i_size, size);
+ }
+}
+
+static void afs_setattr_edit_file(struct afs_operation *op)
+{
+ struct afs_vnode_param *vp = &op->file[0];
+ struct inode *inode = &vp->vnode->vfs_inode;
- afs_vnode_commit_status(op, &op->file[0]);
if (op->setattr.attr->ia_valid & ATTR_SIZE) {
- loff_t i_size = inode->i_size, size = op->setattr.attr->ia_size;
- if (size > i_size)
- pagecache_isize_extended(inode, i_size, size);
- truncate_pagecache(inode, size);
+ loff_t size = op->setattr.attr->ia_size;
+ loff_t i_size = op->setattr.old_i_size;
+
+ if (size < i_size)
+ truncate_pagecache(inode, size);
}
}
@@ -825,6 +843,7 @@ static const struct afs_operation_ops afs_setattr_operation = {
.issue_afs_rpc = afs_fs_setattr,
.issue_yfs_rpc = yfs_fs_setattr,
.success = afs_setattr_success,
+ .edit_dir = afs_setattr_edit_file,
};
/*
@@ -863,11 +882,16 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
if (S_ISREG(vnode->vfs_inode.i_mode))
filemap_write_and_wait(vnode->vfs_inode.i_mapping);
+ /* Prevent any new writebacks from starting whilst we do this. */
+ down_write(&vnode->validate_lock);
+
op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ?
afs_file_key(attr->ia_file) : NULL),
vnode->volume);
- if (IS_ERR(op))
- return PTR_ERR(op);
+ if (IS_ERR(op)) {
+ ret = PTR_ERR(op);
+ goto out_unlock;
+ }
afs_op_set_vnode(op, 0, vnode);
op->setattr.attr = attr;
@@ -880,5 +904,10 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
op->file[0].update_ctime = 1;
op->ops = &afs_setattr_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+
+out_unlock:
+ up_write(&vnode->validate_lock);
+ _leave(" = %d", ret);
+ return ret;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 18042b7dab6a..e5f0446f27e5 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -812,6 +812,7 @@ struct afs_operation {
} store;
struct {
struct iattr *attr;
+ loff_t old_i_size;
} setattr;
struct afs_acl *acl;
struct yfs_acl *yacl;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 4b2265cb1891..da12abd6db21 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -738,11 +738,21 @@ static int afs_writepages_region(struct address_space *mapping,
int afs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
pgoff_t start, end, next;
int ret;
_enter("");
+ /* We have to be careful as we can end up racing with setattr()
+ * truncating the pagecache since the caller doesn't take a lock here
+ * to prevent it.
+ */
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ down_read(&vnode->validate_lock);
+ else if (!down_read_trylock(&vnode->validate_lock))
+ return 0;
+
if (wbc->range_cyclic) {
start = mapping->writeback_index;
end = -1;
@@ -762,6 +772,7 @@ int afs_writepages(struct address_space *mapping,
ret = afs_writepages_region(mapping, wbc, start, end, &next);
}
+ up_read(&vnode->validate_lock);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 74c886f7c51c..5ced859dac53 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -53,7 +53,7 @@ static int autofs_write(struct autofs_sb_info *sbi,
mutex_lock(&sbi->pipe_mutex);
while (bytes) {
- wr = kernel_write(file, data, bytes, &file->f_pos);
+ wr = __kernel_write(file, data, bytes, NULL);
if (wr <= 0)
break;
data += wr;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index db93909b25e0..e4a1c6afe35d 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -599,6 +599,37 @@ static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
wake_up(&fs_info->dev_replace.replace_wait);
}
+/*
+ * When finishing the device replace, before swapping the source device with the
+ * target device we must update the chunk allocation state in the target device,
+ * as it is empty because replace works by directly copying the chunks and not
+ * through the normal chunk allocation path.
+ */
+static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
+ struct btrfs_device *tgtdev)
+{
+ struct extent_state *cached_state = NULL;
+ u64 start = 0;
+ u64 found_start;
+ u64 found_end;
+ int ret = 0;
+
+ lockdep_assert_held(&srcdev->fs_info->chunk_mutex);
+
+ while (!find_first_extent_bit(&srcdev->alloc_state, start,
+ &found_start, &found_end,
+ CHUNK_ALLOCATED, &cached_state)) {
+ ret = set_extent_bits(&tgtdev->alloc_state, found_start,
+ found_end, CHUNK_ALLOCATED);
+ if (ret)
+ break;
+ start = found_end + 1;
+ }
+
+ free_extent_state(cached_state);
+ return ret;
+}
+
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret)
{
@@ -673,8 +704,14 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
dev_replace->time_stopped = ktime_get_real_seconds();
dev_replace->item_needs_writeback = 1;
- /* replace old device with new one in mapping tree */
+ /*
+ * Update allocation state in the new device and replace the old device
+ * with the new one in the mapping tree.
+ */
if (!scrub_ret) {
+ scrub_ret = btrfs_set_target_alloc_state(src_device, tgt_device);
+ if (scrub_ret)
+ goto error;
btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
src_device,
tgt_device);
@@ -685,6 +722,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_dev_name(src_device),
src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret);
+error:
up_write(&dev_replace->rwsem);
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -745,7 +783,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* replace the sysfs entry */
btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device);
btrfs_sysfs_update_devid(tgt_device);
- btrfs_rm_dev_replace_free_srcdev(src_device);
+ if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
+ btrfs_scratch_superblocks(fs_info, src_device->bdev,
+ src_device->name->str);
/* write back the superblocks */
trans = btrfs_start_transaction(root, 0);
@@ -754,6 +794,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+ btrfs_rm_dev_replace_free_srcdev(src_device);
+
return 0;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index abf86b202b43..9f72b092bc22 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -636,16 +636,15 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
csum_tree_block(eb, result);
if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
- u32 val;
- u32 found = 0;
-
- memcpy(&found, result, csum_size);
+ u8 val[BTRFS_CSUM_SIZE] = { 0 };
read_extent_buffer(eb, &val, 0, csum_size);
btrfs_warn_rl(fs_info,
- "%s checksum verify failed on %llu wanted %x found %x level %d",
+ "%s checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
fs_info->sb->s_id, eb->start,
- val, found, btrfs_header_level(eb));
+ CSUM_FMT_VALUE(csum_size, val),
+ CSUM_FMT_VALUE(csum_size, result),
+ btrfs_header_level(eb));
ret = -EUCLEAN;
goto err;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c8df2edafd85..5be30066563c 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1170,10 +1170,12 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
disk_kobj->name);
}
- kobject_del(&one_device->devid_kobj);
- kobject_put(&one_device->devid_kobj);
+ if (one_device->devid_kobj.state_initialized) {
+ kobject_del(&one_device->devid_kobj);
+ kobject_put(&one_device->devid_kobj);
- wait_for_completion(&one_device->kobj_unregister);
+ wait_for_completion(&one_device->kobj_unregister);
+ }
return 0;
}
@@ -1186,10 +1188,12 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
sysfs_remove_link(fs_devices->devices_kobj,
disk_kobj->name);
}
- kobject_del(&one_device->devid_kobj);
- kobject_put(&one_device->devid_kobj);
+ if (one_device->devid_kobj.state_initialized) {
+ kobject_del(&one_device->devid_kobj);
+ kobject_put(&one_device->devid_kobj);
- wait_for_completion(&one_device->kobj_unregister);
+ wait_for_completion(&one_device->kobj_unregister);
+ }
}
return 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 117b43367629..1997a7d67f22 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1999,9 +1999,9 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
return num_devices;
}
-static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
- struct block_device *bdev,
- const char *device_path)
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+ struct block_device *bdev,
+ const char *device_path)
{
struct btrfs_super_block *disk_super;
int copy_num;
@@ -2224,11 +2224,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
struct btrfs_fs_info *fs_info = srcdev->fs_info;
struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
- if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
- /* zero out the old super if it is writable */
- btrfs_scratch_superblocks(fs_info, srcdev->bdev,
- srcdev->name->str);
- }
+ mutex_lock(&uuid_mutex);
btrfs_close_bdev(srcdev);
synchronize_rcu();
@@ -2258,6 +2254,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
close_fs_devices(fs_devices);
free_fs_devices(fs_devices);
}
+ mutex_unlock(&uuid_mutex);
}
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5eea93916fbf..302c9234f7d0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -573,6 +573,9 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+ struct block_device *bdev,
+ const char *device_path);
int btrfs_bg_type_to_factor(u64 flags);
const char *btrfs_bg_type_to_raid_name(u64 flags);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 32f90dc82c84..d44df8f95bcd 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1208,7 +1208,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
rqst[1].rq_iov = si_iov;
rqst[1].rq_nvec = 1;
- len = sizeof(ea) + ea_name_len + ea_value_len + 1;
+ len = sizeof(*ea) + ea_name_len + ea_value_len + 1;
ea = kzalloc(len, GFP_KERNEL);
if (ea == NULL) {
rc = -ENOMEM;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8107e06d7f6f..4df61129566d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,8 +218,7 @@ struct eventpoll {
struct file *file;
/* used to optimize loop detection check */
- struct list_head visited_list_link;
- int visited;
+ u64 gen;
#ifdef CONFIG_NET_RX_BUSY_POLL
/* used to track busy poll napi_id */
@@ -274,6 +273,8 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+static u64 loop_check_gen = 0;
+
/* Used to check for epoll file descriptor inclusion loops */
static struct nested_calls poll_loop_ncalls;
@@ -283,9 +284,6 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-static LIST_HEAD(visited_list);
-
/*
* List of files with newly added links, where we may need to limit the number
* of emanating paths. Protected by the epmutex.
@@ -1450,7 +1448,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
{
- const char *name;
+ struct name_snapshot n;
struct wakeup_source *ws;
if (!epi->ep->ws) {
@@ -1459,8 +1457,9 @@ static int ep_create_wakeup_source(struct epitem *epi)
return -ENOMEM;
}
- name = epi->ffd.file->f_path.dentry->d_name.name;
- ws = wakeup_source_register(NULL, name);
+ take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+ ws = wakeup_source_register(NULL, n.name.name);
+ release_dentry_name_snapshot(&n);
if (!ws)
return -ENOMEM;
@@ -1522,6 +1521,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
RCU_INIT_POINTER(epi->ws, NULL);
}
+ /* Add the current item to the list of active epoll hook for this file */
+ spin_lock(&tfile->f_lock);
+ list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+ spin_unlock(&tfile->f_lock);
+
+ /*
+ * Add the current item to the RB tree. All RB tree operations are
+ * protected by "mtx", and ep_insert() is called with "mtx" held.
+ */
+ ep_rbtree_insert(ep, epi);
+
+ /* now check if we've created too many backpaths */
+ error = -EINVAL;
+ if (full_check && reverse_path_check())
+ goto error_remove_epi;
+
/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
@@ -1544,22 +1559,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
if (epi->nwait < 0)
goto error_unregister;
- /* Add the current item to the list of active epoll hook for this file */
- spin_lock(&tfile->f_lock);
- list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
- spin_unlock(&tfile->f_lock);
-
- /*
- * Add the current item to the RB tree. All RB tree operations are
- * protected by "mtx", and ep_insert() is called with "mtx" held.
- */
- ep_rbtree_insert(ep, epi);
-
- /* now check if we've created too many backpaths */
- error = -EINVAL;
- if (full_check && reverse_path_check())
- goto error_remove_epi;
-
/* We have to drop the new item inside our item list to keep track of it */
write_lock_irq(&ep->lock);
@@ -1588,6 +1587,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
return 0;
+error_unregister:
+ ep_unregister_pollwait(ep, epi);
error_remove_epi:
spin_lock(&tfile->f_lock);
list_del_rcu(&epi->fllink);
@@ -1595,9 +1596,6 @@ error_remove_epi:
rb_erase_cached(&epi->rbn, &ep->rbr);
-error_unregister:
- ep_unregister_pollwait(ep, epi);
-
/*
* We need to do this because an event could have been arrived on some
* allocated wait queue. Note that we don't care about the ep->ovflist
@@ -1972,13 +1970,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
struct epitem *epi;
mutex_lock_nested(&ep->mtx, call_nests + 1);
- ep->visited = 1;
- list_add(&ep->visited_list_link, &visited_list);
+ ep->gen = loop_check_gen;
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->visited)
+ if (ep_tovisit->gen == loop_check_gen)
continue;
error = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, epi->ffd.file,
@@ -2019,18 +2016,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
- int ret;
- struct eventpoll *ep_cur, *ep_next;
-
- ret = ep_call_nested(&poll_loop_ncalls,
+ return ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, file, ep, current);
- /* clear visited list */
- list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
- visited_list_link) {
- ep_cur->visited = 0;
- list_del(&ep_cur->visited_list_link);
- }
- return ret;
}
static void clear_tfile_check_list(void)
@@ -2195,11 +2182,13 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD) {
if (!list_empty(&f.file->f_ep_links) ||
+ ep->gen == loop_check_gen ||
is_file_epoll(tf.file)) {
mutex_unlock(&ep->mtx);
error = epoll_mutex_lock(&epmutex, 0, nonblock);
if (error)
goto error_tgt_fput;
+ loop_check_gen++;
full_check = 1;
if (is_file_epoll(tf.file)) {
error = -ELOOP;
@@ -2263,6 +2252,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
error_tgt_fput:
if (full_check) {
clear_tfile_check_list();
+ loop_check_gen++;
mutex_unlock(&epmutex);
}
diff --git a/fs/exfat/cache.c b/fs/exfat/cache.c
index 03d0824fc368..5a2f119b7e8c 100644
--- a/fs/exfat/cache.c
+++ b/fs/exfat/cache.c
@@ -17,7 +17,6 @@
#include "exfat_raw.h"
#include "exfat_fs.h"
-#define EXFAT_CACHE_VALID 0
#define EXFAT_MAX_CACHE 16
struct exfat_cache {
@@ -61,16 +60,6 @@ void exfat_cache_shutdown(void)
kmem_cache_destroy(exfat_cachep);
}
-void exfat_cache_init_inode(struct inode *inode)
-{
- struct exfat_inode_info *ei = EXFAT_I(inode);
-
- spin_lock_init(&ei->cache_lru_lock);
- ei->nr_caches = 0;
- ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
- INIT_LIST_HEAD(&ei->cache_lru);
-}
-
static inline struct exfat_cache *exfat_cache_alloc(void)
{
return kmem_cache_alloc(exfat_cachep, GFP_NOFS);
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 95d717f8620c..c013fe931d9c 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -248,6 +248,8 @@ struct exfat_sb_info {
struct rcu_head rcu;
};
+#define EXFAT_CACHE_VALID 0
+
/*
* EXFAT file system inode in-memory data
*/
@@ -428,7 +430,6 @@ extern const struct dentry_operations exfat_utf8_dentry_ops;
/* cache.c */
int exfat_cache_init(void);
void exfat_cache_shutdown(void);
-void exfat_cache_init_inode(struct inode *inode);
void exfat_cache_inval_inode(struct inode *inode);
int exfat_get_cluster(struct inode *inode, unsigned int cluster,
unsigned int *fclus, unsigned int *dclus,
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 7f90204adef5..a6de17cac3df 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -611,8 +611,6 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
ei->i_crtime = info->crtime;
inode->i_atime = info->atime;
- exfat_cache_init_inode(inode);
-
return 0;
}
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index e73f20f66cb2..c94ac239f740 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -578,7 +578,8 @@ static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
i_pos = exfat_make_i_pos(&info);
inode = exfat_build_inode(sb, &info, i_pos);
- if (IS_ERR(inode))
+ err = PTR_ERR_OR_ZERO(inode);
+ if (err)
goto unlock;
inode_inc_iversion(inode);
@@ -745,10 +746,9 @@ static struct dentry *exfat_lookup(struct inode *dir, struct dentry *dentry,
i_pos = exfat_make_i_pos(&info);
inode = exfat_build_inode(sb, &info, i_pos);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
+ err = PTR_ERR_OR_ZERO(inode);
+ if (err)
goto unlock;
- }
i_mode = inode->i_mode;
alias = d_find_alias(inode);
@@ -890,10 +890,9 @@ static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
i_pos = exfat_make_i_pos(&info);
inode = exfat_build_inode(sb, &info, i_pos);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
+ err = PTR_ERR_OR_ZERO(inode);
+ if (err)
goto unlock;
- }
inode_inc_iversion(inode);
inode->i_mtime = inode->i_atime = inode->i_ctime =
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 3b6a1659892f..60b941ba557b 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -376,7 +376,6 @@ static int exfat_read_root(struct inode *inode)
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
current_time(inode);
exfat_truncate_atime(&inode->i_atime);
- exfat_cache_init_inode(inode);
return 0;
}
@@ -763,6 +762,10 @@ static void exfat_inode_init_once(void *foo)
{
struct exfat_inode_info *ei = (struct exfat_inode_info *)foo;
+ spin_lock_init(&ei->cache_lru_lock);
+ ei->nr_caches = 0;
+ ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
+ INIT_LIST_HEAD(&ei->cache_lru);
INIT_HLIST_NODE(&ei->i_hash_fat);
inode_init_once(&ei->vfs_inode);
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6611ef3269a8..43c165e796da 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3091,11 +3091,10 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
ssize_t ret = 0;
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
- bool async_dio = ff->fc->async_dio;
loff_t pos = 0;
struct inode *inode;
loff_t i_size;
- size_t count = iov_iter_count(iter);
+ size_t count = iov_iter_count(iter), shortened = 0;
loff_t offset = iocb->ki_pos;
struct fuse_io_priv *io;
@@ -3103,17 +3102,9 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
inode = file->f_mapping->host;
i_size = i_size_read(inode);
- if ((iov_iter_rw(iter) == READ) && (offset > i_size))
+ if ((iov_iter_rw(iter) == READ) && (offset >= i_size))
return 0;
- /* optimization for short read */
- if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
- if (offset >= i_size)
- return 0;
- iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset));
- count = iov_iter_count(iter);
- }
-
io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
if (!io)
return -ENOMEM;
@@ -3129,15 +3120,22 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
* By default, we want to optimize all I/Os with async request
* submission to the client filesystem if supported.
*/
- io->async = async_dio;
+ io->async = ff->fc->async_dio;
io->iocb = iocb;
io->blocking = is_sync_kiocb(iocb);
+ /* optimization for short read */
+ if (io->async && !io->write && offset + count > i_size) {
+ iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset));
+ shortened = count - iov_iter_count(iter);
+ count -= shortened;
+ }
+
/*
* We cannot asynchronously extend the size of a file.
* In such case the aio will behave exactly like sync io.
*/
- if ((offset + count > i_size) && iov_iter_rw(iter) == WRITE)
+ if ((offset + count > i_size) && io->write)
io->blocking = true;
if (io->async && io->blocking) {
@@ -3155,6 +3153,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
} else {
ret = __fuse_direct_read(io, iter, &pos);
}
+ iov_iter_reexpand(iter, iov_iter_count(iter) + shortened);
if (io->async) {
bool blocking = io->blocking;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3790c7fe9fee..aae0ef2ec34d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1753,6 +1753,9 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
struct io_ring_ctx *ctx = req->ctx;
int ret, notify;
+ if (tsk->flags & PF_EXITING)
+ return -ESRCH;
+
/*
* SQPOLL kernel thread doesn't need notification, just a wakeup. For
* all other cases, use TWA_SIGNAL unconditionally to ensure we're
@@ -1787,8 +1790,10 @@ static void __io_req_task_cancel(struct io_kiocb *req, int error)
static void io_req_task_cancel(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
+ struct io_ring_ctx *ctx = req->ctx;
__io_req_task_cancel(req, -ECANCELED);
+ percpu_ref_put(&ctx->refs);
}
static void __io_req_task_submit(struct io_kiocb *req)
@@ -2010,6 +2015,12 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
static inline bool io_run_task_work(void)
{
+ /*
+ * Not safe to run on exiting task, and the task_work handling will
+ * not add work to such a task.
+ */
+ if (unlikely(current->flags & PF_EXITING))
+ return false;
if (current->task_works) {
__set_current_state(TASK_RUNNING);
task_work_run();
@@ -2283,13 +2294,17 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
goto end_req;
}
- ret = io_import_iovec(rw, req, &iovec, &iter, false);
- if (ret < 0)
- goto end_req;
- ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
- if (!ret)
+ if (!req->io) {
+ ret = io_import_iovec(rw, req, &iovec, &iter, false);
+ if (ret < 0)
+ goto end_req;
+ ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
+ if (!ret)
+ return true;
+ kfree(iovec);
+ } else {
return true;
- kfree(iovec);
+ }
end_req:
req_set_fail_links(req);
io_req_complete(req, ret);
@@ -3034,6 +3049,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
if (!wake_page_match(wpq, key))
return 0;
+ req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
list_del_init(&wait->entry);
init_task_work(&req->task_work, io_req_task_submit);
@@ -3091,6 +3107,7 @@ static bool io_rw_should_retry(struct io_kiocb *req)
wait->wait.flags = 0;
INIT_LIST_HEAD(&wait->wait.entry);
kiocb->ki_flags |= IOCB_WAITQ;
+ kiocb->ki_flags &= ~IOCB_NOWAIT;
kiocb->ki_waitq = wait;
io_get_req_task(req);
@@ -3115,6 +3132,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
struct iov_iter __iter, *iter = &__iter;
ssize_t io_size, ret, ret2;
size_t iov_count;
+ bool no_async;
if (req->io)
iter = &req->io->rw.iter;
@@ -3132,7 +3150,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
kiocb->ki_flags &= ~IOCB_NOWAIT;
/* If the file doesn't support async, just async punt */
- if (force_nonblock && !io_file_supports_async(req->file, READ))
+ no_async = force_nonblock && !io_file_supports_async(req->file, READ);
+ if (no_async)
goto copy_iov;
ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), iov_count);
@@ -3155,10 +3174,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
goto done;
/* some cases will consume bytes even on error returns */
iov_iter_revert(iter, iov_count - iov_iter_count(iter));
- ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
- if (ret)
- goto out_free;
- return -EAGAIN;
+ ret = 0;
+ goto copy_iov;
} else if (ret < 0) {
/* make sure -ERESTARTSYS -> -EINTR is done */
goto done;
@@ -3176,6 +3193,8 @@ copy_iov:
ret = ret2;
goto out_free;
}
+ if (no_async)
+ return -EAGAIN;
/* it's copied and will be cleaned with ->io */
iovec = NULL;
/* now use our persistent iterator, if we aren't already */
@@ -3508,8 +3527,6 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
const char __user *fname;
int ret;
- if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
- return -EINVAL;
if (unlikely(sqe->ioprio || sqe->buf_index))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
@@ -3536,6 +3553,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
u64 flags, mode;
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+ return -EINVAL;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
mode = READ_ONCE(sqe->len);
@@ -3550,6 +3569,8 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
size_t len;
int ret;
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+ return -EINVAL;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
@@ -3767,7 +3788,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
#if defined(CONFIG_EPOLL)
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
return -EINVAL;
req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -3882,7 +3903,7 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
@@ -4724,6 +4745,8 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
if (mask && !(mask & poll->events))
return 0;
+ list_del_init(&wait->entry);
+
if (poll && poll->head) {
bool done;
@@ -5399,6 +5422,8 @@ static int io_async_cancel(struct io_kiocb *req)
static int io_files_update_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
+ if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
+ return -EINVAL;
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
return -EINVAL;
if (sqe->ioprio || sqe->rw_flags)
@@ -5449,6 +5474,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
if (unlikely(ret))
return ret;
+ io_prep_async_work(req);
+
switch (req->opcode) {
case IORING_OP_NOP:
break;
@@ -5646,6 +5673,11 @@ static void __io_clean_op(struct io_kiocb *req)
io_put_file(req, req->splice.file_in,
(req->splice.flags & SPLICE_F_FD_IN_FIXED));
break;
+ case IORING_OP_OPENAT:
+ case IORING_OP_OPENAT2:
+ if (req->open.filename)
+ putname(req->open.filename);
+ break;
}
req->flags &= ~REQ_F_NEED_CLEANUP;
}
@@ -6323,9 +6355,6 @@ static void io_submit_state_start(struct io_submit_state *state,
struct io_ring_ctx *ctx, unsigned int max_ios)
{
blk_start_plug(&state->plug);
-#ifdef CONFIG_BLOCK
- state->plug.nowait = true;
-#endif
state->comp.nr = 0;
INIT_LIST_HEAD(&state->comp.list);
state->comp.ctx = ctx;
@@ -8180,6 +8209,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
/* cancel this request, or head link requests */
io_attempt_cancel(ctx, cancel_req);
io_put_req(cancel_req);
+ /* cancellations _may_ trigger task work */
+ io_run_task_work();
schedule();
finish_wait(&ctx->inflight_wait, &wait);
}
@@ -8385,11 +8416,19 @@ static int io_uring_show_cred(int id, void *p, void *data)
static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
{
+ bool has_lock;
int i;
- mutex_lock(&ctx->uring_lock);
+ /*
+ * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
+ * since fdinfo case grabs it in the opposite direction of normal use
+ * cases. If we fail to get the lock, we just don't iterate any
+ * structures that could be going away outside the io_uring mutex.
+ */
+ has_lock = mutex_trylock(&ctx->uring_lock);
+
seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
- for (i = 0; i < ctx->nr_user_files; i++) {
+ for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
struct fixed_file_table *table;
struct file *f;
@@ -8401,13 +8440,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
seq_printf(m, "%5u: <none>\n", i);
}
seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
- for (i = 0; i < ctx->nr_user_bufs; i++) {
+ for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
(unsigned int) buf->len);
}
- if (!idr_is_empty(&ctx->personality_idr)) {
+ if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
seq_printf(m, "Personalities:\n");
idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
}
@@ -8422,7 +8461,8 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
req->task->task_works != NULL);
}
spin_unlock_irq(&ctx->completion_lock);
- mutex_unlock(&ctx->uring_lock);
+ if (has_lock)
+ mutex_unlock(&ctx->uring_lock);
}
static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e732580fe47b..cb52db9a0cfb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -579,6 +579,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
do {
+ if (entry->label)
+ entry->label->len = NFS4_MAXLABELLEN;
+
status = xdr_decode(desc, entry, &stream);
if (status != 0) {
if (status == -EAGAIN)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index ff8965d1a4d4..a163533446fa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -715,7 +715,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
}
static void
-ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -724,7 +724,7 @@ ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
}
static void
-ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -734,14 +734,14 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
static struct nfs4_pnfs_ds *
ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx,
+ u32 start_idx, u32 *best_idx,
bool check_device)
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
bool fail_return = false;
- int idx;
+ u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
@@ -766,21 +766,21 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
static struct nfs4_pnfs_ds *
ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
}
static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
- int start_idx, int *best_idx)
+ u32 start_idx, u32 *best_idx)
{
struct nfs4_pnfs_ds *ds;
@@ -791,7 +791,8 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
}
static struct nfs4_pnfs_ds *
-ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx)
+ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
+ u32 *best_idx)
{
struct pnfs_layout_segment *lseg = pgio->pg_lseg;
struct nfs4_pnfs_ds *ds;
@@ -837,7 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_pgio_mirror *pgm;
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
- int ds_idx;
+ u32 ds_idx, i;
retry:
ff_layout_pg_check_layout(pgio, req);
@@ -863,14 +864,14 @@ retry:
goto retry;
}
- mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
+ for (i = 0; i < pgio->pg_mirror_count; i++) {
+ mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
+ pgm = &pgio->pg_mirrors[i];
+ pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
+ }
pgio->pg_mirror_idx = ds_idx;
- /* read always uses only one mirror - idx 0 for pgio layer */
- pgm = &pgio->pg_mirrors[0];
- pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
-
if (NFS_SERVER(pgio->pg_inode)->flags &
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
@@ -894,7 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs4_ff_layout_mirror *mirror;
struct nfs_pgio_mirror *pgm;
struct nfs4_pnfs_ds *ds;
- int i;
+ u32 i;
retry:
ff_layout_pg_check_layout(pgio, req);
@@ -1038,7 +1039,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
{
u32 idx = hdr->pgio_mirror_idx + 1;
- int new_idx = 0;
+ u32 new_idx = 0;
if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx))
ff_layout_send_layouterror(hdr->lseg);
@@ -1075,7 +1076,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode;
@@ -1149,7 +1150,7 @@ reset:
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
@@ -1184,7 +1185,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
- int idx)
+ u32 idx)
{
int vers = clp->cl_nfs_mod->rpc_vers->number;
@@ -1211,7 +1212,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
}
static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
- int idx, u64 offset, u64 length,
+ u32 idx, u64 offset, u64 length,
u32 *op_status, int opnum, int error)
{
struct nfs4_ff_layout_mirror *mirror;
@@ -1809,7 +1810,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
loff_t offset = hdr->args.offset;
int vers;
struct nfs_fh *fh;
- int idx = hdr->pgio_mirror_idx;
+ u32 idx = hdr->pgio_mirror_idx;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 142225f0af59..2b2211d1234e 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -356,7 +356,15 @@ static ssize_t _nfs42_proc_copy(struct file *src,
truncate_pagecache_range(dst_inode, pos_dst,
pos_dst + res->write_res.count);
-
+ spin_lock(&dst_inode->i_lock);
+ NFS_I(dst_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
+ NFS_INO_REVAL_FORCED | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA);
+ spin_unlock(&dst_inode->i_lock);
+ spin_lock(&src_inode->i_lock);
+ NFS_I(src_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
+ NFS_INO_REVAL_FORCED | NFS_INO_INVALID_ATIME);
+ spin_unlock(&src_inode->i_lock);
status = res->write_res.count;
out:
if (args->sync)
diff --git a/fs/pipe.c b/fs/pipe.c
index 60dbee457143..117db82b10af 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -106,25 +106,6 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
}
}
-/* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct pipe_inode_info *pipe)
-{
- DEFINE_WAIT(rdwait);
- DEFINE_WAIT(wrwait);
-
- /*
- * Pipes are system-local resources, so sleeping on them
- * is considered a noninteractive wait:
- */
- prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
- prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
- pipe_unlock(pipe);
- schedule();
- finish_wait(&pipe->rd_wait, &rdwait);
- finish_wait(&pipe->wr_wait, &wrwait);
- pipe_lock(pipe);
-}
-
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
@@ -1035,12 +1016,52 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
return do_pipe2(fildes, 0);
}
+/*
+ * This is the stupid "wait for pipe to be readable or writable"
+ * model.
+ *
+ * See pipe_read/write() for the proper kind of exclusive wait,
+ * but that requires that we wake up any other readers/writers
+ * if we then do not end up reading everything (ie the whole
+ * "wake_next_reader/writer" logic in pipe_read/write()).
+ */
+void pipe_wait_readable(struct pipe_inode_info *pipe)
+{
+ pipe_unlock(pipe);
+ wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
+ pipe_lock(pipe);
+}
+
+void pipe_wait_writable(struct pipe_inode_info *pipe)
+{
+ pipe_unlock(pipe);
+ wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
+ pipe_lock(pipe);
+}
+
+/*
+ * This depends on both the wait (here) and the wakeup (wake_up_partner)
+ * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
+ * race with the count check and waitqueue prep.
+ *
+ * Normally in order to avoid races, you'd do the prepare_to_wait() first,
+ * then check the condition you're waiting for, and only then sleep. But
+ * because of the pipe lock, we can check the condition before being on
+ * the wait queue.
+ *
+ * We use the 'rd_wait' waitqueue for pipe partner waiting.
+ */
static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
{
+ DEFINE_WAIT(rdwait);
int cur = *cnt;
while (cur == *cnt) {
- pipe_wait(pipe);
+ prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+ pipe_unlock(pipe);
+ schedule();
+ finish_wait(&pipe->rd_wait, &rdwait);
+ pipe_lock(pipe);
if (signal_pending(current))
break;
}
@@ -1050,7 +1071,6 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
static void wake_up_partner(struct pipe_inode_info *pipe)
{
wake_up_interruptible_all(&pipe->rd_wait);
- wake_up_interruptible_all(&pipe->wr_wait);
}
static int fifo_open(struct inode *inode, struct file *filp)
diff --git a/fs/read_write.c b/fs/read_write.c
index 5db58b8c78d0..d3428189f36b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -538,6 +538,14 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
inc_syscw(current);
return ret;
}
+/*
+ * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
+ * but autofs is one of the few internal kernel users that actually
+ * wants this _and_ can be built as a module. So we need to export
+ * this symbol for autofs, even though it really isn't appropriate
+ * for any other kernel modules.
+ */
+EXPORT_SYMBOL_GPL(__kernel_write);
ssize_t kernel_write(struct file *file, const void *buf, size_t count,
loff_t *pos)
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c4db07..ce75aec52274 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -526,6 +526,22 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
return 1;
}
+/* We know we have a pipe buffer, but maybe it's empty? */
+static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
+{
+ unsigned int tail = pipe->tail;
+ unsigned int mask = pipe->ring_size - 1;
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
+
+ if (unlikely(!buf->len)) {
+ pipe_buf_release(pipe, buf);
+ pipe->tail = tail+1;
+ return true;
+ }
+
+ return false;
+}
+
/**
* splice_from_pipe_next - wait for some data to splice from
* @pipe: pipe to splice from
@@ -545,6 +561,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
if (signal_pending(current))
return -ERESTARTSYS;
+repeat:
while (pipe_empty(pipe->head, pipe->tail)) {
if (!pipe->writers)
return 0;
@@ -563,9 +580,12 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
sd->need_wakeup = false;
}
- pipe_wait(pipe);
+ pipe_wait_readable(pipe);
}
+ if (eat_empty_buffer(pipe))
+ goto repeat;
+
return 1;
}
@@ -1077,7 +1097,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
return -EAGAIN;
if (signal_pending(current))
return -ERESTARTSYS;
- pipe_wait(pipe);
+ pipe_wait_writable(pipe);
}
}
@@ -1454,7 +1474,7 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
ret = -EAGAIN;
break;
}
- pipe_wait(pipe);
+ pipe_wait_readable(pipe);
}
pipe_unlock(pipe);
@@ -1493,7 +1513,7 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
ret = -ERESTARTSYS;
break;
}
- pipe_wait(pipe);
+ pipe_wait_writable(pipe);
}
pipe_unlock(pipe);
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index 8fe03b4a0d2b..25aade344192 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -384,7 +384,7 @@ fail_nomem:
static int vboxsf_parse_monolithic(struct fs_context *fc, void *data)
{
- char *options = data;
+ unsigned char *options = data;
if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 &&
options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 &&