summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2018-11-18 15:46:03 -0700
committerJens Axboe <axboe@kernel.dk>2018-11-18 15:46:03 -0700
commita78b03bc7300e4f17b1e510884bea1095d92b17b (patch)
tree855f219806462da09c8021b27848c58713bb1807 /fs
parentfce15a609f8f30cfacfaf684729add9582be780b (diff)
parent9ff01193a20d391e8dbce4403dd5ef87c7eaaca6 (diff)
Merge tag 'v4.20-rc3' into for-4.21/block
Merge in -rc3 to resolve a few conflicts, but also to get a few important fixes that have gone into mainline since the block 4.21 branch was forked off (most notably the SCSI queue issue, which is both a conflict AND needed fix). Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c63
-rw-r--r--fs/btrfs/free-space-cache.c22
-rw-r--r--fs/btrfs/inode.c37
-rw-r--r--fs/btrfs/ioctl.c14
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/btrfs/tree-checker.c2
-rw-r--r--fs/btrfs/tree-log.c17
-rw-r--r--fs/ceph/file.c11
-rw-r--r--fs/ceph/mds_client.c12
-rw-r--r--fs/ceph/quota.c3
-rw-r--r--fs/ext4/inode.c5
-rw-r--r--fs/ext4/namei.c5
-rw-r--r--fs/ext4/resize.c28
-rw-r--r--fs/ext4/super.c17
-rw-r--r--fs/ext4/xattr.c27
-rw-r--r--fs/fuse/dev.c16
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/gfs2/bmap.c54
-rw-r--r--fs/gfs2/rgrp.c3
-rw-r--r--fs/inode.c7
-rw-r--r--fs/namespace.c28
-rw-r--r--fs/nfs/callback_proc.c4
-rw-r--r--fs/nfs/delegation.c11
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfsd/nfs4proc.c3
-rw-r--r--fs/notify/fanotify/fanotify.c10
-rw-r--r--fs/notify/fsnotify.c7
-rw-r--r--fs/ocfs2/aops.c12
-rw-r--r--fs/ocfs2/cluster/masklog.h9
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c11
-rw-r--r--fs/xfs/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_message.c2
33 files changed, 301 insertions, 164 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80953528572d..68f322f600a0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void);
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+ struct btrfs_root *root, int *new,
+ struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *was_new);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b0ab41da91d1..3f0b6d1936e8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1664,9 +1664,8 @@ static int cleaner_kthread(void *arg)
struct btrfs_root *root = arg;
struct btrfs_fs_info *fs_info = root->fs_info;
int again;
- struct btrfs_trans_handle *trans;
- do {
+ while (1) {
again = 0;
/* Make the cleaner go to sleep early. */
@@ -1715,42 +1714,16 @@ static int cleaner_kthread(void *arg)
*/
btrfs_delete_unused_bgs(fs_info);
sleep:
+ if (kthread_should_park())
+ kthread_parkme();
+ if (kthread_should_stop())
+ return 0;
if (!again) {
set_current_state(TASK_INTERRUPTIBLE);
- if (!kthread_should_stop())
- schedule();
+ schedule();
__set_current_state(TASK_RUNNING);
}
- } while (!kthread_should_stop());
-
- /*
- * Transaction kthread is stopped before us and wakes us up.
- * However we might have started a new transaction and COWed some
- * tree blocks when deleting unused block groups for example. So
- * make sure we commit the transaction we started to have a clean
- * shutdown when evicting the btree inode - if it has dirty pages
- * when we do the final iput() on it, eviction will trigger a
- * writeback for it which will fail with null pointer dereferences
- * since work queues and other resources were already released and
- * destroyed by the time the iput/eviction/writeback is made.
- */
- trans = btrfs_attach_transaction(root);
- if (IS_ERR(trans)) {
- if (PTR_ERR(trans) != -ENOENT)
- btrfs_err(fs_info,
- "cleaner transaction attach returned %ld",
- PTR_ERR(trans));
- } else {
- int ret;
-
- ret = btrfs_commit_transaction(trans);
- if (ret)
- btrfs_err(fs_info,
- "cleaner open transaction commit returned %d",
- ret);
}
-
- return 0;
}
static int transaction_kthread(void *arg)
@@ -3931,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)
int ret;
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
+ /*
+ * We don't want the cleaner to start new transactions, add more delayed
+ * iputs, etc. while we're closing. We can't use kthread_stop() yet
+ * because that frees the task_struct, and the transaction kthread might
+ * still try to wake up the cleaner.
+ */
+ kthread_park(fs_info->cleaner_kthread);
/* wait for the qgroup rescan worker to stop */
btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -3958,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
if (!sb_rdonly(fs_info->sb)) {
/*
- * If the cleaner thread is stopped and there are
- * block groups queued for removal, the deletion will be
- * skipped when we quit the cleaner thread.
+ * The cleaner kthread is stopped, so do one final pass over
+ * unused block groups.
*/
btrfs_delete_unused_bgs(fs_info);
@@ -4359,13 +4338,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
unpin = pinned_extents;
again:
while (1) {
+ /*
+ * The btrfs_finish_extent_commit() may get the same range as
+ * ours between find_first_extent_bit and clear_extent_dirty.
+ * Hence, hold the unused_bg_unpin_mutex to avoid double unpin
+ * the same extent range.
+ */
+ mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY, NULL);
- if (ret)
+ if (ret) {
+ mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
+ }
clear_extent_dirty(unpin, start, end);
btrfs_error_unpin_extent_range(fs_info, start, end);
+ mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4ba0aedc878b..74aa552f4793 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
* sure NOFS is set to keep us from deadlocking.
*/
nofs_flag = memalloc_nofs_save();
- inode = btrfs_iget(fs_info->sb, &location, root, NULL);
+ inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
+ btrfs_release_path(path);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(inode))
return inode;
@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
path->search_commit_root = 1;
path->skip_locking = 1;
+ /*
+ * We must pass a path with search_commit_root set to btrfs_iget in
+ * order to avoid a deadlock when allocating extents for the tree root.
+ *
+ * When we are COWing an extent buffer from the tree root, when looking
+ * for a free extent, at extent-tree.c:find_free_extent(), we can find
+ * block group without its free space cache loaded. When we find one
+ * we must load its space cache which requires reading its free space
+ * cache's inode item from the root tree. If this inode item is located
+ * in the same leaf that we started COWing before, then we end up in
+ * deadlock on the extent buffer (trying to read lock it when we
+ * previously write locked it).
+ *
+ * It's safe to read the inode item using the commit root because
+ * block groups, once loaded, stay in memory forever (until they are
+ * removed) as well as their space caches once loaded. New block groups
+ * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
+ * we will never try to read their inode item while the fs is mounted.
+ */
inode = lookup_free_space_inode(fs_info, block_group, path);
if (IS_ERR(inode)) {
btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d3df5b52278c..9ea4c6f0352f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1531,12 +1531,11 @@ out_check:
}
btrfs_release_path(path);
- if (cur_offset <= end && cow_start == (u64)-1) {
+ if (cur_offset <= end && cow_start == (u64)-1)
cow_start = cur_offset;
- cur_offset = end;
- }
if (cow_start != (u64)-1) {
+ cur_offset = end;
ret = cow_file_range(inode, locked_page, cow_start, end, end,
page_started, nr_written, 1, NULL);
if (ret)
@@ -3570,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
/*
* read an inode from the btree into the in-memory inode
*/
-static int btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode,
+ struct btrfs_path *in_path)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_path *path;
+ struct btrfs_path *path = in_path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3589,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode)
if (!ret)
filled = true;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ if (!path) {
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ }
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
if (ret) {
- btrfs_free_path(path);
+ if (path != in_path)
+ btrfs_free_path(path);
return ret;
}
@@ -3722,7 +3725,8 @@ cache_acl:
btrfs_ino(BTRFS_I(inode)),
root->root_key.objectid, ret);
}
- btrfs_free_path(path);
+ if (path != in_path)
+ btrfs_free_path(path);
if (!maybe_acls)
cache_no_acl(inode);
@@ -5644,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
/* Get an inode object given its location and corresponding root.
* Returns in *is_new if the inode was read from disk
*/
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root, int *new)
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+ struct btrfs_root *root, int *new,
+ struct btrfs_path *path)
{
struct inode *inode;
@@ -5656,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
if (inode->i_state & I_NEW) {
int ret;
- ret = btrfs_read_locked_inode(inode);
+ ret = btrfs_read_locked_inode(inode, path);
if (!ret) {
inode_tree_add(inode);
unlock_new_inode(inode);
@@ -5678,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
return inode;
}
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+ struct btrfs_root *root, int *new)
+{
+ return btrfs_iget_path(s, location, root, new, NULL);
+}
+
static struct inode *new_simple_dir(struct super_block *s,
struct btrfs_key *key,
struct btrfs_root *root)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3ca6943827ef..802a628e9f7d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3488,6 +3488,8 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
len = round_down(i_size_read(src), sz) - loff;
+ if (len == 0)
+ return 0;
olen = len;
}
}
@@ -4257,9 +4259,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
goto out_unlock;
if (len == 0)
olen = len = src->i_size - off;
- /* if we extend to eof, continue to block boundary */
- if (off + len == src->i_size)
+ /*
+ * If we extend to eof, continue to block boundary if and only if the
+ * destination end offset matches the destination file's size, otherwise
+ * we would be corrupting data by placing the eof block into the middle
+ * of a file.
+ */
+ if (off + len == src->i_size) {
+ if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
+ goto out_unlock;
len = ALIGN(src->i_size, bs) - off;
+ }
if (len == 0) {
ret = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b362b45dd757..cbc9d0d2c12d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1916,7 +1916,7 @@ restore:
}
/* Used to sort the devices by max_avail(descending sort) */
-static int btrfs_cmp_device_free_bytes(const void *dev_info1,
+static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
const void *dev_info2)
{
if (((struct btrfs_device_info *)dev_info1)->max_avail >
@@ -1945,8 +1945,8 @@ static inline void btrfs_descending_sort_devices(
* The helper to calc the free space on the devices that can be used to store
* file data.
*/
-static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
- u64 *free_bytes)
+static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
+ u64 *free_bytes)
{
struct btrfs_device_info *devices_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index cab0b1f1f741..efcf89a8ba44 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -440,7 +440,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
type != (BTRFS_BLOCK_GROUP_METADATA |
BTRFS_BLOCK_GROUP_DATA)) {
block_group_err(fs_info, leaf, slot,
-"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
type, hweight64(type),
BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e07f3376b7df..a5ce99a6c936 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4396,6 +4396,23 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
logged_end = end;
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
+ /*
+ * Skip extents outside our logging range. It's important to do
+ * it for correctness because if we don't ignore them, we may
+ * log them before their ordered extent completes, and therefore
+ * we could log them without logging their respective checksums
+ * (the checksum items are added to the csum tree at the very
+ * end of btrfs_finish_ordered_io()). Also leave such extents
+ * outside of our range in the list, since we may have another
+ * ranged fsync in the near future that needs them. If an extent
+ * outside our range corresponds to a hole, log it to avoid
+ * leaving gaps between extents (fsck will complain when we are
+ * not using the NO_HOLES feature).
+ */
+ if ((em->start > end || em->start + em->len <= start) &&
+ em->block_start != EXTENT_MAP_HOLE)
+ continue;
+
list_del_init(&em->list);
/*
* Just an arbitrary number, this can be really CPU intensive
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 27cad84dab23..189df668b6a0 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1931,10 +1931,17 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
if (!prealloc_cf)
return -ENOMEM;
- /* Start by sync'ing the source file */
+ /* Start by sync'ing the source and destination files */
ret = file_write_and_wait_range(src_file, src_off, (src_off + len));
- if (ret < 0)
+ if (ret < 0) {
+ dout("failed to write src file (%zd)\n", ret);
+ goto out;
+ }
+ ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len));
+ if (ret < 0) {
+ dout("failed to write dst file (%zd)\n", ret);
goto out;
+ }
/*
* We need FILE_WR caps for dst_ci and FILE_RD for src_ci as other
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 67a9aeb2f4ec..bd13a3267ae0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -80,12 +80,8 @@ static int parse_reply_info_in(void **p, void *end,
info->symlink = *p;
*p += info->symlink_len;
- if (features & CEPH_FEATURE_DIRLAYOUTHASH)
- ceph_decode_copy_safe(p, end, &info->dir_layout,
- sizeof(info->dir_layout), bad);
- else
- memset(&info->dir_layout, 0, sizeof(info->dir_layout));
-
+ ceph_decode_copy_safe(p, end, &info->dir_layout,
+ sizeof(info->dir_layout), bad);
ceph_decode_32_safe(p, end, info->xattr_len, bad);
ceph_decode_need(p, end, info->xattr_len, bad);
info->xattr_data = *p;
@@ -3182,10 +3178,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
recon_state.pagelist = pagelist;
if (session->s_con.peer_features & CEPH_FEATURE_MDSENC)
recon_state.msg_version = 3;
- else if (session->s_con.peer_features & CEPH_FEATURE_FLOCK)
- recon_state.msg_version = 2;
else
- recon_state.msg_version = 1;
+ recon_state.msg_version = 2;
err = iterate_session_caps(session, encode_caps_cb, &recon_state);
if (err < 0)
goto fail;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 32d4f13784ba..03f4d24db8fe 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -237,7 +237,8 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
ceph_put_snap_realm(mdsc, realm);
realm = next;
}
- ceph_put_snap_realm(mdsc, realm);
+ if (realm)
+ ceph_put_snap_realm(mdsc, realm);
up_read(&mdsc->snap_rwsem);
return exceeded;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 05f01fbd9c7f..22a9d8159720 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5835,9 +5835,10 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
+ put_bh(iloc->bh);
return -EIO;
-
+ }
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 17adcb16a9c8..437f71fe83ae 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -126,6 +126,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
if (!is_dx_block && type == INDEX) {
ext4_error_inode(inode, func, line, block,
"directory leaf block found instead of index block");
+ brelse(bh);
return ERR_PTR(-EFSCORRUPTED);
}
if (!ext4_has_metadata_csum(inode->i_sb) ||
@@ -2811,7 +2812,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
list_del_init(&EXT4_I(inode)->i_orphan);
mutex_unlock(&sbi->s_orphan_lock);
}
- }
+ } else
+ brelse(iloc.bh);
+
jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
jbd_debug(4, "orphan inode %lu will point to %d\n",
inode->i_ino, NEXT_ORPHAN(inode));
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ebbc663d0798..a5efee34415f 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -459,16 +459,18 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
BUFFER_TRACE(bh, "get_write_access");
err = ext4_journal_get_write_access(handle, bh);
- if (err)
+ if (err) {
+ brelse(bh);
return err;
+ }
ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
first_cluster, first_cluster - start, count2);
ext4_set_bits(bh->b_data, first_cluster - start, count2);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ brelse(bh);
if (unlikely(err))
return err;
- brelse(bh);
}
return 0;
@@ -605,7 +607,6 @@ handle_bb:
bh = bclean(handle, sb, block);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
- bh = NULL;
goto out;
}
overhead = ext4_group_overhead_blocks(sb, group);
@@ -618,9 +619,9 @@ handle_bb:
ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
sb->s_blocksize * 8, bh->b_data);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ brelse(bh);
if (err)
goto out;
- brelse(bh);
handle_ib:
if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
@@ -635,18 +636,16 @@ handle_ib:
bh = bclean(handle, sb, block);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
- bh = NULL;
goto out;
}
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
sb->s_blocksize * 8, bh->b_data);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ brelse(bh);
if (err)
goto out;
- brelse(bh);
}
- bh = NULL;
/* Mark group tables in block bitmap */
for (j = 0; j < GROUP_TABLE_COUNT; j++) {
@@ -685,7 +684,6 @@ handle_ib:
}
out:
- brelse(bh);
err2 = ext4_journal_stop(handle);
if (err2 && !err)
err = err2;
@@ -873,6 +871,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
if (unlikely(err)) {
ext4_std_error(sb, err);
+ iloc.bh = NULL;
goto exit_inode;
}
brelse(dind);
@@ -924,6 +923,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
sizeof(struct buffer_head *),
GFP_NOFS);
if (!n_group_desc) {
+ brelse(gdb_bh);
err = -ENOMEM;
ext4_warning(sb, "not enough memory for %lu groups",
gdb_num + 1);
@@ -939,8 +939,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
kvfree(o_group_desc);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
- if (unlikely(err))
- brelse(gdb_bh);
return err;
}
@@ -1124,8 +1122,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
backup_block, backup_block -
ext4_group_first_block_no(sb, group));
BUFFER_TRACE(bh, "get_write_access");
- if ((err = ext4_journal_get_write_access(handle, bh)))
+ if ((err = ext4_journal_get_write_access(handle, bh))) {
+ brelse(bh);
break;
+ }
lock_buffer(bh);
memcpy(bh->b_data, data, size);
if (rest)
@@ -2023,7 +2023,7 @@ retry:
err = ext4_alloc_flex_bg_array(sb, n_group + 1);
if (err)
- return err;
+ goto out;
err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
if (err)
@@ -2059,6 +2059,10 @@ retry:
n_blocks_count_retry = 0;
free_flex_gd(flex_gd);
flex_gd = NULL;
+ if (resize_inode) {
+ iput(resize_inode);
+ resize_inode = NULL;
+ }
goto retry;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a221f1cdf704..53ff6c2a26ed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4075,6 +4075,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_groups_count = blocks_count;
sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
+ if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+ le32_to_cpu(es->s_inodes_count)) {
+ ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+ le32_to_cpu(es->s_inodes_count),
+ ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+ ret = -EINVAL;
+ goto failed_mount;
+ }
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
if (ext4_has_feature_meta_bg(sb)) {
@@ -4094,14 +4102,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ret = -ENOMEM;
goto failed_mount;
}
- if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
- le32_to_cpu(es->s_inodes_count)) {
- ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
- le32_to_cpu(es->s_inodes_count),
- ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
- ret = -EINVAL;
- goto failed_mount;
- }
bgl_lock_init(sbi->s_blockgroup_lock);
@@ -4510,6 +4510,7 @@ failed_mount6:
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+ percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
failed_mount5:
ext4_ext_release(sb);
ext4_release_system_zone(sb);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index f36fc5d5b257..7643d52c776c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1031,10 +1031,8 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
inode_lock(ea_inode);
ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
- if (ret) {
- iloc.bh = NULL;
+ if (ret)
goto out;
- }
ref_count = ext4_xattr_inode_get_ref(ea_inode);
ref_count += ref_change;
@@ -1080,12 +1078,10 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
}
ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc);
- iloc.bh = NULL;
if (ret)
ext4_warning_inode(ea_inode,
"ext4_mark_iloc_dirty() failed ret=%d", ret);
out:
- brelse(iloc.bh);
inode_unlock(ea_inode);
return ret;
}
@@ -1388,6 +1384,12 @@ retry:
bh = ext4_getblk(handle, ea_inode, block, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
+ if (!bh) {
+ WARN_ON_ONCE(1);
+ EXT4_ERROR_INODE(ea_inode,
+ "ext4_getblk() return bh = NULL");
+ return -EFSCORRUPTED;
+ }
ret = ext4_journal_get_write_access(handle, bh);
if (ret)
goto out;
@@ -2276,8 +2278,10 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
if (!bh)
return ERR_PTR(-EIO);
error = ext4_xattr_check_block(inode, bh);
- if (error)
+ if (error) {
+ brelse(bh);
return ERR_PTR(error);
+ }
return bh;
}
@@ -2397,6 +2401,8 @@ retry_inode:
error = ext4_xattr_block_set(handle, inode, &i, &bs);
} else if (error == -ENOSPC) {
if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
+ brelse(bs.bh);
+ bs.bh = NULL;
error = ext4_xattr_block_find(inode, &i, &bs);
if (error)
goto cleanup;
@@ -2617,6 +2623,8 @@ out:
kfree(buffer);
if (is)
brelse(is->iloc.bh);
+ if (bs)
+ brelse(bs->bh);
kfree(is);
kfree(bs);
@@ -2696,7 +2704,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle)
{
struct ext4_xattr_ibody_header *header;
- struct buffer_head *bh;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
static unsigned int mnt_count;
size_t min_offs;
@@ -2737,13 +2744,17 @@ retry:
* EA block can hold new_extra_isize bytes.
*/
if (EXT4_I(inode)->i_file_acl) {
+ struct buffer_head *bh;
+
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
goto cleanup;
error = ext4_xattr_check_block(inode, bh);
- if (error)
+ if (error) {
+ brelse(bh);
goto cleanup;
+ }
base = BHDR(bh);
end = bh->b_data + bh->b_size;
min_offs = end - base;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ae813e609932..a5e516a40e7a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -165,9 +165,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
static void fuse_drop_waiting(struct fuse_conn *fc)
{
- if (fc->connected) {
- atomic_dec(&fc->num_waiting);
- } else if (atomic_dec_and_test(&fc->num_waiting)) {
+ /*
+ * lockess check of fc->connected is okay, because atomic_dec_and_test()
+ * provides a memory barrier mached with the one in fuse_wait_aborted()
+ * to ensure no wake-up is missed.
+ */
+ if (atomic_dec_and_test(&fc->num_waiting) &&
+ !READ_ONCE(fc->connected)) {
/* wake up aborters */
wake_up_all(&fc->blocked_waitq);
}
@@ -1768,8 +1772,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
req->in.args[1].size = total_len;
err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
- if (err)
+ if (err) {
fuse_retrieve_end(fc, req);
+ fuse_put_request(fc, req);
+ }
return err;
}
@@ -2219,6 +2225,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn);
void fuse_wait_aborted(struct fuse_conn *fc)
{
+ /* matches implicit memory barrier in fuse_drop_waiting() */
+ smp_mb();
wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index cc2121b37bf5..b52f9baaa3e7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2924,10 +2924,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
if (io->async) {
+ bool blocking = io->blocking;
+
fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
/* we have a non-extending, async request, so return */
- if (!io->blocking)
+ if (!blocking)
return -EIOCBQUEUED;
wait_for_completion(&wait);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index a683d9b27d76..9a4a15d646eb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -826,7 +826,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
ret = gfs2_meta_inode_buffer(ip, &dibh);
if (ret)
goto unlock;
- iomap->private = dibh;
+ mp->mp_bh[0] = dibh;
if (gfs2_is_stuffed(ip)) {
if (flags & IOMAP_WRITE) {
@@ -863,9 +863,6 @@ unstuff:
len = lblock_stop - lblock + 1;
iomap->length = len << inode->i_blkbits;
- get_bh(dibh);
- mp->mp_bh[0] = dibh;
-
height = ip->i_height;
while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
height++;
@@ -898,8 +895,6 @@ out:
iomap->bdev = inode->i_sb->s_bdev;
unlock:
up_read(&ip->i_rw_mutex);
- if (ret && dibh)
- brelse(dibh);
return ret;
do_alloc:
@@ -980,9 +975,9 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
loff_t length, unsigned flags,
- struct iomap *iomap)
+ struct iomap *iomap,
+ struct metapath *mp)
{
- struct metapath mp = { .mp_aheight = 1, };
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
@@ -996,9 +991,9 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
unstuff = gfs2_is_stuffed(ip) &&
pos + length > gfs2_max_stuffed_size(ip);
- ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+ ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
if (ret)
- goto out_release;
+ goto out_unlock;
alloc_required = unstuff || iomap->type == IOMAP_HOLE;
@@ -1013,7 +1008,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
ret = gfs2_quota_lock_check(ip, &ap);
if (ret)
- goto out_release;
+ goto out_unlock;
ret = gfs2_inplace_reserve(ip, &ap);
if (ret)
@@ -1038,17 +1033,15 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
ret = gfs2_unstuff_dinode(ip, NULL);
if (ret)
goto out_trans_end;
- release_metapath(&mp);
- brelse(iomap->private);
- iomap->private = NULL;
+ release_metapath(mp);
ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
- flags, iomap, &mp);
+ flags, iomap, mp);
if (ret)
goto out_trans_end;
}
if (iomap->type == IOMAP_HOLE) {
- ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+ ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
if (ret) {
gfs2_trans_end(sdp);
gfs2_inplace_release(ip);
@@ -1056,7 +1049,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
goto out_qunlock;
}
}
- release_metapath(&mp);
if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
iomap->page_done = gfs2_iomap_journaled_page_done;
return 0;
@@ -1069,10 +1061,7 @@ out_trans_fail:
out_qunlock:
if (alloc_required)
gfs2_quota_unlock(ip);
-out_release:
- if (iomap->private)
- brelse(iomap->private);
- release_metapath(&mp);
+out_unlock:
gfs2_write_unlock(inode);
return ret;
}
@@ -1088,10 +1077,10 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
trace_gfs2_iomap_start(ip, pos, length, flags);
if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
- ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
+ ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
} else {
ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
- release_metapath(&mp);
+
/*
* Silently fall back to buffered I/O for stuffed files or if
* we've hot a hole (see gfs2_file_direct_write).
@@ -1100,6 +1089,11 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
iomap->type != IOMAP_MAPPED)
ret = -ENOTBLK;
}
+ if (!ret) {
+ get_bh(mp.mp_bh[0]);
+ iomap->private = mp.mp_bh[0];
+ }
+ release_metapath(&mp);
trace_gfs2_iomap_end(ip, iomap, ret);
return ret;
}
@@ -1908,10 +1902,16 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
if (ret < 0)
goto out;
- /* issue read-ahead on metadata */
- if (mp.mp_aheight > 1) {
- for (; ret > 1; ret--) {
- metapointer_range(&mp, mp.mp_aheight - ret,
+ /* On the first pass, issue read-ahead on metadata. */
+ if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
+ unsigned int height = mp.mp_aheight - 1;
+
+ /* No read-ahead for data blocks. */
+ if (mp.mp_aheight - 1 == strip_h)
+ height--;
+
+ for (; height >= mp.mp_aheight - ret; height--) {
+ metapointer_range(&mp, height,
start_list, start_aligned,
end_list, end_aligned,
&start, &end);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ffe3032b1043..b08a530433ad 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -733,6 +733,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
if (gl) {
glock_clear_object(gl, rgd);
+ gfs2_rgrp_brelse(rgd);
gfs2_glock_put(gl);
}
@@ -1174,7 +1175,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
* @rgd: the struct gfs2_rgrpd describing the RG to read in
*
* Read in all of a Resource Group's header and bitmap blocks.
- * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
+ * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
*
* Returns: errno
*/
diff --git a/fs/inode.c b/fs/inode.c
index 9e198f00b64c..35d2108d567c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -730,8 +730,11 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
return LRU_REMOVED;
}
- /* recently referenced inodes get one more pass */
- if (inode->i_state & I_REFERENCED) {
+ /*
+ * Recently referenced inodes and inodes with many attached pages
+ * get one more pass.
+ */
+ if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) {
inode->i_state &= ~I_REFERENCED;
spin_unlock(&inode->i_lock);
return LRU_ROTATE;
diff --git a/fs/namespace.c b/fs/namespace.c
index 98d27da43304..a7f91265ea67 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -695,9 +695,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
hlist_for_each_entry(mp, chain, m_hash) {
if (mp->m_dentry == dentry) {
- /* might be worth a WARN_ON() */
- if (d_unlinked(dentry))
- return ERR_PTR(-ENOENT);
mp->m_count++;
return mp;
}
@@ -711,6 +708,9 @@ static struct mountpoint *get_mountpoint(struct dentry *dentry)
int ret;
if (d_mountpoint(dentry)) {
+ /* might be worth a WARN_ON() */
+ if (d_unlinked(dentry))
+ return ERR_PTR(-ENOENT);
mountpoint:
read_seqlock_excl(&mount_lock);
mp = lookup_mountpoint(dentry);
@@ -1540,8 +1540,13 @@ static int do_umount(struct mount *mnt, int flags)
namespace_lock();
lock_mount_hash();
- event++;
+ /* Recheck MNT_LOCKED with the locks held */
+ retval = -EINVAL;
+ if (mnt->mnt.mnt_flags & MNT_LOCKED)
+ goto out;
+
+ event++;
if (flags & MNT_DETACH) {
if (!list_empty(&mnt->mnt_list))
umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1555,6 +1560,7 @@ static int do_umount(struct mount *mnt, int flags)
retval = 0;
}
}
+out:
unlock_mount_hash();
namespace_unlock();
return retval;
@@ -1645,7 +1651,7 @@ int ksys_umount(char __user *name, int flags)
goto dput_and_out;
if (!check_mnt(mnt))
goto dput_and_out;
- if (mnt->mnt.mnt_flags & MNT_LOCKED)
+ if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
goto dput_and_out;
retval = -EPERM;
if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
@@ -1728,8 +1734,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
for (s = r; s; s = next_mnt(s, r)) {
if (!(flag & CL_COPY_UNBINDABLE) &&
IS_MNT_UNBINDABLE(s)) {
- s = skip_mnt_tree(s);
- continue;
+ if (s->mnt.mnt_flags & MNT_LOCKED) {
+ /* Both unbindable and locked. */
+ q = ERR_PTR(-EPERM);
+ goto out;
+ } else {
+ s = skip_mnt_tree(s);
+ continue;
+ }
}
if (!(flag & CL_COPY_MNT_NS_FILE) &&
is_mnt_ns_file(s->mnt.mnt_root)) {
@@ -1782,7 +1794,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
{
namespace_lock();
lock_mount_hash();
- umount_tree(real_mount(mnt), UMOUNT_SYNC);
+ umount_tree(real_mount(mnt), 0);
unlock_mount_hash();
namespace_unlock();
}
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index fa515d5ea5ba..7b861bbc0b43 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -66,7 +66,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
out_iput:
rcu_read_unlock();
trace_nfs4_cb_getattr(cps->clp, &args->fh, inode, -ntohl(res->status));
- iput(inode);
+ nfs_iput_and_deactive(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
return res->status;
@@ -108,7 +108,7 @@ __be32 nfs4_callback_recall(void *argp, void *resp,
}
trace_nfs4_cb_recall(cps->clp, &args->fh, inode,
&args->stateid, -ntohl(res));
- iput(inode);
+ nfs_iput_and_deactive(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
return res;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 07b839560576..6ec2f78c1e19 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -850,16 +850,23 @@ nfs_delegation_find_inode_server(struct nfs_server *server,
const struct nfs_fh *fhandle)
{
struct nfs_delegation *delegation;
- struct inode *res = NULL;
+ struct inode *freeme, *res = NULL;
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
- res = igrab(delegation->inode);
+ freeme = igrab(delegation->inode);
+ if (freeme && nfs_sb_active(freeme->i_sb))
+ res = freeme;
spin_unlock(&delegation->lock);
if (res != NULL)
return res;
+ if (freeme) {
+ rcu_read_unlock();
+ iput(freeme);
+ rcu_read_lock();
+ }
return ERR_PTR(-EAGAIN);
}
spin_unlock(&delegation->lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 62ae0fd345ad..ffea57885394 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2601,11 +2601,12 @@ static void nfs4_state_manager(struct nfs_client *clp)
nfs4_clear_state_manager_bit(clp);
/* Did we race with an attempt to give us more work? */
if (clp->cl_state == 0)
- break;
+ return;
if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
- break;
- } while (refcount_read(&clp->cl_count) > 1);
- return;
+ return;
+ } while (refcount_read(&clp->cl_count) > 1 && !signalled());
+ goto out_drain;
+
out_error:
if (strlen(section))
section_sep = ": ";
@@ -2613,6 +2614,7 @@ out_error:
" with error %d\n", section_sep, section,
clp->cl_hostname, -status);
ssleep(1);
+out_drain:
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index edff074d38c7..d505990dac7c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1038,6 +1038,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
__be32 status;
+ if (!cstate->save_fh.fh_dentry)
+ return nfserr_nofilehandle;
+
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
src_stateid, RD_STATE, src, NULL);
if (status) {
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 5769cf3ff035..e08a6647267b 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -115,12 +115,12 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
continue;
mark = iter_info->marks[type];
/*
- * if the event is for a child and this inode doesn't care about
- * events on the child, don't send it!
+ * If the event is for a child and this mark doesn't care about
+ * events on a child, don't send it!
*/
- if (type == FSNOTIFY_OBJ_TYPE_INODE &&
- (event_mask & FS_EVENT_ON_CHILD) &&
- !(mark->mask & FS_EVENT_ON_CHILD))
+ if (event_mask & FS_EVENT_ON_CHILD &&
+ (type != FSNOTIFY_OBJ_TYPE_INODE ||
+ !(mark->mask & FS_EVENT_ON_CHILD)))
continue;
marks_mask |= mark->mask;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 2172ba516c61..d2c34900ae05 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -167,9 +167,9 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask
parent = dget_parent(dentry);
p_inode = parent->d_inode;
- if (unlikely(!fsnotify_inode_watches_children(p_inode)))
+ if (unlikely(!fsnotify_inode_watches_children(p_inode))) {
__fsnotify_update_child_dentry_flags(p_inode);
- else if (p_inode->i_fsnotify_mask & mask) {
+ } else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) {
struct name_snapshot name;
/* we are notifying a parent so come up with the new mask which
@@ -339,6 +339,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
sb = mnt->mnt.mnt_sb;
mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask;
}
+ /* An event "on child" is not intended for a mount/sb mark */
+ if (mask & FS_EVENT_ON_CHILD)
+ mnt_or_sb_mask = 0;
/*
* Optimization: srcu_read_lock() has a memory barrier which can
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index da578ad4c08f..eb1ce30412dc 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2411,8 +2411,16 @@ static int ocfs2_dio_end_io(struct kiocb *iocb,
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
- if (bytes > 0 && private)
- ret = ocfs2_dio_end_io_write(inode, private, offset, bytes);
+ if (bytes <= 0)
+ mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld",
+ (long long)bytes);
+ if (private) {
+ if (bytes > 0)
+ ret = ocfs2_dio_end_io_write(inode, private, offset,
+ bytes);
+ else
+ ocfs2_dio_free_write_ctx(inode, private);
+ }
ocfs2_iocb_clear_rw_locked(iocb);
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 308ea0eb35fd..a396096a5099 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -178,6 +178,15 @@ do { \
##__VA_ARGS__); \
} while (0)
+#define mlog_ratelimited(mask, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ if (__ratelimit(&_rs)) \
+ mlog(mask, fmt, ##__VA_ARGS__); \
+} while (0)
+
#define mlog_errno(st) ({ \
int _st = (st); \
if (_st != -ERESTARTSYS && _st != -EINTR && \
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 6fc5425b1474..2652d00842d6 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -243,7 +243,7 @@ xfs_attr3_leaf_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_attr_leaf_entry *entries;
- uint16_t end;
+ uint32_t end; /* must be 32bit - see below */
int i;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
@@ -293,6 +293,11 @@ xfs_attr3_leaf_verify(
/*
* Quickly check the freemap information. Attribute data has to be
* aligned to 4-byte boundaries, and likewise for the free space.
+ *
+ * Note that for 64k block size filesystems, the freemap entries cannot
+ * overflow as they are only be16 fields. However, when checking end
+ * pointer of the freemap, we have to be careful to detect overflows and
+ * so use uint32_t for those checks.
*/
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (ichdr.freemap[i].base > mp->m_attr_geo->blksize)
@@ -303,7 +308,9 @@ xfs_attr3_leaf_verify(
return __this_address;
if (ichdr.freemap[i].size & 0x3)
return __this_address;
- end = ichdr.freemap[i].base + ichdr.freemap[i].size;
+
+ /* be care of 16 bit overflows here */
+ end = (uint32_t)ichdr.freemap[i].base + ichdr.freemap[i].size;
if (end < ichdr.freemap[i].base)
return __this_address;
if (end > mp->m_attr_geo->blksize)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6e2c08f30f60..6ecdbb3af7de 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1608,7 +1608,7 @@ xfs_ioc_getbmap(
error = 0;
out_free_buf:
kmem_free(buf);
- return 0;
+ return error;
}
struct getfsmap_info {
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 576c375ce12a..6b736ea58d35 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -107,5 +107,5 @@ assfail(char *expr, char *file, int line)
void
xfs_hex_dump(void *p, int length)
{
- print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
+ print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
}