summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/fs_probe.c4
-rw-r--r--fs/btrfs/ctree.c36
-rw-r--r--fs/btrfs/ioctl.c23
-rw-r--r--fs/btrfs/qgroup.c9
-rw-r--r--fs/btrfs/send.c24
-rw-r--r--fs/btrfs/sysfs.c7
-rw-r--r--fs/btrfs/tree-log.c59
-rw-r--r--fs/btrfs/zoned.c9
-rw-r--r--fs/ceph/caps.c48
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/snap.c3
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/connect.c14
-rw-r--r--fs/cifs/ioctl.c4
-rw-r--r--fs/cifs/sess.c4
-rw-r--r--fs/cifs/smb2ops.c4
-rw-r--r--fs/ext4/extents.c18
-rw-r--r--fs/file.c11
-rw-r--r--fs/fs-writeback.c30
-rw-r--r--fs/fscache/volume.c7
-rw-r--r--fs/fuse/file.c37
-rw-r--r--fs/kernfs/dir.c14
-rw-r--r--fs/ksmbd/vfs.c6
-rw-r--r--fs/namei.c3
-rw-r--r--fs/nfsd/trace.h5
-rw-r--r--fs/nfsd/vfs.c11
-rw-r--r--fs/nilfs2/sufile.c8
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/read_write.c19
-rw-r--r--fs/zonefs/super.c60
-rw-r--r--fs/zonefs/sysfs.c5
-rw-r--r--fs/zonefs/zonefs.h6
32 files changed, 333 insertions, 163 deletions
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index c0031a3ab42f..3ac5fcf98d0d 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -167,8 +167,8 @@ responded:
clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
}
- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
- rtt_us < server->probe.rtt) {
+ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
+ if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a9543f01184c..dcb510f38dda 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -4663,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
int ret;
int i;
- ASSERT(!path->nowait);
+ /*
+ * The nowait semantics are used only for write paths, where we don't
+ * use the tree mod log and sequence numbers.
+ */
+ if (time_seq)
+ ASSERT(!path->nowait);
nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0)
@@ -4683,7 +4688,14 @@ again:
if (path->need_commit_sem) {
path->need_commit_sem = 0;
need_commit_sem = true;
- down_read(&fs_info->commit_root_sem);
+ if (path->nowait) {
+ if (!down_read_trylock(&fs_info->commit_root_sem)) {
+ ret = -EAGAIN;
+ goto done;
+ }
+ } else {
+ down_read(&fs_info->commit_root_sem);
+ }
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
}
@@ -4759,7 +4771,7 @@ again:
next = c;
ret = read_block_for_search(root, path, &next, level,
slot, &key);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN && !path->nowait)
goto again;
if (ret < 0) {
@@ -4769,6 +4781,10 @@ again:
if (!path->skip_locking) {
ret = btrfs_try_tree_read_lock(next);
+ if (!ret && path->nowait) {
+ ret = -EAGAIN;
+ goto done;
+ }
if (!ret && time_seq) {
/*
* If we don't get the lock, we may be racing
@@ -4799,7 +4815,7 @@ again:
ret = read_block_for_search(root, path, &next, level,
0, &key);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN && !path->nowait)
goto again;
if (ret < 0) {
@@ -4807,8 +4823,16 @@ again:
goto done;
}
- if (!path->skip_locking)
- btrfs_tree_read_lock(next);
+ if (!path->skip_locking) {
+ if (path->nowait) {
+ if (!btrfs_try_tree_read_lock(next)) {
+ ret = -EAGAIN;
+ goto done;
+ }
+ } else {
+ btrfs_tree_read_lock(next);
+ }
+ }
}
ret = 0;
done:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d5dd8bed1488..5ba2e810dc6e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
}
}
+ btrfs_free_path(path);
+ path = NULL;
if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
ret = -EFAULT;
@@ -3194,6 +3196,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
}
out:
+ btrfs_free_path(path);
+
if (!ret || ret == -EOVERFLOW) {
rootrefs->num_items = found;
/* update min_treeid for next search */
@@ -3205,7 +3209,6 @@ out:
}
kfree(rootrefs);
- btrfs_free_path(path);
return ret;
}
@@ -4231,6 +4234,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
ipath->fspath->val[i] = rel_ptr;
}
+ btrfs_free_path(path);
+ path = NULL;
ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
ipath->fspath, size);
if (ret) {
@@ -4281,21 +4286,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
size = min_t(u32, loi->size, SZ_16M);
}
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
inodes = init_data_container(size);
if (IS_ERR(inodes)) {
ret = PTR_ERR(inodes);
- inodes = NULL;
- goto out;
+ goto out_loi;
}
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
inodes, ignore_offset);
+ btrfs_free_path(path);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
@@ -4307,7 +4311,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
ret = -EFAULT;
out:
- btrfs_free_path(path);
kvfree(inodes);
out_loi:
kfree(loi);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9334c3157c22..b74105a10f16 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
dstgroup->rsv_excl = inherit->lim.rsv_excl;
- ret = update_qgroup_limit_item(trans, dstgroup);
- if (ret) {
- qgroup_mark_inconsistent(fs_info);
- btrfs_info(fs_info,
- "unable to update quota limit for %llu",
- dstgroup->qgroupid);
- goto unlock;
- }
+ qgroup_dirty(fs_info, dstgroup);
}
if (srcid) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 145c84b44fd0..1c4b693ee4a3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5702,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
u64 ext_len;
u64 clone_len;
u64 clone_data_offset;
+ bool crossed_src_i_size = false;
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(clone_root->root, path);
@@ -5759,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
if (key.offset >= clone_src_i_size)
break;
- if (key.offset + ext_len > clone_src_i_size)
+ if (key.offset + ext_len > clone_src_i_size) {
ext_len = clone_src_i_size - key.offset;
+ crossed_src_i_size = true;
+ }
clone_data_offset = btrfs_file_extent_offset(leaf, ei);
if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
@@ -5821,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = send_clone(sctx, offset, clone_len,
clone_root);
}
+ } else if (crossed_src_i_size && clone_len < len) {
+ /*
+ * If we are at i_size of the clone source inode and we
+ * can not clone from it, terminate the loop. This is
+ * to avoid sending two write operations, one with a
+ * length matching clone_len and the final one after
+ * this loop with a length of len - clone_len.
+ *
+ * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED
+ * was passed to the send ioctl), this helps avoid
+ * sending an encoded write for an offset that is not
+ * sector size aligned, in case the i_size of the source
+ * inode is not sector size aligned. That will make the
+ * receiver fallback to decompression of the data and
+ * writing it using regular buffered IO, therefore while
+ * not incorrect, it's not optimal due decompression and
+ * possible re-compression at the receiver.
+ */
+ break;
} else {
ret = send_extent_data(sctx, dst_path, offset,
clone_len);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 699b54b3acaa..74fef1f49c35 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void)
#ifdef CONFIG_BTRFS_DEBUG
ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group);
- if (ret)
- goto out2;
+ if (ret) {
+ sysfs_unmerge_group(&btrfs_kset->kobj,
+ &btrfs_static_feature_attr_group);
+ goto out_remove_group;
+ }
#endif
return 0;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 813986e38258..c3cf3dabe0b1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans,
u64 *last_old_dentry_offset)
{
struct btrfs_root *log = inode->root->log_root;
- struct extent_buffer *src = path->nodes[0];
- const int nritems = btrfs_header_nritems(src);
+ struct extent_buffer *src;
+ const int nritems = btrfs_header_nritems(path->nodes[0]);
const u64 ino = btrfs_ino(inode);
bool last_found = false;
int batch_start = 0;
int batch_size = 0;
int i;
- for (i = path->slots[0]; i < nritems; i++) {
+ /*
+ * We need to clone the leaf, release the read lock on it, and use the
+ * clone before modifying the log tree. See the comment at copy_items()
+ * about why we need to do this.
+ */
+ src = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!src)
+ return -ENOMEM;
+
+ i = path->slots[0];
+ btrfs_release_path(path);
+ path->nodes[0] = src;
+ path->slots[0] = i;
+
+ for (; i < nritems; i++) {
struct btrfs_dir_item *di;
struct btrfs_key key;
int ret;
@@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
{
struct btrfs_root *log = inode->root->log_root;
struct btrfs_file_extent_item *extent;
- struct extent_buffer *src = src_path->nodes[0];
+ struct extent_buffer *src;
int ret = 0;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
@@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM);
const u64 i_size = i_size_read(&inode->vfs_inode);
+ /*
+ * To keep lockdep happy and avoid deadlocks, clone the source leaf and
+ * use the clone. This is because otherwise we would be changing the log
+ * tree, to insert items from the subvolume tree or insert csum items,
+ * while holding a read lock on a leaf from the subvolume tree, which
+ * creates a nasty lock dependency when COWing log tree nodes/leaves:
+ *
+ * 1) Modifying the log tree triggers an extent buffer allocation while
+ * holding a write lock on a parent extent buffer from the log tree.
+ * Allocating the pages for an extent buffer, or the extent buffer
+ * struct, can trigger inode eviction and finally the inode eviction
+ * will trigger a release/remove of a delayed node, which requires
+ * taking the delayed node's mutex;
+ *
+ * 2) Allocating a metadata extent for a log tree can trigger the async
+ * reclaim thread and make us wait for it to release enough space and
+ * unblock our reservation ticket. The reclaim thread can start
+ * flushing delayed items, and that in turn results in the need to
+ * lock delayed node mutexes and in the need to write lock extent
+ * buffers of a subvolume tree - all this while holding a write lock
+ * on the parent extent buffer in the log tree.
+ *
+ * So one task in scenario 1) running in parallel with another task in
+ * scenario 2) could lead to a deadlock, one wanting to lock a delayed
+ * node mutex while having a read lock on a leaf from the subvolume,
+ * while the other is holding the delayed node's mutex and wants to
+ * write lock the same subvolume leaf for flushing delayed items.
+ */
+ src = btrfs_clone_extent_buffer(src_path->nodes[0]);
+ if (!src)
+ return -ENOMEM;
+
+ i = src_path->slots[0];
+ btrfs_release_path(src_path);
+ src_path->nodes[0] = src;
+ src_path->slots[0] = i;
+
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
if (!ins_data)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1912abf6d020..c9e2b0c85309 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
super[i] = page_address(page[i]);
}
- if (super[0]->generation > super[1]->generation)
+ if (btrfs_super_generation(super[0]) >
+ btrfs_super_generation(super[1]))
sector = zones[1].start;
else
sector = zones[0].start;
@@ -466,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
goto out;
}
- zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
+ zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
if (!zones) {
ret = -ENOMEM;
goto out;
@@ -585,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
- kfree(zones);
+ kvfree(zones);
switch (bdev_zoned_model(bdev)) {
case BLK_ZONED_HM:
@@ -617,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
return 0;
out:
- kfree(zones);
+ kvfree(zones);
out_free_zone_info:
btrfs_destroy_dev_zone_info(device);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index fb023f9fafcb..e54814d0c2f7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2248,7 +2248,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req1 = NULL, *req2 = NULL;
- unsigned int max_sessions;
int ret, err = 0;
spin_lock(&ci->i_unsafe_lock);
@@ -2267,27 +2266,23 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
spin_unlock(&ci->i_unsafe_lock);
/*
- * The mdsc->max_sessions is unlikely to be changed
- * mostly, here we will retry it by reallocating the
- * sessions array memory to get rid of the mdsc->mutex
- * lock.
- */
-retry:
- max_sessions = mdsc->max_sessions;
-
- /*
* Trigger to flush the journal logs in all the relevant MDSes
* manually, or in the worst case we must wait at most 5 seconds
* to wait the journal logs to be flushed by the MDSes periodically.
*/
- if ((req1 || req2) && likely(max_sessions)) {
- struct ceph_mds_session **sessions = NULL;
- struct ceph_mds_session *s;
+ if (req1 || req2) {
struct ceph_mds_request *req;
+ struct ceph_mds_session **sessions;
+ struct ceph_mds_session *s;
+ unsigned int max_sessions;
int i;
+ mutex_lock(&mdsc->mutex);
+ max_sessions = mdsc->max_sessions;
+
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
+ mutex_unlock(&mdsc->mutex);
err = -ENOMEM;
goto out;
}
@@ -2299,16 +2294,6 @@ retry:
s = req->r_session;
if (!s)
continue;
- if (unlikely(s->s_mds >= max_sessions)) {
- spin_unlock(&ci->i_unsafe_lock);
- for (i = 0; i < max_sessions; i++) {
- s = sessions[i];
- if (s)
- ceph_put_mds_session(s);
- }
- kfree(sessions);
- goto retry;
- }
if (!sessions[s->s_mds]) {
s = ceph_get_mds_session(s);
sessions[s->s_mds] = s;
@@ -2321,16 +2306,6 @@ retry:
s = req->r_session;
if (!s)
continue;
- if (unlikely(s->s_mds >= max_sessions)) {
- spin_unlock(&ci->i_unsafe_lock);
- for (i = 0; i < max_sessions; i++) {
- s = sessions[i];
- if (s)
- ceph_put_mds_session(s);
- }
- kfree(sessions);
- goto retry;
- }
if (!sessions[s->s_mds]) {
s = ceph_get_mds_session(s);
sessions[s->s_mds] = s;
@@ -2342,11 +2317,12 @@ retry:
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
if (ci->i_auth_cap) {
- s = ci->i_auth_cap->session;
- if (!sessions[s->s_mds])
- sessions[s->s_mds] = ceph_get_mds_session(s);
+ s = ci->i_auth_cap->session;
+ if (!sessions[s->s_mds])
+ sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
+ mutex_unlock(&mdsc->mutex);
/* send flush mdlog request to MDSes */
for (i = 0; i < max_sessions; i++) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4af5e55abc15..bad9eeb6a1a5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2492,7 +2492,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct inode *parent;
parent = ceph_lookup_inode(sb, ceph_ino(inode));
- if (!parent)
+ if (IS_ERR(parent))
return PTR_ERR(parent);
pci = ceph_inode(parent);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 864cdaa0d2bd..e4151852184e 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -763,7 +763,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
struct ceph_mds_snap_realm *ri; /* encoded */
__le64 *snaps; /* encoded */
__le64 *prior_parent_snaps; /* encoded */
- struct ceph_snap_realm *realm = NULL;
+ struct ceph_snap_realm *realm;
struct ceph_snap_realm *first_realm = NULL;
struct ceph_snap_realm *realm_to_rebuild = NULL;
int rebuild_snapcs;
@@ -774,6 +774,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
dout("%s deletion=%d\n", __func__, deletion);
more:
+ realm = NULL;
rebuild_snapcs = 0;
ceph_decode_need(&p, e, sizeof(*ri), bad);
ri = p;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index fe220686bba4..712a43161448 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1281,7 +1281,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
rc = filemap_write_and_wait_range(src_inode->i_mapping, off,
off + len - 1);
if (rc)
- goto out;
+ goto unlock;
/* should we flush first and last page first */
truncate_inode_pages(&target_inode->i_data, 0);
@@ -1297,6 +1297,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
* that target is updated on the server
*/
CIFS_I(target_inode)->time = 0;
+
+unlock:
/* although unlocking in the reverse order from locking is not
* strictly necessary here it is a little cleaner to be consistent
*/
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1cc47dd3b4d6..9db9527c61cf 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3855,9 +3855,13 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id);
out:
- free_xid(mnt_ctx.xid);
cifs_try_adding_channels(cifs_sb, mnt_ctx.ses);
- return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
+ rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
+ if (rc)
+ goto error;
+
+ free_xid(mnt_ctx.xid);
+ return rc;
error:
dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id);
@@ -3884,8 +3888,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
goto error;
}
+ rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
+ if (rc)
+ goto error;
+
free_xid(mnt_ctx.xid);
- return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
+ return rc;
error:
mount_put_conns(&mnt_ctx);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 89d5fa887364..6419ec47c2a8 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -343,7 +343,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = put_user(ExtAttrBits &
FS_FL_USER_VISIBLE,
(int __user *)arg);
- if (rc != EOPNOTSUPP)
+ if (rc != -EOPNOTSUPP)
break;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
@@ -373,7 +373,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
* pSMBFile->fid.netfid,
* extAttrBits,
* &ExtAttrMask);
- * if (rc != EOPNOTSUPP)
+ * if (rc != -EOPNOTSUPP)
* break;
*/
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 92e4278ec35d..9e7d9f0baa18 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -302,14 +302,14 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
/* now drop the ref to the current iface */
if (old_iface && iface) {
- kref_put(&old_iface->refcount, release_iface);
cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
&old_iface->sockaddr,
&iface->sockaddr);
- } else if (old_iface) {
kref_put(&old_iface->refcount, release_iface);
+ } else if (old_iface) {
cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
&old_iface->sockaddr);
+ kref_put(&old_iface->refcount, release_iface);
} else {
WARN_ON(!iface);
cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 880cd494afea..bfaafd02fb1f 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1116,6 +1116,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
COMPOUND_FID, current->tgid,
FILE_FULL_EA_INFORMATION,
SMB2_O_INFO_FILE, 0, data, size);
+ if (rc)
+ goto sea_exit;
smb2_set_next_command(tcon, &rqst[1]);
smb2_set_related(&rqst[1]);
@@ -1126,6 +1128,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
rqst[2].rq_nvec = 1;
rc = SMB2_close_init(tcon, server,
&rqst[2], COMPOUND_FID, COMPOUND_FID, false);
+ if (rc)
+ goto sea_exit;
smb2_set_related(&rqst[2]);
rc = compound_send_recv(xid, ses, server,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f1956288307f..6c399a8b22b3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5184,6 +5184,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* and it is decreased till we reach start.
*/
again:
+ ret = 0;
if (SHIFT == SHIFT_LEFT)
iterator = &start;
else
@@ -5227,14 +5228,21 @@ again:
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- if (le32_to_cpu(extent->ee_block) > 0)
+ if (le32_to_cpu(extent->ee_block) > start)
*iterator = le32_to_cpu(extent->ee_block) - 1;
- else
- /* Beginning is reached, end of the loop */
+ else if (le32_to_cpu(extent->ee_block) == start)
iterator = NULL;
- /* Update path extent in case we need to stop */
- while (le32_to_cpu(extent->ee_block) < start)
+ else {
+ extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+ while (le32_to_cpu(extent->ee_block) >= start)
+ extent--;
+
+ if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
+ break;
+
extent++;
+ iterator = NULL;
+ }
path[depth].p_ext = extent;
}
ret = ext4_ext_shift_path_extents(path, shift, inode,
diff --git a/fs/file.c b/fs/file.c
index 5f9c802a5d8d..c942c89ca4cd 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
struct files_struct *files = current->files;
struct file *file;
- if (atomic_read(&files->count) == 1) {
+ /*
+ * If another thread is concurrently calling close_fd() followed
+ * by put_files_struct(), we must not observe the old table
+ * entry combined with the new refcount - otherwise we could
+ * return a file that is concurrently being freed.
+ *
+ * atomic_read_acquire() pairs with atomic_dec_and_test() in
+ * put_files_struct().
+ */
+ if (atomic_read_acquire(&files->count) == 1) {
file = files_lookup_fd_raw(files, fd);
if (!file || unlikely(file->f_mode & mask))
return 0;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 443f83382b9b..9958d4020771 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1712,18 +1712,26 @@ static int writeback_single_inode(struct inode *inode,
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
/*
- * If the inode is now fully clean, then it can be safely removed from
- * its writeback list (if any). Otherwise the flusher threads are
- * responsible for the writeback lists.
+ * If the inode is freeing, its i_io_list shoudn't be updated
+ * as it can be finally deleted at this moment.
*/
- if (!(inode->i_state & I_DIRTY_ALL))
- inode_cgwb_move_to_attached(inode, wb);
- else if (!(inode->i_state & I_SYNC_QUEUED)) {
- if ((inode->i_state & I_DIRTY))
- redirty_tail_locked(inode, wb);
- else if (inode->i_state & I_DIRTY_TIME) {
- inode->dirtied_when = jiffies;
- inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+ if (!(inode->i_state & I_FREEING)) {
+ /*
+ * If the inode is now fully clean, then it can be safely
+ * removed from its writeback list (if any). Otherwise the
+ * flusher threads are responsible for the writeback lists.
+ */
+ if (!(inode->i_state & I_DIRTY_ALL))
+ inode_cgwb_move_to_attached(inode, wb);
+ else if (!(inode->i_state & I_SYNC_QUEUED)) {
+ if ((inode->i_state & I_DIRTY))
+ redirty_tail_locked(inode, wb);
+ else if (inode->i_state & I_DIRTY_TIME) {
+ inode->dirtied_when = jiffies;
+ inode_io_list_move_locked(inode,
+ wb,
+ &wb->b_dirty_time);
+ }
}
}
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index a058e0136bfe..ab8ceddf9efa 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -203,7 +203,11 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
struct fscache_volume *volume;
struct fscache_cache *cache;
size_t klen, hlen;
- char *key;
+ u8 *key;
+
+ klen = strlen(volume_key);
+ if (klen > NAME_MAX)
+ return NULL;
if (!coherency_data)
coherency_len = 0;
@@ -229,7 +233,6 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
/* Stick the length on the front of the key and pad it out to make
* hashing easier.
*/
- klen = strlen(volume_key);
hlen = round_up(1 + klen + 1, sizeof(__le32));
key = kzalloc(hlen, GFP_KERNEL);
if (!key)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 71bfb663aac5..89f4741728ba 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
.mode = mode
};
int err;
- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
- (mode & (FALLOC_FL_PUNCH_HOLE |
- FALLOC_FL_ZERO_RANGE));
-
- bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
+ bool block_faults = FUSE_IS_DAX(inode) &&
+ (!(mode & FALLOC_FL_KEEP_SIZE) ||
+ (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)));
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_ZERO_RANGE))
@@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (fm->fc->no_fallocate)
return -EOPNOTSUPP;
- if (lock_inode) {
- inode_lock(inode);
- if (block_faults) {
- filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
- if (err)
- goto out;
- }
+ inode_lock(inode);
+ if (block_faults) {
+ filemap_invalidate_lock(inode->i_mapping);
+ err = fuse_dax_break_layouts(inode, 0, 0);
+ if (err)
+ goto out;
+ }
- if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
- loff_t endbyte = offset + length - 1;
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
+ loff_t endbyte = offset + length - 1;
- err = fuse_writeback_range(inode, offset, endbyte);
- if (err)
- goto out;
- }
+ err = fuse_writeback_range(inode, offset, endbyte);
+ if (err)
+ goto out;
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
@@ -3039,8 +3035,7 @@ out:
if (block_faults)
filemap_invalidate_unlock(inode->i_mapping);
- if (lock_inode)
- inode_unlock(inode);
+ inode_unlock(inode);
fuse_flush_time_update(inode);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 3990f3e270cb..f33b3baad07c 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -31,10 +31,15 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
+static bool __kernfs_active(struct kernfs_node *kn)
+{
+ return atomic_read(&kn->active) >= 0;
+}
+
static bool kernfs_active(struct kernfs_node *kn)
{
lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
- return atomic_read(&kn->active) >= 0;
+ return __kernfs_active(kn);
}
static bool kernfs_lockdep(struct kernfs_node *kn)
@@ -705,7 +710,12 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
goto err_unlock;
}
- if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
+ /*
+ * We should fail if @kn has never been activated and guarantee success
+ * if the caller knows that @kn is active. Both can be achieved by
+ * __kernfs_active() which tests @kn->active without kernfs_rwsem.
+ */
+ if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
goto err_unlock;
spin_unlock(&kernfs_idr_lock);
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 8de970d6146f..94b8ed4ef870 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
ret = vfs_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off, len, 0);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(src_fp->filp, src_off,
- dst_fp->filp, dst_off,
- len, 0);
+ ret = vfs_copy_file_range(src_fp->filp, src_off,
+ dst_fp->filp, dst_off, len,
+ COPY_FILE_SPLICE);
if (ret < 0)
return ret;
diff --git a/fs/namei.c b/fs/namei.c
index 578c2110df02..9155ecb547ce 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
struct inode *dir = d_inode(parentpath->dentry);
struct inode *inode;
int error;
+ int open_flag = file->f_flags;
/* we want directory to be writable */
error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
@@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns,
if (error)
return error;
inode = file_inode(file);
- if (!(file->f_flags & O_EXCL)) {
+ if (!(open_flag & O_EXCL)) {
spin_lock(&inode->i_lock);
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 06a96e955bd0..d4b6839bb459 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -254,7 +254,10 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err,
rqstp->rq_xprt->xpt_remotelen);
__entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
- __entry->inode = d_inode(fhp->fh_dentry);
+ if (fhp->fh_dentry)
+ __entry->inode = d_inode(fhp->fh_dentry);
+ else
+ __entry->inode = NULL;
__entry->type = type;
__entry->access = access;
__entry->error = be32_to_cpu(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index f650afedd67f..849a720ab43f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
- count, 0);
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
+ COPY_FILE_SPLICE);
return ret;
}
@@ -871,10 +871,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct svc_rqst *rqstp = sd->u.data;
struct page *page = buf->page; // may be a compound one
unsigned offset = buf->offset;
+ struct page *last_page;
- page += offset / PAGE_SIZE;
- for (int i = sd->len; i > 0; i -= PAGE_SIZE)
- svc_rqst_replace_page(rqstp, page++);
+ last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
+ for (page += offset / PAGE_SIZE; page <= last_page; page++)
+ svc_rqst_replace_page(rqstp, page);
if (rqstp->rq_res.page_len == 0) // first call
rqstp->rq_res.page_base = offset % PAGE_SIZE;
rqstp->rq_res.page_len += sd->len;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 77ff8e95421f..dc359b56fdfa 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
struct buffer_head *bh;
+ void *kaddr;
+ struct nilfs_segment_usage *su;
int ret;
+ down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
if (!ret) {
mark_buffer_dirty(bh);
nilfs_mdt_mark_dirty(sufile);
+ kaddr = kmap_atomic(bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ nilfs_segment_usage_set_dirty(su);
+ kunmap_atomic(kaddr);
brelse(bh);
}
+ up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 5101131e6047..440960110a42 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#endif
show_val_kb(m, "PageTables: ",
global_node_page_state(NR_PAGETABLE));
- show_val_kb(m, "SecPageTables: ",
+ show_val_kb(m, "SecPageTables: ",
global_node_page_state(NR_SECONDARY_PAGETABLE));
show_val_kb(m, "NFS_Unstable: ", 0);
diff --git a/fs/read_write.c b/fs/read_write.c
index 328ce8cf9a85..24b9668d6377 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
+ lockdep_assert(sb_write_started(file_inode(file_out)->i_sb));
+
return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
}
@@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
* and several different sets of file_operations, but they all end up
* using the same ->copy_file_range() function pointer.
*/
- if (file_out->f_op->copy_file_range) {
+ if (flags & COPY_FILE_SPLICE) {
+ /* cross sb splice is allowed */
+ } else if (file_out->f_op->copy_file_range) {
if (file_in->f_op->copy_file_range !=
file_out->f_op->copy_file_range)
return -EXDEV;
@@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
size_t len, unsigned int flags)
{
ssize_t ret;
+ bool splice = flags & COPY_FILE_SPLICE;
- if (flags != 0)
+ if (flags & ~COPY_FILE_SPLICE)
return -EINVAL;
ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
@@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* same sb using clone, but for filesystems where both clone and copy
* are supported (e.g. nfs,cifs), we only call the copy method.
*/
- if (file_out->f_op->copy_file_range) {
+ if (!splice && file_out->f_op->copy_file_range) {
ret = file_out->f_op->copy_file_range(file_in, pos_in,
file_out, pos_out,
len, flags);
goto done;
}
- if (file_in->f_op->remap_file_range &&
+ if (!splice && file_in->f_op->remap_file_range &&
file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out,
@@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* consistent story about which filesystems support copy_file_range()
* and which filesystems do not, that will allow userspace tools to
* make consistent desicions w.r.t using copy_file_range().
+ *
+ * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE.
*/
ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
flags);
@@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
pos_out = f_out.file->f_pos;
}
+ ret = -EINVAL;
+ if (flags != 0)
+ goto out;
+
ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
flags);
if (ret > 0) {
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 860f0b1032c6..2c53fbb8d918 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -41,6 +41,13 @@ static void zonefs_account_active(struct inode *inode)
return;
/*
+ * For zones that transitioned to the offline or readonly condition,
+ * we only need to clear the active state.
+ */
+ if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
+ goto out;
+
+ /*
* If the zone is active, that is, if it is explicitly open or
* partially written, check if it was already accounted as active.
*/
@@ -53,6 +60,7 @@ static void zonefs_account_active(struct inode *inode)
return;
}
+out:
/* The zone is not active. If it was, update the active count */
if (zi->i_flags & ZONEFS_ZONE_ACTIVE) {
zi->i_flags &= ~ZONEFS_ZONE_ACTIVE;
@@ -324,6 +332,7 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
inode->i_flags |= S_IMMUTABLE;
inode->i_mode &= ~0777;
zone->wp = zone->start;
+ zi->i_flags |= ZONEFS_ZONE_OFFLINE;
return 0;
case BLK_ZONE_COND_READONLY:
/*
@@ -342,8 +351,10 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
zone->cond = BLK_ZONE_COND_OFFLINE;
inode->i_mode &= ~0777;
zone->wp = zone->start;
+ zi->i_flags |= ZONEFS_ZONE_OFFLINE;
return 0;
}
+ zi->i_flags |= ZONEFS_ZONE_READONLY;
inode->i_mode &= ~0222;
return i_size_read(inode);
case BLK_ZONE_COND_FULL:
@@ -478,8 +489,7 @@ static void __zonefs_io_error(struct inode *inode, bool write)
struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
unsigned int noio_flag;
- unsigned int nr_zones =
- zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+ unsigned int nr_zones = 1;
struct zonefs_ioerr_data err = {
.inode = inode,
.write = write,
@@ -487,6 +497,15 @@ static void __zonefs_io_error(struct inode *inode, bool write)
int ret;
/*
+ * The only files that have more than one zone are conventional zone
+ * files with aggregated conventional zones, for which the inode zone
+ * size is always larger than the device zone size.
+ */
+ if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev))
+ nr_zones = zi->i_zone_size >>
+ (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+
+ /*
* Memory allocations in blkdev_report_zones() can trigger a memory
* reclaim which may in turn cause a recursion into zonefs as well as
* struct request allocations for the same device. The former case may
@@ -1407,6 +1426,14 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
zi->i_ztype = type;
zi->i_zsector = zone->start;
zi->i_zone_size = zone->len << SECTOR_SHIFT;
+ if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
+ !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
+ zonefs_err(sb,
+ "zone size %llu doesn't match device's zone sectors %llu\n",
+ zi->i_zone_size,
+ bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
+ return -EINVAL;
+ }
zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
zone->capacity << SECTOR_SHIFT);
@@ -1456,11 +1483,11 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
struct inode *dir = d_inode(parent);
struct dentry *dentry;
struct inode *inode;
- int ret;
+ int ret = -ENOMEM;
dentry = d_alloc_name(parent, name);
if (!dentry)
- return NULL;
+ return ERR_PTR(ret);
inode = new_inode(parent->d_sb);
if (!inode)
@@ -1485,7 +1512,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent,
dput:
dput(dentry);
- return NULL;
+ return ERR_PTR(ret);
}
struct zonefs_zone_data {
@@ -1505,7 +1532,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
struct blk_zone *zone, *next, *end;
const char *zgroup_name;
char *file_name;
- struct dentry *dir;
+ struct dentry *dir, *dent;
unsigned int n = 0;
int ret;
@@ -1523,8 +1550,8 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
zgroup_name = "seq";
dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type);
- if (!dir) {
- ret = -ENOMEM;
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
goto free;
}
@@ -1570,8 +1597,9 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
* Use the file number within its group as file name.
*/
snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n);
- if (!zonefs_create_inode(dir, file_name, zone, type)) {
- ret = -ENOMEM;
+ dent = zonefs_create_inode(dir, file_name, zone, type);
+ if (IS_ERR(dent)) {
+ ret = PTR_ERR(dent);
goto free;
}
@@ -1905,18 +1933,18 @@ static int __init zonefs_init(void)
if (ret)
return ret;
- ret = register_filesystem(&zonefs_type);
+ ret = zonefs_sysfs_init();
if (ret)
goto destroy_inodecache;
- ret = zonefs_sysfs_init();
+ ret = register_filesystem(&zonefs_type);
if (ret)
- goto unregister_fs;
+ goto sysfs_exit;
return 0;
-unregister_fs:
- unregister_filesystem(&zonefs_type);
+sysfs_exit:
+ zonefs_sysfs_exit();
destroy_inodecache:
zonefs_destroy_inodecache();
@@ -1925,9 +1953,9 @@ destroy_inodecache:
static void __exit zonefs_exit(void)
{
+ unregister_filesystem(&zonefs_type);
zonefs_sysfs_exit();
zonefs_destroy_inodecache();
- unregister_filesystem(&zonefs_type);
}
MODULE_AUTHOR("Damien Le Moal");
diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c
index 9cb6755ce39a..9920689dc098 100644
--- a/fs/zonefs/sysfs.c
+++ b/fs/zonefs/sysfs.c
@@ -15,11 +15,6 @@ struct zonefs_sysfs_attr {
ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf);
};
-static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr)
-{
- return container_of(attr, struct zonefs_sysfs_attr, attr);
-}
-
#define ZONEFS_SYSFS_ATTR_RO(name) \
static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name)
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 4b3de66c3233..1dbe78119ff1 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -39,8 +39,10 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
return ZONEFS_ZTYPE_SEQ;
}
-#define ZONEFS_ZONE_OPEN (1 << 0)
-#define ZONEFS_ZONE_ACTIVE (1 << 1)
+#define ZONEFS_ZONE_OPEN (1U << 0)
+#define ZONEFS_ZONE_ACTIVE (1U << 1)
+#define ZONEFS_ZONE_OFFLINE (1U << 2)
+#define ZONEFS_ZONE_READONLY (1U << 3)
/*
* In-memory inode data.