summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/dir.c10
-rw-r--r--fs/afs/rotate.c21
-rw-r--r--fs/afs/validation.c16
-rw-r--r--fs/bcachefs/alloc_background.c15
-rw-r--r--fs/bcachefs/alloc_foreground.c10
-rw-r--r--fs/bcachefs/bcachefs.h2
-rw-r--r--fs/bcachefs/btree_gc.c2
-rw-r--r--fs/bcachefs/btree_io.c12
-rw-r--r--fs/bcachefs/btree_key_cache.c2
-rw-r--r--fs/bcachefs/btree_update_interior.c44
-rw-r--r--fs/bcachefs/btree_update_interior.h1
-rw-r--r--fs/bcachefs/btree_write_buffer.c2
-rw-r--r--fs/bcachefs/buckets.c6
-rw-r--r--fs/bcachefs/debug.c2
-rw-r--r--fs/bcachefs/ec.c6
-rw-r--r--fs/bcachefs/error.h4
-rw-r--r--fs/bcachefs/extents.h6
-rw-r--r--fs/bcachefs/fs.c3
-rw-r--r--fs/bcachefs/fsck.c33
-rw-r--r--fs/bcachefs/journal.c12
-rw-r--r--fs/bcachefs/journal_io.c15
-rw-r--r--fs/bcachefs/logged_ops.c4
-rw-r--r--fs/bcachefs/movinggc.c3
-rw-r--r--fs/bcachefs/recovery.c6
-rw-r--r--fs/bcachefs/recovery_types.h2
-rw-r--r--fs/bcachefs/snapshot.c32
-rw-r--r--fs/bcachefs/super-io.c8
-rw-r--r--fs/bcachefs/super.c33
-rw-r--r--fs/bcachefs/util.h3
-rw-r--r--fs/btrfs/volumes.c58
-rw-r--r--fs/ceph/caps.c8
-rw-r--r--fs/ceph/file.c31
-rw-r--r--fs/debugfs/inode.c25
-rw-r--r--fs/dlm/dlm_internal.h2
-rw-r--r--fs/dlm/lock.c108
-rw-r--r--fs/dlm/user.c10
-rw-r--r--fs/exfat/dir.c290
-rw-r--r--fs/exfat/exfat_fs.h25
-rw-r--r--fs/exfat/inode.c2
-rw-r--r--fs/exfat/namei.c352
-rw-r--r--fs/f2fs/checkpoint.c74
-rw-r--r--fs/f2fs/compress.c55
-rw-r--r--fs/f2fs/data.c191
-rw-r--r--fs/f2fs/debug.c11
-rw-r--r--fs/f2fs/dir.c10
-rw-r--r--fs/f2fs/extent_cache.c5
-rw-r--r--fs/f2fs/f2fs.h241
-rw-r--r--fs/f2fs/file.c171
-rw-r--r--fs/f2fs/gc.c129
-rw-r--r--fs/f2fs/gc.h4
-rw-r--r--fs/f2fs/namei.c36
-rw-r--r--fs/f2fs/node.c26
-rw-r--r--fs/f2fs/node.h4
-rw-r--r--fs/f2fs/recovery.c56
-rw-r--r--fs/f2fs/segment.c444
-rw-r--r--fs/f2fs/segment.h90
-rw-r--r--fs/f2fs/super.c208
-rw-r--r--fs/f2fs/sysfs.c52
-rw-r--r--fs/f2fs/verity.c16
-rw-r--r--fs/kernfs/dir.c31
-rw-r--r--fs/kernfs/file.c8
-rw-r--r--fs/kernfs/kernfs-internal.h2
-rw-r--r--fs/netfs/fscache_io.c4
-rw-r--r--fs/nfsd/trace.h10
-rw-r--r--fs/orangefs/orangefs-cache.c2
-rw-r--r--fs/orangefs/orangefs-kernel.h10
-rw-r--r--fs/orangefs/super.c4
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/smb/client/cached_dir.c3
-rw-r--r--fs/smb/client/cifsfs.c4
-rw-r--r--fs/smb/client/cifsglob.h5
-rw-r--r--fs/smb/client/connect.c9
-rw-r--r--fs/smb/client/file.c20
-rw-r--r--fs/smb/client/inode.c26
-rw-r--r--fs/smb/client/misc.c3
-rw-r--r--fs/smb/client/sess.c4
-rw-r--r--fs/smb/client/smb2ops.c2
-rw-r--r--fs/smb/client/smb2pdu.c5
-rw-r--r--fs/smb/common/smb2pdu.h12
-rw-r--r--fs/smb/server/glob.h2
-rw-r--r--fs/smb/server/ksmbd_netlink.h1
-rw-r--r--fs/smb/server/mgmt/user_session.c28
-rw-r--r--fs/smb/server/mgmt/user_session.h3
-rw-r--r--fs/smb/server/oplock.c96
-rw-r--r--fs/smb/server/oplock.h7
-rw-r--r--fs/smb/server/server.c1
-rw-r--r--fs/smb/server/smb2misc.c26
-rw-r--r--fs/smb/server/smb2ops.c6
-rw-r--r--fs/smb/server/smb2pdu.c509
-rw-r--r--fs/smb/server/smb2pdu.h15
-rw-r--r--fs/smb/server/smb_common.c11
-rw-r--r--fs/smb/server/vfs.c12
-rw-r--r--fs/smb/server/vfs_cache.c137
-rw-r--r--fs/smb/server/vfs_cache.h9
-rw-r--r--fs/super.c18
-rw-r--r--fs/sysfs/group.c55
-rw-r--r--fs/tracefs/event_inode.c70
-rw-r--r--fs/tracefs/internal.h2
-rw-r--r--fs/ubifs/debug.c9
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/ubifs/file.c443
-rw-r--r--fs/ubifs/find.c32
-rw-r--r--fs/ubifs/journal.c171
-rw-r--r--fs/ubifs/lprops.c6
-rw-r--r--fs/ubifs/lpt_commit.c1
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/ubifs/tnc.c9
-rw-r--r--fs/ubifs/tnc_misc.c22
-rw-r--r--fs/ubifs/ubifs.h5
-rw-r--r--fs/xfs/xfs_buf_mem.c4
-rw-r--r--fs/xfs/xfs_dquot.c18
-rw-r--r--fs/xfs/xfs_trace.h9
112 files changed, 3014 insertions, 1946 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 8a67fc427e74..67afe68972d5 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -474,16 +474,6 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
continue;
}
- /* Don't expose silly rename entries to userspace. */
- if (nlen > 6 &&
- dire->u.name[0] == '.' &&
- ctx->actor != afs_lookup_filldir &&
- ctx->actor != afs_lookup_one_filldir &&
- memcmp(dire->u.name, ".__afs", 6) == 0) {
- ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
- continue;
- }
-
/* found the next entry */
if (!dir_emit(ctx, dire->u.name, nlen,
ntohl(dire->u.vnode),
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 700a27bc8c25..ed04bd1eeae8 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -602,6 +602,8 @@ iterate_address:
goto wait_for_more_probe_results;
alist = op->estate->addresses;
+ best_prio = -1;
+ addr_index = 0;
for (i = 0; i < alist->nr_addrs; i++) {
if (alist->addrs[i].prio > best_prio) {
addr_index = i;
@@ -609,9 +611,7 @@ iterate_address:
}
}
- addr_index = READ_ONCE(alist->preferred);
- if (!test_bit(addr_index, &set))
- addr_index = __ffs(set);
+ alist->preferred = addr_index;
op->addr_index = addr_index;
set_bit(addr_index, &op->addr_tried);
@@ -656,12 +656,6 @@ wait_for_more_probe_results:
next_server:
trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
_debug("next");
- ASSERT(op->estate);
- alist = op->estate->addresses;
- if (op->call_responded &&
- op->addr_index != READ_ONCE(alist->preferred) &&
- test_bit(alist->preferred, &op->addr_tried))
- WRITE_ONCE(alist->preferred, op->addr_index);
op->estate = NULL;
goto pick_server;
@@ -690,14 +684,7 @@ no_more_servers:
failed:
trace_afs_rotate(op, afs_rotate_trace_failed, 0);
op->flags |= AFS_OPERATION_STOP;
- if (op->estate) {
- alist = op->estate->addresses;
- if (op->call_responded &&
- op->addr_index != READ_ONCE(alist->preferred) &&
- test_bit(alist->preferred, &op->addr_tried))
- WRITE_ONCE(alist->preferred, op->addr_index);
- op->estate = NULL;
- }
+ op->estate = NULL;
_leave(" = f [failed %d]", afs_op_error(op));
return false;
}
diff --git a/fs/afs/validation.c b/fs/afs/validation.c
index 46b37f2cce7d..32a53fc8dfb2 100644
--- a/fs/afs/validation.c
+++ b/fs/afs/validation.c
@@ -122,6 +122,9 @@ bool afs_check_validity(const struct afs_vnode *vnode)
const struct afs_volume *volume = vnode->volume;
time64_t deadline = ktime_get_real_seconds() + 10;
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+ return true;
+
if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
atomic64_read(&vnode->cb_expires_at) <= deadline ||
volume->cb_expires_at <= deadline ||
@@ -389,12 +392,17 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
key_serial(key));
if (afs_check_validity(vnode))
- return 0;
+ return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
ret = down_write_killable(&vnode->validate_lock);
if (ret < 0)
goto error;
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ ret = -ESTALE;
+ goto error_unlock;
+ }
+
/* Validate a volume after the v_break has changed or the volume
* callback expired. We only want to do this once per volume per
* v_break change. The actual work will be done when parsing the
@@ -448,12 +456,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
vnode->cb_ro_snapshot = cb_ro_snapshot;
vnode->cb_scrub = cb_scrub;
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- _debug("file already deleted");
- ret = -ESTALE;
- goto error_unlock;
- }
-
/* if the vnode's data version number changed then its contents are
* different */
zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c47f72f2bd58..893e38f9db80 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -532,13 +532,13 @@ int bch2_bucket_gens_init(struct bch_fs *c)
u8 gen = bch2_alloc_to_v4(k, &a)->gen;
unsigned offset;
struct bpos pos = alloc_gens_pos(iter.pos, &offset);
+ int ret2 = 0;
if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
- ret = commit_do(trans, NULL, NULL,
- BCH_TRANS_COMMIT_no_enospc,
- bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
- if (ret)
- break;
+ ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret2)
+ goto iter_err;
have_bucket_gens_key = false;
}
@@ -549,7 +549,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
}
g.v.gens[offset] = gen;
- 0;
+iter_err:
+ ret2;
}));
if (have_bucket_gens_key && !ret)
@@ -852,7 +853,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
bucket_journal_seq);
if (ret) {
bch2_fs_fatal_error(c,
- "error setting bucket_needs_journal_commit: %i", ret);
+ "setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
return ret;
}
}
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index ca58193dd902..214b15c84d1f 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1356,15 +1356,17 @@ retry:
/* Don't retry from all devices if we're out of open buckets: */
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
- int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
+ int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, watermark,
flags, cl);
- if (!ret ||
- bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
- bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+ if (!ret2 ||
+ bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
+ ret = ret2;
goto alloc_done;
+ }
}
/*
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 339dc3e1dcd3..799aa32b6b4d 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -849,6 +849,8 @@ struct bch_fs {
struct workqueue_struct *btree_interior_update_worker;
struct work_struct btree_interior_update_work;
+ struct workqueue_struct *btree_node_rewrite_worker;
+
struct list_head pending_node_rewrites;
struct mutex pending_node_rewrites_lock;
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 584aee7010de..bdaed29f084a 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1392,11 +1392,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
*old,
b->data_type);
gc = *b;
- percpu_up_read(&c->mark_lock);
if (gc.data_type != old_gc.data_type ||
gc.dirty_sectors != old_gc.dirty_sectors)
bch2_dev_usage_update_m(c, ca, &old_gc, &gc);
+ percpu_up_read(&c->mark_lock);
if (metadata_only &&
gc.data_type != BCH_DATA_sb &&
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 624c8287deb4..34df8ccc5fec 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1066,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
- "error decrypting btree node: %i", ret))
+ "decrypting btree node: %s", bch2_err_str(ret)))
goto fsck_err;
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
@@ -1107,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
- "error decrypting btree node: %i\n", ret))
+ "decrypting btree node: %s", bch2_err_str(ret)))
goto fsck_err;
sectors = vstruct_sectors(bne, c->block_bits);
@@ -1338,7 +1338,7 @@ start:
if (saw_error && !btree_node_read_error(b)) {
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->key.k.p);
- bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
+ bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
__func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);
bch2_btree_node_rewrite_async(c, b);
@@ -1874,8 +1874,8 @@ out:
return;
err:
set_btree_node_noevict(b);
- if (!bch2_err_matches(ret, EROFS))
- bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
+ bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
+ "writing btree node: %s", bch2_err_str(ret));
goto out;
}
@@ -2131,7 +2131,7 @@ do_write:
ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
- "error encrypting btree node: %i\n", ret))
+ "encrypting btree node: %s", bch2_err_str(ret)))
goto err;
nonce = btree_nonce(i, b->written << 9);
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 8a71d43444b9..581edcb0911b 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -676,7 +676,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
!bch2_journal_error(j), c,
- "error flushing key cache: %s", bch2_err_str(ret));
+ "flushing key cache: %s", bch2_err_str(ret));
if (ret)
goto out;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 642213ef9f79..b2f5f2e50f7e 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -646,7 +646,7 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_trans_unlock(trans);
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
- "%s(): error %s", __func__, bch2_err_str(ret));
+ "%s", bch2_err_str(ret));
err:
if (as->b) {
@@ -1067,13 +1067,18 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
- if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- watermark < c->journal.watermark) {
+ if (watermark < c->journal.watermark) {
struct journal_res res = { 0 };
+ unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
+
+ if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
+ watermark != BCH_WATERMARK_reclaim)
+ journal_flags |= JOURNAL_RES_GET_NONBLOCK;
ret = drop_locks_do(trans,
- bch2_journal_res_get(&c->journal, &res, 1,
- watermark|JOURNAL_RES_GET_CHECK));
+ bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ ret = -BCH_ERR_journal_reclaim_would_deadlock;
if (ret)
return ERR_PTR(ret);
}
@@ -1117,6 +1122,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
closure_init(&as->cl, NULL);
as->c = c;
as->start_time = start_time;
+ as->ip_started = _RET_IP_;
as->mode = BTREE_INTERIOR_NO_UPDATE;
as->took_gc_lock = true;
as->btree_id = path->btree_id;
@@ -1192,7 +1198,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
err:
bch2_btree_update_free(as, trans);
if (!bch2_err_matches(ret, ENOSPC) &&
- !bch2_err_matches(ret, EROFS))
+ !bch2_err_matches(ret, EROFS) &&
+ ret != -BCH_ERR_journal_reclaim_would_deadlock)
bch_err_fn_ratelimited(c, ret);
return ERR_PTR(ret);
}
@@ -2114,7 +2121,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
ret = bch2_trans_do(c, NULL, NULL, 0,
async_btree_node_rewrite_trans(trans, a));
- bch_err_fn(c, ret);
+ bch_err_fn_ratelimited(c, ret);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
kfree(a);
}
@@ -2161,7 +2168,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
}
- queue_work(c->btree_interior_update_worker, &a->work);
+ queue_work(c->btree_node_rewrite_worker, &a->work);
}
void bch2_do_pending_node_rewrites(struct bch_fs *c)
@@ -2173,7 +2180,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
list_del(&a->list);
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
- queue_work(c->btree_interior_update_worker, &a->work);
+ queue_work(c->btree_node_rewrite_worker, &a->work);
}
mutex_unlock(&c->pending_node_rewrites_lock);
}
@@ -2441,12 +2448,12 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
- prt_printf(out, "%p m %u w %u r %u j %llu\n",
- as,
- as->mode,
- as->nodes_written,
- closure_nr_remaining(&as->cl),
- as->journal.seq);
+ prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+ (void *) as->ip_started,
+ as->mode,
+ as->nodes_written,
+ closure_nr_remaining(&as->cl),
+ as->journal.seq);
mutex_unlock(&c->btree_interior_update_lock);
}
@@ -2510,6 +2517,8 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,
void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
{
+ if (c->btree_node_rewrite_worker)
+ destroy_workqueue(c->btree_node_rewrite_worker);
if (c->btree_interior_update_worker)
destroy_workqueue(c->btree_interior_update_worker);
mempool_exit(&c->btree_interior_update_pool);
@@ -2534,6 +2543,11 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
if (!c->btree_interior_update_worker)
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
+ c->btree_node_rewrite_worker =
+ alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
+ if (!c->btree_node_rewrite_worker)
+ return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
+
if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
sizeof(struct btree_update)))
return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 3439b03719c7..f651dd48aaa0 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -32,6 +32,7 @@ struct btree_update {
struct closure cl;
struct bch_fs *c;
u64 start_time;
+ unsigned long ip_started;
struct list_head list;
struct list_head unwritten_list;
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index b77e7b382b66..5cbad8445782 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -378,7 +378,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
}
}
err:
- bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
+ bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
bch2_journal_pin_drop(j, &wb->flushing.pin);
wb->flushing.keys.nr = 0;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index c2f46b267b3a..96edf2c34d43 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -990,8 +990,8 @@ static int __trigger_extent(struct btree_trans *trans,
ret = !gc
? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
: update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
- bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
- __func__);
+ bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors",
+ bch2_err_str(ret));
if (ret)
return ret;
}
@@ -1020,7 +1020,7 @@ static int __trigger_extent(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, k);
- bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
+ bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
printbuf_exit(&buf);
}
if (ret)
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index b1f147e6be4d..208ce6f0fc43 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -170,7 +170,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf);
+ bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf);
printbuf_exit(&buf);
}
out:
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index b98e2c2b8bf0..082075244e16 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -448,7 +448,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, new);
- bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
+ bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
printbuf_exit(&buf);
return ret;
}
@@ -1868,10 +1868,10 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
return -BCH_ERR_stripe_alloc_blocked;
ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
+ bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
+ "reading stripe key: %s", bch2_err_str(ret));
if (ret) {
bch2_stripe_close(c, h->s);
- if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret));
return ret;
}
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 94491190e09e..ae1d6674c512 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -191,9 +191,9 @@ do { \
void bch2_fatal_error(struct bch_fs *);
-#define bch2_fs_fatal_error(c, ...) \
+#define bch2_fs_fatal_error(c, _msg, ...) \
do { \
- bch_err(c, __VA_ARGS__); \
+ bch_err(c, "%s(): fatal error " _msg, __func__, ##__VA_ARGS__); \
bch2_fatal_error(c); \
} while (0)
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 6219f2c08e4c..fd2669cdd76f 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -108,17 +108,17 @@ static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *en
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
- return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
+ return __extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
}
static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e)
{
- return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
+ return __extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
}
static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
{
- switch (extent_entry_type(e)) {
+ switch (__extent_entry_type(e)) {
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128:
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3f073845bbd7..0ccee05f6887 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -108,7 +108,8 @@ retry:
goto retry;
bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
- "inode %u:%llu not found when updating",
+ "%s: inode %u:%llu not found when updating",
+ bch2_err_str(ret),
inode_inum(inode).subvol,
inode_inum(inode).inum);
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index f48033be3f6b..47d4eefaba7b 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -1114,10 +1114,9 @@ int bch2_check_inodes(struct bch_fs *c)
return ret;
}
-static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
+static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
- u32 restart_count = trans->restart_count;
int ret = 0;
s64 count2;
@@ -1149,7 +1148,14 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
}
fsck_err:
bch_err_fn(c, ret);
- return ret ?: trans_was_restarted(trans, restart_count);
+ return ret;
+}
+
+static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
+{
+ u32 restart_count = trans->restart_count;
+ return check_i_sectors_notnested(trans, w) ?:
+ trans_was_restarted(trans, restart_count);
}
struct extent_end {
@@ -1533,7 +1539,7 @@ int bch2_check_extents(struct bch_fs *c)
check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
check_extent_overbig(trans, &iter, k);
})) ?:
- check_i_sectors(trans, &w));
+ check_i_sectors_notnested(trans, &w));
bch2_disk_reservation_put(c, &res);
extent_ends_exit(&extent_ends);
@@ -1563,10 +1569,9 @@ int bch2_check_indirect_extents(struct bch_fs *c)
return ret;
}
-static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
+static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
- u32 restart_count = trans->restart_count;
int ret = 0;
s64 count2;
@@ -1598,7 +1603,14 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
}
fsck_err:
bch_err_fn(c, ret);
- return ret ?: trans_was_restarted(trans, restart_count);
+ return ret;
+}
+
+static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
+{
+ u32 restart_count = trans->restart_count;
+ return check_subdir_count_notnested(trans, w) ?:
+ trans_was_restarted(trans, restart_count);
}
static int check_dirent_inode_dirent(struct btree_trans *trans,
@@ -2003,7 +2015,8 @@ int bch2_check_dirents(struct bch_fs *c)
k,
NULL, NULL,
BCH_TRANS_COMMIT_no_enospc,
- check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)));
+ check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
+ check_subdir_count_notnested(trans, &dir));
snapshots_seen_exit(&s);
inode_walker_exit(&dir);
@@ -2022,8 +2035,10 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
int ret;
ret = check_key_has_snapshot(trans, iter, k);
- if (ret)
+ if (ret < 0)
return ret;
+ if (ret)
+ return 0;
i = walk_inode(trans, inode, k);
ret = PTR_ERR_OR_ZERO(i);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index f314b2e78ec3..9c9a25dbd613 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -511,18 +511,18 @@ retry:
if (journal_res_get_fast(j, res, flags))
return 0;
+ if (bch2_journal_error(j))
+ return -BCH_ERR_erofs_journal_err;
+
+ if (j->blocked)
+ return -BCH_ERR_journal_res_get_blocked;
+
if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
ret = JOURNAL_ERR_journal_full;
can_discard = j->can_discard;
goto out;
}
- if (j->blocked)
- return -BCH_ERR_journal_res_get_blocked;
-
- if (bch2_journal_error(j))
- return -BCH_ERR_erofs_journal_err;
-
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
ret = JOURNAL_ERR_max_in_flight;
goto out;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index d76c3c0c203f..725fcf46f631 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1082,9 +1082,7 @@ reread:
ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
j->encrypted_start,
vstruct_end(j) - (void *) j->encrypted_start);
- bch2_fs_fatal_err_on(ret, c,
- "error decrypting journal entry: %s",
- bch2_err_str(ret));
+ bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret));
mutex_lock(&jlist->lock);
ret = journal_entry_add(c, ca, (struct journal_ptr) {
@@ -1820,7 +1818,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
jset_entry_for_each_key(i, k) {
ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
if (ret) {
- bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer");
+ bch2_fs_fatal_error(c, "flushing journal keys to btree write buffer: %s",
+ bch2_err_str(ret));
bch2_journal_keys_to_write_buffer_end(c, &wb);
return ret;
}
@@ -1848,7 +1847,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
bch2_journal_super_entries_add_common(c, &end, seq);
u64s = (u64 *) end - (u64 *) start;
- BUG_ON(u64s > j->entry_u64s_reserved);
+
+ WARN_ON(u64s > j->entry_u64s_reserved);
le32_add_cpu(&jset->u64s, u64s);
@@ -1856,7 +1856,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
bytes = vstruct_bytes(jset);
if (sectors > w->sectors) {
- bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
+ bch2_fs_fatal_error(c, ": journal write overran available space, %zu > %u (extra %u reserved %u/%u)",
vstruct_bytes(jset), w->sectors << 9,
u64s, w->u64s_reserved, j->entry_u64s_reserved);
return -EINVAL;
@@ -1884,8 +1884,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
jset->encrypted_start,
vstruct_end(jset) - (void *) jset->encrypted_start);
- if (bch2_fs_fatal_err_on(ret, c,
- "error decrypting journal entry: %i", ret))
+ if (bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)))
return ret;
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c
index ad598105c587..9fac838d123e 100644
--- a/fs/bcachefs/logged_ops.c
+++ b/fs/bcachefs/logged_ops.c
@@ -101,8 +101,8 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
- bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s",
- __func__, buf.buf, bch2_err_str(ret));
+ bch2_fs_fatal_error(c, "deleting logged operation %s: %s",
+ buf.buf, bch2_err_str(ret));
printbuf_exit(&buf);
}
}
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 69e06a84dad4..0d2b82d8d11f 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -155,8 +155,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
if (bch2_err_matches(ret, EROFS))
return ret;
- if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()",
- __func__, bch2_err_str(ret)))
+ if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret)))
return ret;
ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 2af219aedfdb..03f9d6afe467 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -90,10 +90,12 @@ static void do_reconstruct_alloc(struct bch_fs *c)
struct journal_keys *keys = &c->journal_keys;
size_t src, dst;
+ move_gap(keys, keys->nr);
+
for (src = 0, dst = 0; src < keys->nr; src++)
if (!btree_id_is_alloc(keys->data[src].btree_id))
keys->data[dst++] = keys->data[src];
- keys->nr = dst;
+ keys->nr = keys->gap = dst;
}
/*
@@ -203,6 +205,8 @@ static int bch2_journal_replay(struct bch_fs *c)
BUG_ON(!atomic_read(&keys->ref));
+ move_gap(keys, keys->nr);
+
/*
* First, attempt to replay keys in sorted order. This is more
* efficient - better locality of btree access - but some might fail if
diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h
index 1361e34d4e64..4959e95e7c74 100644
--- a/fs/bcachefs/recovery_types.h
+++ b/fs/bcachefs/recovery_types.h
@@ -13,11 +13,11 @@
* must never change:
*/
#define BCH_RECOVERY_PASSES() \
+ x(check_topology, 4, 0) \
x(alloc_read, 0, PASS_ALWAYS) \
x(stripes_read, 1, PASS_ALWAYS) \
x(initialize_subvolumes, 2, 0) \
x(snapshots_read, 3, PASS_ALWAYS) \
- x(check_topology, 4, 0) \
x(check_allocations, 5, PASS_FSCK) \
x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \
x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index ac6ba04d5521..39debe814bf3 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -91,18 +91,20 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,
/* Snapshot nodes: */
-static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
+static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
{
- struct snapshot_table *t;
-
- rcu_read_lock();
- t = rcu_dereference(c->snapshots);
-
while (id && id < ancestor)
id = __snapshot_t(t, id)->parent;
+ return id == ancestor;
+}
+
+static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor)
+{
+ rcu_read_lock();
+ bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor);
rcu_read_unlock();
- return id == ancestor;
+ return ret;
}
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
@@ -120,13 +122,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
- struct snapshot_table *t;
bool ret;
- EBUG_ON(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots);
-
rcu_read_lock();
- t = rcu_dereference(c->snapshots);
+ struct snapshot_table *t = rcu_dereference(c->snapshots);
+
+ if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+ ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
+ goto out;
+ }
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
id = get_ancestor_below(t, id, ancestor);
@@ -134,11 +138,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
if (id && id < ancestor) {
ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
- EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor));
+ EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
} else {
ret = id == ancestor;
}
-
+out:
rcu_read_unlock();
return ret;
@@ -547,7 +551,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
"snapshot tree points to missing subvolume:\n %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
- fsck_err_on(!bch2_snapshot_is_ancestor_early(c,
+ fsck_err_on(!bch2_snapshot_is_ancestor(c,
le32_to_cpu(subvol.snapshot),
root_id),
c, snapshot_tree_to_wrong_subvol,
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index bceac29f3d86..ad28e370b640 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -985,7 +985,7 @@ int bch2_write_super(struct bch_fs *c)
prt_str(&buf, " > ");
bch2_version_to_text(&buf, bcachefs_metadata_version_current);
prt_str(&buf, ")");
- bch2_fs_fatal_error(c, "%s", buf.buf);
+ bch2_fs_fatal_error(c, ": %s", buf.buf);
printbuf_exit(&buf);
return -BCH_ERR_sb_not_downgraded;
}
@@ -1005,7 +1005,7 @@ int bch2_write_super(struct bch_fs *c)
if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
bch2_fs_fatal_error(c,
- "Superblock write was silently dropped! (seq %llu expected %llu)",
+ ": Superblock write was silently dropped! (seq %llu expected %llu)",
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
percpu_ref_put(&ca->io_ref);
@@ -1015,7 +1015,7 @@ int bch2_write_super(struct bch_fs *c)
if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
bch2_fs_fatal_error(c,
- "Superblock modified by another process (seq %llu expected %llu)",
+ ": Superblock modified by another process (seq %llu expected %llu)",
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
percpu_ref_put(&ca->io_ref);
@@ -1066,7 +1066,7 @@ int bch2_write_super(struct bch_fs *c)
!can_mount_with_written ||
(can_mount_without_written &&
!can_mount_with_written), c,
- "Unable to write superblock to sufficient devices (from %ps)",
+ ": Unable to write superblock to sufficient devices (from %ps)",
(void *) _RET_IP_))
ret = -1;
out:
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 233f864ed8b0..1ad6e5cd9476 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -87,20 +87,28 @@ const char * const bch2_fs_flag_strs[] = {
NULL
};
-void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
+__printf(2, 0)
+static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args)
{
- struct stdio_redirect *stdio = (void *)(unsigned long)opts->stdio;
-
- va_list args;
- va_start(args, fmt);
- if (likely(!stdio)) {
- vprintk(fmt, args);
- } else {
+#ifdef __KERNEL__
+ if (unlikely(stdio)) {
if (fmt[0] == KERN_SOH[0])
fmt += 2;
bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
+ return;
}
+#endif
+ vprintk(fmt, args);
+}
+
+void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
+{
+ struct stdio_redirect *stdio = (void *)(unsigned long)opts->stdio;
+
+ va_list args;
+ va_start(args, fmt);
+ bch2_print_maybe_redirect(stdio, fmt, args);
va_end(args);
}
@@ -110,14 +118,7 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...)
va_list args;
va_start(args, fmt);
- if (likely(!stdio)) {
- vprintk(fmt, args);
- } else {
- if (fmt[0] == KERN_SOH[0])
- fmt += 2;
-
- bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
- }
+ bch2_print_maybe_redirect(stdio, fmt, args);
va_end(args);
}
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 7ffbddb80400..175aee3074c7 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -683,6 +683,9 @@ static inline void __move_gap(void *array, size_t element_size,
/* Move the gap in a gap buffer: */
#define move_gap(_d, _new_gap) \
do { \
+ BUG_ON(_new_gap > (_d)->nr); \
+ BUG_ON((_d)->gap > (_d)->nr); \
+ \
__move_gap((_d)->data, sizeof((_d)->data[0]), \
(_d)->nr, (_d)->size, (_d)->gap, _new_gap); \
(_d)->gap = _new_gap; \
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a2d07fa3cfdf..1dc1f1946ae0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1303,6 +1303,47 @@ int btrfs_forget_devices(dev_t devt)
return ret;
}
+static bool btrfs_skip_registration(struct btrfs_super_block *disk_super,
+ const char *path, dev_t devt,
+ bool mount_arg_dev)
+{
+ struct btrfs_fs_devices *fs_devices;
+
+ /*
+ * Do not skip device registration for mounted devices with matching
+ * maj:min but different paths. Booting without initrd relies on
+ * /dev/root initially, later replaced with the actual root device.
+ * A successful scan ensures grub2-probe selects the correct device.
+ */
+ list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+ struct btrfs_device *device;
+
+ mutex_lock(&fs_devices->device_list_mutex);
+
+ if (!fs_devices->opened) {
+ mutex_unlock(&fs_devices->device_list_mutex);
+ continue;
+ }
+
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ if (device->bdev && (device->bdev->bd_dev == devt) &&
+ strcmp(device->name->str, path) != 0) {
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+ /* Do not skip registration. */
+ return false;
+ }
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+ }
+
+ if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
+ !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING))
+ return true;
+
+ return false;
+}
+
/*
* Look for a btrfs signature on a device. This may be called out of the mount path
* and we are not allowed to call set_blocksize during the scan. The superblock
@@ -1320,6 +1361,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
struct btrfs_device *device = NULL;
struct file *bdev_file;
u64 bytenr, bytenr_orig;
+ dev_t devt;
int ret;
lockdep_assert_held(&uuid_mutex);
@@ -1359,19 +1401,13 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
goto error_bdev_put;
}
- if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
- !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING)) {
- dev_t devt;
+ devt = file_bdev(bdev_file)->bd_dev;
+ if (btrfs_skip_registration(disk_super, path, devt, mount_arg_dev)) {
+ pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
+ path, MAJOR(devt), MINOR(devt));
- ret = lookup_bdev(path, &devt);
- if (ret)
- btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
- path, ret);
- else
- btrfs_free_stale_devices(devt, NULL);
+ btrfs_free_stale_devices(devt, NULL);
- pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
- path, MAJOR(devt), MINOR(devt));
device = NULL;
goto free_disk_super;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7fb4aae97412..55051ad09c19 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4634,6 +4634,14 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
iput(inode);
spin_lock(&mdsc->cap_delay_lock);
}
+
+ /*
+ * Make sure too many dirty caps or general
+ * slowness doesn't block mdsc delayed work,
+ * preventing send_renew_caps() from running.
+ */
+ if (jiffies - loop_start >= 5 * HZ)
+ break;
}
spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "done\n");
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index abe8028d95bf..16873d07692f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1138,7 +1138,12 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
}
idx = 0;
- left = ret > 0 ? ret : 0;
+ if (ret <= 0)
+ left = 0;
+ else if (off + ret > i_size)
+ left = i_size - off;
+ else
+ left = ret;
while (left > 0) {
size_t plen, copied;
@@ -1167,15 +1172,13 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
}
if (ret > 0) {
- if (off > *ki_pos) {
- if (off >= i_size) {
- *retry_op = CHECK_EOF;
- ret = i_size - *ki_pos;
- *ki_pos = i_size;
- } else {
- ret = off - *ki_pos;
- *ki_pos = off;
- }
+ if (off >= i_size) {
+ *retry_op = CHECK_EOF;
+ ret = i_size - *ki_pos;
+ *ki_pos = i_size;
+ } else {
+ ret = off - *ki_pos;
+ *ki_pos = off;
}
if (last_objver)
@@ -2126,14 +2129,16 @@ again:
int statret;
struct page *page = NULL;
loff_t i_size;
+ int mask = CEPH_STAT_CAP_SIZE;
if (retry_op == READ_INLINE) {
page = __page_cache_alloc(GFP_KERNEL);
if (!page)
return -ENOMEM;
+
+ mask = CEPH_STAT_CAP_INLINE_DATA;
}
- statret = __ceph_do_getattr(inode, page,
- CEPH_STAT_CAP_INLINE_DATA, !!page);
+ statret = __ceph_do_getattr(inode, page, mask, !!page);
if (statret < 0) {
if (page)
__free_page(page);
@@ -2174,7 +2179,7 @@ again:
/* hit EOF or hole? */
if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
ret < len) {
- doutc(cl, "hit hole, ppos %lld < size %lld, reading more\n",
+ doutc(cl, "may hit hole, ppos %lld < size %lld, reading more\n",
iocb->ki_pos, i_size);
read += ret;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 034a617cb1a5..a40da0065433 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -751,13 +751,28 @@ static void __debugfs_file_removed(struct dentry *dentry)
if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
return;
- /* if we hit zero, just wait for all to finish */
- if (!refcount_dec_and_test(&fsd->active_users)) {
- wait_for_completion(&fsd->active_users_drained);
+ /* if this was the last reference, we're done */
+ if (refcount_dec_and_test(&fsd->active_users))
return;
- }
- /* if we didn't hit zero, try to cancel any we can */
+ /*
+ * If there's still a reference, the code that obtained it can
+ * be in different states:
+ * - The common case of not using cancellations, or already
+ * after debugfs_leave_cancellation(), where we just need
+ * to wait for debugfs_file_put() which signals the completion;
+ * - inside a cancellation section, i.e. between
+ * debugfs_enter_cancellation() and debugfs_leave_cancellation(),
+ * in which case we need to trigger the ->cancel() function,
+ * and then wait for debugfs_file_put() just like in the
+ * previous case;
+ * - before debugfs_enter_cancellation() (but obviously after
+ * debugfs_file_get()), in which case we may not see the
+ * cancellation in the list on the first round of the loop,
+ * but debugfs_enter_cancellation() signals the completion
+ * after adding it, so this code gets woken up to call the
+ * ->cancel() function.
+ */
while (refcount_read(&fsd->active_users)) {
struct debugfs_cancellation *c;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index dfc444dad329..3b4dbce849f0 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -246,7 +246,7 @@ struct dlm_lkb {
int8_t lkb_highbast; /* highest mode bast sent for */
int8_t lkb_wait_type; /* type of reply waiting for */
- atomic_t lkb_wait_count;
+ int8_t lkb_wait_count;
int lkb_wait_nodeid; /* for debugging */
struct list_head lkb_statequeue; /* rsb g/c/w list */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 652c51fbbf76..fd752dd03896 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1407,7 +1407,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
int error = 0;
- int wc;
mutex_lock(&ls->ls_waiters_mutex);
@@ -1429,17 +1428,20 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
error = -EBUSY;
goto out;
}
- wc = atomic_inc_return(&lkb->lkb_wait_count);
+ lkb->lkb_wait_count++;
hold_lkb(lkb);
log_debug(ls, "addwait %x cur %d overlap %d count %d f %x",
- lkb->lkb_id, lkb->lkb_wait_type, mstype, wc,
- dlm_iflags_val(lkb));
+ lkb->lkb_id, lkb->lkb_wait_type, mstype,
+ lkb->lkb_wait_count, dlm_iflags_val(lkb));
goto out;
}
- wc = atomic_fetch_inc(&lkb->lkb_wait_count);
- DLM_ASSERT(!wc, dlm_print_lkb(lkb); printk("wait_count %d\n", wc););
+ DLM_ASSERT(!lkb->lkb_wait_count,
+ dlm_print_lkb(lkb);
+ printk("wait_count %d\n", lkb->lkb_wait_count););
+
+ lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype;
lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb);
@@ -1502,7 +1504,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
log_debug(ls, "remwait %x convert_reply zap overlap_cancel",
lkb->lkb_id);
lkb->lkb_wait_type = 0;
- atomic_dec(&lkb->lkb_wait_count);
+ lkb->lkb_wait_count--;
unhold_lkb(lkb);
goto out_del;
}
@@ -1529,15 +1531,16 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
if (overlap_done && lkb->lkb_wait_type) {
log_error(ls, "remwait error %x reply %d wait_type %d overlap",
lkb->lkb_id, mstype, lkb->lkb_wait_type);
- atomic_dec(&lkb->lkb_wait_count);
+ lkb->lkb_wait_count--;
unhold_lkb(lkb);
lkb->lkb_wait_type = 0;
}
- DLM_ASSERT(atomic_read(&lkb->lkb_wait_count), dlm_print_lkb(lkb););
+ DLM_ASSERT(lkb->lkb_wait_count, dlm_print_lkb(lkb););
clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags);
- if (atomic_dec_and_test(&lkb->lkb_wait_count))
+ lkb->lkb_wait_count--;
+ if (!lkb->lkb_wait_count)
list_del_init(&lkb->lkb_wait_reply);
unhold_lkb(lkb);
return 0;
@@ -2666,7 +2669,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
goto out;
/* lock not allowed if there's any op in progress */
- if (lkb->lkb_wait_type || atomic_read(&lkb->lkb_wait_count))
+ if (lkb->lkb_wait_type || lkb->lkb_wait_count)
goto out;
if (is_overlap(lkb))
@@ -2728,7 +2731,7 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
/* normal unlock not allowed if there's any op in progress */
if (!(args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) &&
- (lkb->lkb_wait_type || atomic_read(&lkb->lkb_wait_count)))
+ (lkb->lkb_wait_type || lkb->lkb_wait_count))
goto out;
/* an lkb may be waiting for an rsb lookup to complete where the
@@ -5011,21 +5014,32 @@ static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
return lkb;
}
-/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the
- master or dir-node for r. Processing the lkb may result in it being placed
- back on waiters. */
-
-/* We do this after normal locking has been enabled and any saved messages
- (in requestqueue) have been processed. We should be confident that at
- this point we won't get or process a reply to any of these waiting
- operations. But, new ops may be coming in on the rsbs/locks here from
- userspace or remotely. */
-
-/* there may have been an overlap unlock/cancel prior to recovery or after
- recovery. if before, the lkb may still have a pos wait_count; if after, the
- overlap flag would just have been set and nothing new sent. we can be
- confident here than any replies to either the initial op or overlap ops
- prior to recovery have been received. */
+/*
+ * Forced state reset for locks that were in the middle of remote operations
+ * when recovery happened (i.e. lkbs that were on the waiters list, waiting
+ * for a reply from a remote operation.) The lkbs remaining on the waiters
+ * list need to be reevaluated; some may need resending to a different node
+ * than previously, and some may now need local handling rather than remote.
+ *
+ * First, the lkb state for the voided remote operation is forcibly reset,
+ * equivalent to what remove_from_waiters() would normally do:
+ * . lkb removed from ls_waiters list
+ * . lkb wait_type cleared
+ * . lkb waiters_count cleared
+ * . lkb ref count decremented for each waiters_count (almost always 1,
+ * but possibly 2 in case of cancel/unlock overlapping, which means
+ * two remote replies were being expected for the lkb.)
+ *
+ * Second, the lkb is reprocessed like an original operation would be,
+ * by passing it to _request_lock or _convert_lock, which will either
+ * process the lkb operation locally, or send it to a remote node again
+ * and put the lkb back onto the waiters list.
+ *
+ * When reprocessing the lkb, we may find that it's flagged for an overlapping
+ * force-unlock or cancel, either from before recovery began, or after recovery
+ * finished. If this is the case, the unlock/cancel is done directly, and the
+ * original operation is not initiated again (no _request_lock/_convert_lock.)
+ */
int dlm_recover_waiters_post(struct dlm_ls *ls)
{
@@ -5040,6 +5054,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
break;
}
+ /*
+ * Find an lkb from the waiters list that's been affected by
+ * recovery node changes, and needs to be reprocessed. Does
+ * hold_lkb(), adding a refcount.
+ */
lkb = find_resend_waiter(ls);
if (!lkb)
break;
@@ -5048,6 +5067,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
hold_rsb(r);
lock_rsb(r);
+ /*
+ * If the lkb has been flagged for a force unlock or cancel,
+ * then the reprocessing below will be replaced by just doing
+ * the unlock/cancel directly.
+ */
mstype = lkb->lkb_wait_type;
oc = test_and_clear_bit(DLM_IFL_OVERLAP_CANCEL_BIT,
&lkb->lkb_iflags);
@@ -5061,22 +5085,40 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
r->res_nodeid, lkb->lkb_nodeid, lkb->lkb_wait_nodeid,
dlm_dir_nodeid(r), oc, ou);
- /* At this point we assume that we won't get a reply to any
- previous op or overlap op on this lock. First, do a big
- remove_from_waiters() for all previous ops. */
+ /*
+ * No reply to the pre-recovery operation will now be received,
+ * so a forced equivalent of remove_from_waiters() is needed to
+ * reset the waiters state that was in place before recovery.
+ */
clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags);
+
+ /* Forcibly clear wait_type */
lkb->lkb_wait_type = 0;
- /* drop all wait_count references we still
- * hold a reference for this iteration.
+
+ /*
+ * Forcibly reset wait_count and associated refcount. The
+ * wait_count will almost always be 1, but in case of an
+ * overlapping unlock/cancel it could be 2: see where
+ * add_to_waiters() finds the lkb is already on the waiters
+ * list and does lkb_wait_count++; hold_lkb().
*/
- while (!atomic_dec_and_test(&lkb->lkb_wait_count))
+ while (lkb->lkb_wait_count) {
+ lkb->lkb_wait_count--;
unhold_lkb(lkb);
+ }
+ /* Forcibly remove from waiters list */
mutex_lock(&ls->ls_waiters_mutex);
list_del_init(&lkb->lkb_wait_reply);
mutex_unlock(&ls->ls_waiters_mutex);
+ /*
+ * The lkb is now clear of all prior waiters state and can be
+ * processed locally, or sent to remote node again, or directly
+ * cancelled/unlocked.
+ */
+
if (oc || ou) {
/* do an unlock or cancel instead of resending */
switch (mstype) {
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 695e691b38b3..9f9b68448830 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -806,7 +806,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
struct dlm_lkb *lkb;
DECLARE_WAITQUEUE(wait, current);
struct dlm_callback *cb;
- int rv, copy_lvb = 0;
+ int rv, ret, copy_lvb = 0;
int old_mode, new_mode;
if (count == sizeof(struct dlm_device_version)) {
@@ -906,9 +906,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
trace_dlm_ast(lkb->lkb_resource->res_ls, lkb);
}
- rv = copy_result_to_user(lkb->lkb_ua,
- test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
- cb->flags, cb->mode, copy_lvb, buf, count);
+ ret = copy_result_to_user(lkb->lkb_ua,
+ test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
+ cb->flags, cb->mode, copy_lvb, buf, count);
kref_put(&cb->ref, dlm_release_callback);
@@ -916,7 +916,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
if (rv == DLM_DEQUEUE_CALLBACK_LAST)
dlm_put_lkb(lkb);
- return rv;
+ return ret;
}
static __poll_t device_poll(struct file *file, poll_table *wait)
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 9f9295847a4e..077944d3c2c0 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -448,88 +448,34 @@ static void exfat_init_name_entry(struct exfat_dentry *ep,
}
}
-int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir,
- int entry, unsigned int type, unsigned int start_clu,
- unsigned long long size)
+void exfat_init_dir_entry(struct exfat_entry_set_cache *es,
+ unsigned int type, unsigned int start_clu,
+ unsigned long long size, struct timespec64 *ts)
{
- struct super_block *sb = inode->i_sb;
+ struct super_block *sb = es->sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
- struct timespec64 ts = current_time(inode);
struct exfat_dentry *ep;
- struct buffer_head *bh;
-
- /*
- * We cannot use exfat_get_dentry_set here because file ep is not
- * initialized yet.
- */
- ep = exfat_get_dentry(sb, p_dir, entry, &bh);
- if (!ep)
- return -EIO;
+ ep = exfat_get_dentry_cached(es, ES_IDX_FILE);
exfat_set_entry_type(ep, type);
- exfat_set_entry_time(sbi, &ts,
+ exfat_set_entry_time(sbi, ts,
&ep->dentry.file.create_tz,
&ep->dentry.file.create_time,
&ep->dentry.file.create_date,
&ep->dentry.file.create_time_cs);
- exfat_set_entry_time(sbi, &ts,
+ exfat_set_entry_time(sbi, ts,
&ep->dentry.file.modify_tz,
&ep->dentry.file.modify_time,
&ep->dentry.file.modify_date,
&ep->dentry.file.modify_time_cs);
- exfat_set_entry_time(sbi, &ts,
+ exfat_set_entry_time(sbi, ts,
&ep->dentry.file.access_tz,
&ep->dentry.file.access_time,
&ep->dentry.file.access_date,
NULL);
- exfat_update_bh(bh, IS_DIRSYNC(inode));
- brelse(bh);
-
- ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh);
- if (!ep)
- return -EIO;
-
+ ep = exfat_get_dentry_cached(es, ES_IDX_STREAM);
exfat_init_stream_entry(ep, start_clu, size);
- exfat_update_bh(bh, IS_DIRSYNC(inode));
- brelse(bh);
-
- return 0;
-}
-
-int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir,
- int entry)
-{
- struct super_block *sb = inode->i_sb;
- int ret = 0;
- int i, num_entries;
- u16 chksum;
- struct exfat_dentry *ep, *fep;
- struct buffer_head *fbh, *bh;
-
- fep = exfat_get_dentry(sb, p_dir, entry, &fbh);
- if (!fep)
- return -EIO;
-
- num_entries = fep->dentry.file.num_ext + 1;
- chksum = exfat_calc_chksum16(fep, DENTRY_SIZE, 0, CS_DIR_ENTRY);
-
- for (i = 1; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
- if (!ep) {
- ret = -EIO;
- goto release_fbh;
- }
- chksum = exfat_calc_chksum16(ep, DENTRY_SIZE, chksum,
- CS_DEFAULT);
- brelse(bh);
- }
-
- fep->dentry.file.checksum = cpu_to_le16(chksum);
- exfat_update_bh(fbh, IS_DIRSYNC(inode));
-release_fbh:
- brelse(fbh);
- return ret;
}
static void exfat_free_benign_secondary_clusters(struct inode *inode,
@@ -551,76 +497,49 @@ static void exfat_free_benign_secondary_clusters(struct inode *inode,
exfat_free_cluster(inode, &dir);
}
-int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
- int entry, int num_entries, struct exfat_uni_name *p_uniname)
+void exfat_init_ext_entry(struct exfat_entry_set_cache *es, int num_entries,
+ struct exfat_uni_name *p_uniname)
{
- struct super_block *sb = inode->i_sb;
int i;
unsigned short *uniname = p_uniname->name;
struct exfat_dentry *ep;
- struct buffer_head *bh;
- int sync = IS_DIRSYNC(inode);
-
- ep = exfat_get_dentry(sb, p_dir, entry, &bh);
- if (!ep)
- return -EIO;
+ ep = exfat_get_dentry_cached(es, ES_IDX_FILE);
ep->dentry.file.num_ext = (unsigned char)(num_entries - 1);
- exfat_update_bh(bh, sync);
- brelse(bh);
-
- ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh);
- if (!ep)
- return -EIO;
+ ep = exfat_get_dentry_cached(es, ES_IDX_STREAM);
ep->dentry.stream.name_len = p_uniname->name_len;
ep->dentry.stream.name_hash = cpu_to_le16(p_uniname->name_hash);
- exfat_update_bh(bh, sync);
- brelse(bh);
-
- for (i = EXFAT_FIRST_CLUSTER; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
- if (!ep)
- return -EIO;
-
- if (exfat_get_entry_type(ep) & TYPE_BENIGN_SEC)
- exfat_free_benign_secondary_clusters(inode, ep);
+ for (i = ES_IDX_FIRST_FILENAME; i < num_entries; i++) {
+ ep = exfat_get_dentry_cached(es, i);
exfat_init_name_entry(ep, uniname);
- exfat_update_bh(bh, sync);
- brelse(bh);
uniname += EXFAT_FILE_NAME_LEN;
}
- exfat_update_dir_chksum(inode, p_dir, entry);
- return 0;
+ exfat_update_dir_chksum(es);
}
-int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir,
- int entry, int order, int num_entries)
+void exfat_remove_entries(struct inode *inode, struct exfat_entry_set_cache *es,
+ int order)
{
- struct super_block *sb = inode->i_sb;
int i;
struct exfat_dentry *ep;
- struct buffer_head *bh;
- for (i = order; i < num_entries; i++) {
- ep = exfat_get_dentry(sb, p_dir, entry + i, &bh);
- if (!ep)
- return -EIO;
+ for (i = order; i < es->num_entries; i++) {
+ ep = exfat_get_dentry_cached(es, i);
if (exfat_get_entry_type(ep) & TYPE_BENIGN_SEC)
exfat_free_benign_secondary_clusters(inode, ep);
exfat_set_entry_type(ep, TYPE_DELETED);
- exfat_update_bh(bh, IS_DIRSYNC(inode));
- brelse(bh);
}
- return 0;
+ if (order < es->num_entries)
+ es->modified = true;
}
-void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es)
+void exfat_update_dir_chksum(struct exfat_entry_set_cache *es)
{
int chksum_type = CS_DIR_ENTRY, i;
unsigned short chksum = 0;
@@ -775,7 +694,6 @@ struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
}
enum exfat_validate_dentry_mode {
- ES_MODE_STARTED,
ES_MODE_GET_FILE_ENTRY,
ES_MODE_GET_STRM_ENTRY,
ES_MODE_GET_NAME_ENTRY,
@@ -790,11 +708,6 @@ static bool exfat_validate_entry(unsigned int type,
return false;
switch (*mode) {
- case ES_MODE_STARTED:
- if (type != TYPE_FILE && type != TYPE_DIR)
- return false;
- *mode = ES_MODE_GET_FILE_ENTRY;
- break;
case ES_MODE_GET_FILE_ENTRY:
if (type != TYPE_STREAM)
return false;
@@ -834,7 +747,7 @@ struct exfat_dentry *exfat_get_dentry_cached(
}
/*
- * Returns a set of dentries for a file or dir.
+ * Returns a set of dentries.
*
* Note It provides a direct pointer to bh->data via exfat_get_dentry_cached().
* User should call exfat_get_dentry_set() after setting 'modified' to apply
@@ -842,22 +755,24 @@ struct exfat_dentry *exfat_get_dentry_cached(
*
* in:
* sb+p_dir+entry: indicates a file/dir
- * type: specifies how many dentries should be included.
+ * num_entries: specifies how many dentries should be included.
+ * It will be set to es->num_entries if it is not 0.
+ * If num_entries is 0, es->num_entries will be obtained
+ * from the first dentry.
+ * out:
+ * es: pointer of entry set on success.
* return:
- * pointer of entry set on success,
- * NULL on failure.
+ * 0 on success
+ * -error code on failure
*/
-int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
+static int __exfat_get_dentry_set(struct exfat_entry_set_cache *es,
struct super_block *sb, struct exfat_chain *p_dir, int entry,
- unsigned int type)
+ unsigned int num_entries)
{
int ret, i, num_bh;
unsigned int off;
sector_t sec;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
- struct exfat_dentry *ep;
- int num_entries;
- enum exfat_validate_dentry_mode mode = ES_MODE_STARTED;
struct buffer_head *bh;
if (p_dir->dir == DIR_DELETED) {
@@ -880,12 +795,18 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
return -EIO;
es->bh[es->num_bh++] = bh;
- ep = exfat_get_dentry_cached(es, ES_IDX_FILE);
- if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
- goto put_es;
+ if (num_entries == ES_ALL_ENTRIES) {
+ struct exfat_dentry *ep;
+
+ ep = exfat_get_dentry_cached(es, ES_IDX_FILE);
+ if (ep->type != EXFAT_FILE) {
+ brelse(bh);
+ return -EIO;
+ }
+
+ num_entries = ep->dentry.file.num_ext + 1;
+ }
- num_entries = type == ES_ALL_ENTRIES ?
- ep->dentry.file.num_ext + 1 : type;
es->num_entries = num_entries;
num_bh = EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb);
@@ -918,8 +839,27 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
es->bh[es->num_bh++] = bh;
}
+ return 0;
+
+put_es:
+ exfat_put_dentry_set(es, false);
+ return -EIO;
+}
+
+int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
+ struct super_block *sb, struct exfat_chain *p_dir,
+ int entry, unsigned int num_entries)
+{
+ int ret, i;
+ struct exfat_dentry *ep;
+ enum exfat_validate_dentry_mode mode = ES_MODE_GET_FILE_ENTRY;
+
+ ret = __exfat_get_dentry_set(es, sb, p_dir, entry, num_entries);
+ if (ret < 0)
+ return ret;
+
/* validate cached dentries */
- for (i = ES_IDX_STREAM; i < num_entries; i++) {
+ for (i = ES_IDX_STREAM; i < es->num_entries; i++) {
ep = exfat_get_dentry_cached(es, i);
if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
goto put_es;
@@ -931,6 +871,85 @@ put_es:
return -EIO;
}
+static int exfat_validate_empty_dentry_set(struct exfat_entry_set_cache *es)
+{
+ struct exfat_dentry *ep;
+ struct buffer_head *bh;
+ int i, off;
+ bool unused_hit = false;
+
+ /*
+ * ONLY UNUSED OR DELETED DENTRIES ARE ALLOWED:
+ * Although it violates the specification for a deleted entry to
+ * follow an unused entry, some exFAT implementations could work
+ * like this. Therefore, to improve compatibility, let's allow it.
+ */
+ for (i = 0; i < es->num_entries; i++) {
+ ep = exfat_get_dentry_cached(es, i);
+ if (ep->type == EXFAT_UNUSED) {
+ unused_hit = true;
+ } else if (!IS_EXFAT_DELETED(ep->type)) {
+ if (unused_hit)
+ goto err_used_follow_unused;
+ i++;
+ goto count_skip_entries;
+ }
+ }
+
+ return 0;
+
+err_used_follow_unused:
+ off = es->start_off + (i << DENTRY_SIZE_BITS);
+ bh = es->bh[EXFAT_B_TO_BLK(off, es->sb)];
+
+ exfat_fs_error(es->sb,
+ "in sector %lld, dentry %d should be unused, but 0x%x",
+ bh->b_blocknr, off >> DENTRY_SIZE_BITS, ep->type);
+
+ return -EIO;
+
+count_skip_entries:
+ es->num_entries = EXFAT_B_TO_DEN(EXFAT_BLK_TO_B(es->num_bh, es->sb) - es->start_off);
+ for (; i < es->num_entries; i++) {
+ ep = exfat_get_dentry_cached(es, i);
+ if (IS_EXFAT_DELETED(ep->type))
+ break;
+ }
+
+ return i;
+}
+
+/*
+ * Get an empty dentry set.
+ *
+ * in:
+ * sb+p_dir+entry: indicates the empty dentry location
+ * num_entries: specifies how many empty dentries should be included.
+ * out:
+ * es: pointer of empty dentry set on success.
+ * return:
+ * 0 : on success
+ * >0 : the dentries are not empty, the return value is the number of
+ * dentries to be skipped for the next lookup.
+ * <0 : on failure
+ */
+int exfat_get_empty_dentry_set(struct exfat_entry_set_cache *es,
+ struct super_block *sb, struct exfat_chain *p_dir,
+ int entry, unsigned int num_entries)
+{
+ int ret;
+
+ ret = __exfat_get_dentry_set(es, sb, p_dir, entry, num_entries);
+ if (ret < 0)
+ return ret;
+
+ ret = exfat_validate_empty_dentry_set(es);
+ if (ret)
+ exfat_put_dentry_set(es, false);
+
+ return ret;
+}
+
static inline void exfat_reset_empty_hint(struct exfat_hint_femp *hint_femp)
{
hint_femp->eidx = EXFAT_HINT_NONE;
@@ -1187,27 +1206,6 @@ found:
return dentry - num_ext;
}
-int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir,
- int entry, struct exfat_dentry *ep)
-{
- int i, count = 0;
- unsigned int type;
- struct exfat_dentry *ext_ep;
- struct buffer_head *bh;
-
- for (i = 0, entry++; i < ep->dentry.file.num_ext; i++, entry++) {
- ext_ep = exfat_get_dentry(sb, p_dir, entry, &bh);
- if (!ext_ep)
- return -EIO;
-
- type = exfat_get_entry_type(ext_ep);
- brelse(bh);
- if (type & TYPE_CRITICAL_SEC || type & TYPE_BENIGN_SEC)
- count++;
- }
- return count;
-}
-
int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir)
{
int i, count = 0;
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 361595433480..ecc5db952deb 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -431,8 +431,6 @@ int exfat_ent_get(struct super_block *sb, unsigned int loc,
unsigned int *content);
int exfat_ent_set(struct super_block *sb, unsigned int loc,
unsigned int content);
-int exfat_count_ext_entries(struct super_block *sb, struct exfat_chain *p_dir,
- int entry, struct exfat_dentry *p_entry);
int exfat_chain_cont_cluster(struct super_block *sb, unsigned int chain,
unsigned int len);
int exfat_zeroed_cluster(struct inode *dir, unsigned int clu);
@@ -480,16 +478,14 @@ int exfat_get_cluster(struct inode *inode, unsigned int cluster,
extern const struct inode_operations exfat_dir_inode_operations;
extern const struct file_operations exfat_dir_operations;
unsigned int exfat_get_entry_type(struct exfat_dentry *p_entry);
-int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir,
- int entry, unsigned int type, unsigned int start_clu,
- unsigned long long size);
-int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir,
- int entry, int num_entries, struct exfat_uni_name *p_uniname);
-int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir,
- int entry, int order, int num_entries);
-int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir,
- int entry);
-void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es);
+void exfat_init_dir_entry(struct exfat_entry_set_cache *es,
+ unsigned int type, unsigned int start_clu,
+ unsigned long long size, struct timespec64 *ts);
+void exfat_init_ext_entry(struct exfat_entry_set_cache *es, int num_entries,
+ struct exfat_uni_name *p_uniname);
+void exfat_remove_entries(struct inode *inode, struct exfat_entry_set_cache *es,
+ int order);
+void exfat_update_dir_chksum(struct exfat_entry_set_cache *es);
int exfat_calc_num_entries(struct exfat_uni_name *p_uniname);
int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname,
@@ -501,7 +497,10 @@ struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es,
int num);
int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
struct super_block *sb, struct exfat_chain *p_dir, int entry,
- unsigned int type);
+ unsigned int num_entries);
+int exfat_get_empty_dentry_set(struct exfat_entry_set_cache *es,
+ struct super_block *sb, struct exfat_chain *p_dir, int entry,
+ unsigned int num_entries);
int exfat_put_dentry_set(struct exfat_entry_set_cache *es, int sync);
int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 0687f952956c..dd894e558c91 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -94,7 +94,7 @@ int __exfat_write_inode(struct inode *inode, int sync)
ep2->dentry.stream.start_clu = EXFAT_FREE_CLUSTER;
}
- exfat_update_dir_chksum_with_entry_set(&es);
+ exfat_update_dir_chksum(&es);
return exfat_put_dentry_set(&es, sync);
}
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 9c549fd11fc8..631ad9e8e32a 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -204,21 +204,16 @@ const struct dentry_operations exfat_utf8_dentry_ops = {
.d_compare = exfat_utf8_d_cmp,
};
-/* used only in search empty_slot() */
-#define CNT_UNUSED_NOHIT (-1)
-#define CNT_UNUSED_HIT (-2)
/* search EMPTY CONTINUOUS "num_entries" entries */
static int exfat_search_empty_slot(struct super_block *sb,
struct exfat_hint_femp *hint_femp, struct exfat_chain *p_dir,
- int num_entries)
+ int num_entries, struct exfat_entry_set_cache *es)
{
- int i, dentry, num_empty = 0;
+ int i, dentry, ret;
int dentries_per_clu;
- unsigned int type;
struct exfat_chain clu;
- struct exfat_dentry *ep;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
- struct buffer_head *bh;
+ int total_entries = EXFAT_CLU_TO_DEN(p_dir->size, sbi);
dentries_per_clu = sbi->dentries_per_clu;
@@ -231,7 +226,7 @@ static int exfat_search_empty_slot(struct super_block *sb,
* Otherwise, and if "dentry + hint_famp->count" is also equal
* to "p_dir->size * dentries_per_clu", it means ENOSPC.
*/
- if (dentry + hint_femp->count == p_dir->size * dentries_per_clu &&
+ if (dentry + hint_femp->count == total_entries &&
num_entries > hint_femp->count)
return -ENOSPC;
@@ -242,69 +237,41 @@ static int exfat_search_empty_slot(struct super_block *sb,
dentry = 0;
}
- while (clu.dir != EXFAT_EOF_CLUSTER) {
+ while (dentry + num_entries < total_entries &&
+ clu.dir != EXFAT_EOF_CLUSTER) {
i = dentry & (dentries_per_clu - 1);
- for (; i < dentries_per_clu; i++, dentry++) {
- ep = exfat_get_dentry(sb, &clu, i, &bh);
- if (!ep)
- return -EIO;
- type = exfat_get_entry_type(ep);
- brelse(bh);
-
- if (type == TYPE_UNUSED || type == TYPE_DELETED) {
- num_empty++;
- if (hint_femp->eidx == EXFAT_HINT_NONE) {
- hint_femp->eidx = dentry;
- hint_femp->count = CNT_UNUSED_NOHIT;
- exfat_chain_set(&hint_femp->cur,
- clu.dir, clu.size, clu.flags);
- }
-
- if (type == TYPE_UNUSED &&
- hint_femp->count != CNT_UNUSED_HIT)
- hint_femp->count = CNT_UNUSED_HIT;
+ ret = exfat_get_empty_dentry_set(es, sb, &clu, i, num_entries);
+ if (ret < 0)
+ return ret;
+ else if (ret == 0)
+ return dentry;
+
+ dentry += ret;
+ i += ret;
+
+ while (i >= dentries_per_clu) {
+ if (clu.flags == ALLOC_NO_FAT_CHAIN) {
+ if (--clu.size > 0)
+ clu.dir++;
+ else
+ clu.dir = EXFAT_EOF_CLUSTER;
} else {
- if (hint_femp->eidx != EXFAT_HINT_NONE &&
- hint_femp->count == CNT_UNUSED_HIT) {
- /* unused empty group means
- * an empty group which includes
- * unused dentry
- */
- exfat_fs_error(sb,
- "found bogus dentry(%d) beyond unused empty group(%d) (start_clu : %u, cur_clu : %u)",
- dentry, hint_femp->eidx,
- p_dir->dir, clu.dir);
+ if (exfat_get_next_cluster(sb, &clu.dir))
return -EIO;
- }
-
- num_empty = 0;
- hint_femp->eidx = EXFAT_HINT_NONE;
}
- if (num_empty >= num_entries) {
- /* found and invalidate hint_femp */
- hint_femp->eidx = EXFAT_HINT_NONE;
- return (dentry - (num_entries - 1));
- }
- }
-
- if (clu.flags == ALLOC_NO_FAT_CHAIN) {
- if (--clu.size > 0)
- clu.dir++;
- else
- clu.dir = EXFAT_EOF_CLUSTER;
- } else {
- if (exfat_get_next_cluster(sb, &clu.dir))
- return -EIO;
+ i -= dentries_per_clu;
}
}
- hint_femp->eidx = p_dir->size * dentries_per_clu - num_empty;
- hint_femp->count = num_empty;
- if (num_empty == 0)
+ hint_femp->eidx = dentry;
+ hint_femp->count = 0;
+ if (dentry == total_entries || clu.dir == EXFAT_EOF_CLUSTER)
exfat_chain_set(&hint_femp->cur, EXFAT_EOF_CLUSTER, 0,
clu.flags);
+ else
+ hint_femp->cur = clu;
return -ENOSPC;
}
@@ -325,7 +292,8 @@ static int exfat_check_max_dentries(struct inode *inode)
* if there isn't any empty slot, expand cluster chain.
*/
static int exfat_find_empty_entry(struct inode *inode,
- struct exfat_chain *p_dir, int num_entries)
+ struct exfat_chain *p_dir, int num_entries,
+ struct exfat_entry_set_cache *es)
{
int dentry;
unsigned int ret, last_clu;
@@ -344,7 +312,7 @@ static int exfat_find_empty_entry(struct inode *inode,
}
while ((dentry = exfat_search_empty_slot(sb, &hint_femp, p_dir,
- num_entries)) < 0) {
+ num_entries, es)) < 0) {
if (dentry == -EIO)
break;
@@ -499,6 +467,8 @@ static int exfat_add_entry(struct inode *inode, const char *path,
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct exfat_uni_name uniname;
struct exfat_chain clu;
+ struct timespec64 ts = current_time(inode);
+ struct exfat_entry_set_cache es;
int clu_size = 0;
unsigned int start_clu = EXFAT_FREE_CLUSTER;
@@ -513,7 +483,7 @@ static int exfat_add_entry(struct inode *inode, const char *path,
}
/* exfat_find_empty_entry must be called before alloc_cluster() */
- dentry = exfat_find_empty_entry(inode, p_dir, num_entries);
+ dentry = exfat_find_empty_entry(inode, p_dir, num_entries, &es);
if (dentry < 0) {
ret = dentry; /* -EIO or -ENOSPC */
goto out;
@@ -521,8 +491,10 @@ static int exfat_add_entry(struct inode *inode, const char *path,
if (type == TYPE_DIR && !sbi->options.zero_size_dir) {
ret = exfat_alloc_new_dir(inode, &clu);
- if (ret)
+ if (ret) {
+ exfat_put_dentry_set(&es, false);
goto out;
+ }
start_clu = clu.dir;
clu_size = sbi->cluster_size;
}
@@ -531,12 +503,10 @@ static int exfat_add_entry(struct inode *inode, const char *path,
/* fill the dos name directory entry information of the created file.
* the first cluster is not determined yet. (0)
*/
- ret = exfat_init_dir_entry(inode, p_dir, dentry, type,
- start_clu, clu_size);
- if (ret)
- goto out;
+ exfat_init_dir_entry(&es, type, start_clu, clu_size, &ts);
+ exfat_init_ext_entry(&es, num_entries, &uniname);
- ret = exfat_init_ext_entry(inode, p_dir, dentry, num_entries, &uniname);
+ ret = exfat_put_dentry_set(&es, IS_DIRSYNC(inode));
if (ret)
goto out;
@@ -577,6 +547,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
struct exfat_dir_entry info;
loff_t i_pos;
int err;
+ loff_t size = i_size_read(dir);
mutex_lock(&EXFAT_SB(sb)->s_lock);
exfat_set_volume_dirty(sb);
@@ -587,7 +558,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
inode_inc_iversion(dir);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
- if (IS_DIRSYNC(dir))
+ if (IS_DIRSYNC(dir) && size != i_size_read(dir))
exfat_sync_inode(dir);
else
mark_inode_dirty(dir);
@@ -795,12 +766,11 @@ unlock:
static int exfat_unlink(struct inode *dir, struct dentry *dentry)
{
struct exfat_chain cdir;
- struct exfat_dentry *ep;
struct super_block *sb = dir->i_sb;
struct inode *inode = dentry->d_inode;
struct exfat_inode_info *ei = EXFAT_I(inode);
- struct buffer_head *bh;
- int num_entries, entry, err = 0;
+ struct exfat_entry_set_cache es;
+ int entry, err = 0;
mutex_lock(&EXFAT_SB(sb)->s_lock);
exfat_chain_dup(&cdir, &ei->dir);
@@ -811,26 +781,20 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
goto unlock;
}
- ep = exfat_get_dentry(sb, &cdir, entry, &bh);
- if (!ep) {
- err = -EIO;
- goto unlock;
- }
- num_entries = exfat_count_ext_entries(sb, &cdir, entry, ep);
- if (num_entries < 0) {
+ err = exfat_get_dentry_set(&es, sb, &cdir, entry, ES_ALL_ENTRIES);
+ if (err) {
err = -EIO;
- brelse(bh);
goto unlock;
}
- num_entries++;
- brelse(bh);
exfat_set_volume_dirty(sb);
+
/* update the directory entry */
- if (exfat_remove_entries(dir, &cdir, entry, 0, num_entries)) {
- err = -EIO;
+ exfat_remove_entries(inode, &es, ES_IDX_FILE);
+
+ err = exfat_put_dentry_set(&es, IS_DIRSYNC(inode));
+ if (err)
goto unlock;
- }
/* This doesn't modify ei */
ei->dir.dir = DIR_DELETED;
@@ -838,10 +802,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
inode_inc_iversion(dir);
simple_inode_init_ts(dir);
exfat_truncate_inode_atime(dir);
- if (IS_DIRSYNC(dir))
- exfat_sync_inode(dir);
- else
- mark_inode_dirty(dir);
+ mark_inode_dirty(dir);
clear_nlink(inode);
simple_inode_init_ts(inode);
@@ -862,6 +823,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct exfat_chain cdir;
loff_t i_pos;
int err;
+ loff_t size = i_size_read(dir);
mutex_lock(&EXFAT_SB(sb)->s_lock);
exfat_set_volume_dirty(sb);
@@ -872,7 +834,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
inode_inc_iversion(dir);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
- if (IS_DIRSYNC(dir))
+ if (IS_DIRSYNC(dir) && size != i_size_read(dir))
exfat_sync_inode(dir);
else
mark_inode_dirty(dir);
@@ -946,13 +908,12 @@ static int exfat_check_dir_empty(struct super_block *sb,
static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
- struct exfat_dentry *ep;
struct exfat_chain cdir, clu_to_free;
struct super_block *sb = inode->i_sb;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
struct exfat_inode_info *ei = EXFAT_I(inode);
- struct buffer_head *bh;
- int num_entries, entry, err;
+ struct exfat_entry_set_cache es;
+ int entry, err;
mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock);
@@ -976,27 +937,20 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
goto unlock;
}
- ep = exfat_get_dentry(sb, &cdir, entry, &bh);
- if (!ep) {
- err = -EIO;
- goto unlock;
- }
-
- num_entries = exfat_count_ext_entries(sb, &cdir, entry, ep);
- if (num_entries < 0) {
+ err = exfat_get_dentry_set(&es, sb, &cdir, entry, ES_ALL_ENTRIES);
+ if (err) {
err = -EIO;
- brelse(bh);
goto unlock;
}
- num_entries++;
- brelse(bh);
exfat_set_volume_dirty(sb);
- err = exfat_remove_entries(dir, &cdir, entry, 0, num_entries);
- if (err) {
- exfat_err(sb, "failed to exfat_remove_entries : err(%d)", err);
+
+ exfat_remove_entries(inode, &es, ES_IDX_FILE);
+
+ err = exfat_put_dentry_set(&es, IS_DIRSYNC(dir));
+ if (err)
goto unlock;
- }
+
ei->dir.dir = DIR_DELETED;
inode_inc_iversion(dir);
@@ -1022,67 +976,52 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
int oldentry, struct exfat_uni_name *p_uniname,
struct exfat_inode_info *ei)
{
- int ret, num_old_entries, num_new_entries;
+ int ret, num_new_entries;
struct exfat_dentry *epold, *epnew;
struct super_block *sb = inode->i_sb;
- struct buffer_head *new_bh, *old_bh;
+ struct exfat_entry_set_cache old_es, new_es;
int sync = IS_DIRSYNC(inode);
- epold = exfat_get_dentry(sb, p_dir, oldentry, &old_bh);
- if (!epold)
- return -EIO;
-
- num_old_entries = exfat_count_ext_entries(sb, p_dir, oldentry, epold);
- if (num_old_entries < 0)
- return -EIO;
- num_old_entries++;
-
num_new_entries = exfat_calc_num_entries(p_uniname);
if (num_new_entries < 0)
return num_new_entries;
- if (num_old_entries < num_new_entries) {
- int newentry;
+ ret = exfat_get_dentry_set(&old_es, sb, p_dir, oldentry, ES_ALL_ENTRIES);
+ if (ret) {
+ ret = -EIO;
+ return ret;
+ }
- newentry =
- exfat_find_empty_entry(inode, p_dir, num_new_entries);
- if (newentry < 0)
- return newentry; /* -EIO or -ENOSPC */
+ epold = exfat_get_dentry_cached(&old_es, ES_IDX_FILE);
- epnew = exfat_get_dentry(sb, p_dir, newentry, &new_bh);
- if (!epnew)
- return -EIO;
+ if (old_es.num_entries < num_new_entries) {
+ int newentry;
+ newentry = exfat_find_empty_entry(inode, p_dir, num_new_entries,
+ &new_es);
+ if (newentry < 0) {
+ ret = newentry; /* -EIO or -ENOSPC */
+ goto put_old_es;
+ }
+
+ epnew = exfat_get_dentry_cached(&new_es, ES_IDX_FILE);
*epnew = *epold;
if (exfat_get_entry_type(epnew) == TYPE_FILE) {
epnew->dentry.file.attr |= cpu_to_le16(EXFAT_ATTR_ARCHIVE);
ei->attr |= EXFAT_ATTR_ARCHIVE;
}
- exfat_update_bh(new_bh, sync);
- brelse(old_bh);
- brelse(new_bh);
-
- epold = exfat_get_dentry(sb, p_dir, oldentry + 1, &old_bh);
- if (!epold)
- return -EIO;
- epnew = exfat_get_dentry(sb, p_dir, newentry + 1, &new_bh);
- if (!epnew) {
- brelse(old_bh);
- return -EIO;
- }
+ epold = exfat_get_dentry_cached(&old_es, ES_IDX_STREAM);
+ epnew = exfat_get_dentry_cached(&new_es, ES_IDX_STREAM);
*epnew = *epold;
- exfat_update_bh(new_bh, sync);
- brelse(old_bh);
- brelse(new_bh);
- ret = exfat_init_ext_entry(inode, p_dir, newentry,
- num_new_entries, p_uniname);
+ exfat_init_ext_entry(&new_es, num_new_entries, p_uniname);
+
+ ret = exfat_put_dentry_set(&new_es, sync);
if (ret)
- return ret;
+ goto put_old_es;
- exfat_remove_entries(inode, p_dir, oldentry, 0,
- num_old_entries);
+ exfat_remove_entries(inode, &old_es, ES_IDX_FILE);
ei->dir = *p_dir;
ei->entry = newentry;
} else {
@@ -1090,85 +1029,72 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir,
epold->dentry.file.attr |= cpu_to_le16(EXFAT_ATTR_ARCHIVE);
ei->attr |= EXFAT_ATTR_ARCHIVE;
}
- exfat_update_bh(old_bh, sync);
- brelse(old_bh);
- ret = exfat_init_ext_entry(inode, p_dir, oldentry,
- num_new_entries, p_uniname);
- if (ret)
- return ret;
- exfat_remove_entries(inode, p_dir, oldentry, num_new_entries,
- num_old_entries);
+ exfat_remove_entries(inode, &old_es, ES_IDX_FIRST_FILENAME + 1);
+ exfat_init_ext_entry(&old_es, num_new_entries, p_uniname);
}
- return 0;
+ return exfat_put_dentry_set(&old_es, sync);
+
+put_old_es:
+ exfat_put_dentry_set(&old_es, false);
+ return ret;
}
static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir,
int oldentry, struct exfat_chain *p_newdir,
struct exfat_uni_name *p_uniname, struct exfat_inode_info *ei)
{
- int ret, newentry, num_new_entries, num_old_entries;
+ int ret, newentry, num_new_entries;
struct exfat_dentry *epmov, *epnew;
struct super_block *sb = inode->i_sb;
- struct buffer_head *mov_bh, *new_bh;
-
- epmov = exfat_get_dentry(sb, p_olddir, oldentry, &mov_bh);
- if (!epmov)
- return -EIO;
-
- num_old_entries = exfat_count_ext_entries(sb, p_olddir, oldentry,
- epmov);
- if (num_old_entries < 0)
- return -EIO;
- num_old_entries++;
+ struct exfat_entry_set_cache mov_es, new_es;
num_new_entries = exfat_calc_num_entries(p_uniname);
if (num_new_entries < 0)
return num_new_entries;
- newentry = exfat_find_empty_entry(inode, p_newdir, num_new_entries);
- if (newentry < 0)
- return newentry; /* -EIO or -ENOSPC */
-
- epnew = exfat_get_dentry(sb, p_newdir, newentry, &new_bh);
- if (!epnew)
+ ret = exfat_get_dentry_set(&mov_es, sb, p_olddir, oldentry,
+ ES_ALL_ENTRIES);
+ if (ret)
return -EIO;
+ newentry = exfat_find_empty_entry(inode, p_newdir, num_new_entries,
+ &new_es);
+ if (newentry < 0) {
+ ret = newentry; /* -EIO or -ENOSPC */
+ goto put_mov_es;
+ }
+
+ epmov = exfat_get_dentry_cached(&mov_es, ES_IDX_FILE);
+ epnew = exfat_get_dentry_cached(&new_es, ES_IDX_FILE);
*epnew = *epmov;
if (exfat_get_entry_type(epnew) == TYPE_FILE) {
epnew->dentry.file.attr |= cpu_to_le16(EXFAT_ATTR_ARCHIVE);
ei->attr |= EXFAT_ATTR_ARCHIVE;
}
- exfat_update_bh(new_bh, IS_DIRSYNC(inode));
- brelse(mov_bh);
- brelse(new_bh);
-
- epmov = exfat_get_dentry(sb, p_olddir, oldentry + 1, &mov_bh);
- if (!epmov)
- return -EIO;
- epnew = exfat_get_dentry(sb, p_newdir, newentry + 1, &new_bh);
- if (!epnew) {
- brelse(mov_bh);
- return -EIO;
- }
+ epmov = exfat_get_dentry_cached(&mov_es, ES_IDX_STREAM);
+ epnew = exfat_get_dentry_cached(&new_es, ES_IDX_STREAM);
*epnew = *epmov;
- exfat_update_bh(new_bh, IS_DIRSYNC(inode));
- brelse(mov_bh);
- brelse(new_bh);
-
- ret = exfat_init_ext_entry(inode, p_newdir, newentry, num_new_entries,
- p_uniname);
- if (ret)
- return ret;
- exfat_remove_entries(inode, p_olddir, oldentry, 0, num_old_entries);
+ exfat_init_ext_entry(&new_es, num_new_entries, p_uniname);
+ exfat_remove_entries(inode, &mov_es, ES_IDX_FILE);
exfat_chain_set(&ei->dir, p_newdir->dir, p_newdir->size,
p_newdir->flags);
ei->entry = newentry;
- return 0;
+
+ ret = exfat_put_dentry_set(&new_es, IS_DIRSYNC(inode));
+ if (ret)
+ goto put_mov_es;
+
+ return exfat_put_dentry_set(&mov_es, IS_DIRSYNC(inode));
+
+put_mov_es:
+ exfat_put_dentry_set(&mov_es, false);
+
+ return ret;
}
/* rename or move a old file into a new file */
@@ -1186,7 +1112,6 @@ static int __exfat_rename(struct inode *old_parent_inode,
struct exfat_sb_info *sbi = EXFAT_SB(sb);
const unsigned char *new_path = new_dentry->d_name.name;
struct inode *new_inode = new_dentry->d_inode;
- int num_entries;
struct exfat_inode_info *new_ei = NULL;
unsigned int new_entry_type = TYPE_UNUSED;
int new_entry = 0;
@@ -1257,25 +1182,21 @@ static int __exfat_rename(struct inode *old_parent_inode,
&newdir, &uni_name, ei);
if (!ret && new_inode) {
+ struct exfat_entry_set_cache es;
+
/* delete entries of new_dir */
- ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh);
- if (!ep) {
+ ret = exfat_get_dentry_set(&es, sb, p_dir, new_entry,
+ ES_ALL_ENTRIES);
+ if (ret) {
ret = -EIO;
goto del_out;
}
- num_entries = exfat_count_ext_entries(sb, p_dir, new_entry, ep);
- if (num_entries < 0) {
- ret = -EIO;
- goto del_out;
- }
- brelse(new_bh);
+ exfat_remove_entries(new_inode, &es, ES_IDX_FILE);
- if (exfat_remove_entries(new_inode, p_dir, new_entry, 0,
- num_entries + 1)) {
- ret = -EIO;
+ ret = exfat_put_dentry_set(&es, IS_DIRSYNC(new_inode));
+ if (ret)
goto del_out;
- }
/* Free the clusters if new_inode is a dir(as if exfat_rmdir) */
if (new_entry_type == TYPE_DIR &&
@@ -1317,6 +1238,7 @@ static int exfat_rename(struct mnt_idmap *idmap,
struct super_block *sb = old_dir->i_sb;
loff_t i_pos;
int err;
+ loff_t size = i_size_read(new_dir);
/*
* The VFS already checks for existence, so for local filesystems
@@ -1338,7 +1260,7 @@ static int exfat_rename(struct mnt_idmap *idmap,
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
EXFAT_I(new_dir)->i_crtime = current_time(new_dir);
exfat_truncate_inode_atime(new_dir);
- if (IS_DIRSYNC(new_dir))
+ if (IS_DIRSYNC(new_dir) && size != i_size_read(new_dir))
exfat_sync_inode(new_dir);
else
mark_inode_dirty(new_dir);
@@ -1359,9 +1281,7 @@ static int exfat_rename(struct mnt_idmap *idmap,
}
inode_inc_iversion(old_dir);
- if (IS_DIRSYNC(old_dir))
- exfat_sync_inode(old_dir);
- else
+ if (new_dir != old_dir)
mark_inode_dirty(old_dir);
if (new_inode) {
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index b0597a539fc5..eac698b8dd38 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -154,49 +154,47 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
if (unlikely(f2fs_cp_error(sbi)))
return exist;
- if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) {
- f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
- blkaddr, exist);
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- return exist;
- }
+ if ((exist && type == DATA_GENERIC_ENHANCE_UPDATE) ||
+ (!exist && type == DATA_GENERIC_ENHANCE))
+ goto out_err;
+ if (!exist && type != DATA_GENERIC_ENHANCE_UPDATE)
+ goto out_handle;
+ return exist;
- if (!exist && type == DATA_GENERIC_ENHANCE) {
- f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
- blkaddr, exist);
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- dump_stack();
- }
+out_err:
+ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+ blkaddr, exist);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ dump_stack();
+out_handle:
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return exist;
}
-bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (time_to_inject(sbi, FAULT_BLKADDR))
- return false;
-
switch (type) {
case META_NAT:
break;
case META_SIT:
if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
- return false;
+ goto err;
break;
case META_SSA:
if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
blkaddr < SM_I(sbi)->ssa_blkaddr))
- return false;
+ goto err;
break;
case META_CP:
if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
blkaddr < __start_cp_addr(sbi)))
- return false;
+ goto err;
break;
case META_POR:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi)))
- return false;
+ goto err;
break;
case DATA_GENERIC:
case DATA_GENERIC_ENHANCE:
@@ -213,7 +211,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
blkaddr);
set_sbi_flag(sbi, SBI_NEED_FSCK);
dump_stack();
- return false;
+ goto err;
} else {
return __is_bitmap_valid(sbi, blkaddr, type);
}
@@ -221,13 +219,30 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
case META_GENERIC:
if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
blkaddr >= MAIN_BLKADDR(sbi)))
- return false;
+ goto err;
break;
default:
BUG();
}
return true;
+err:
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+ return false;
+}
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY))
+ return false;
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
}
/*
@@ -889,7 +904,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
- if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) {
+ if (cp_blocks > BLKS_PER_SEG(sbi) || cp_blocks <= F2FS_CP_PACKS) {
f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
le32_to_cpu(cp_block->cp_pack_total_block_count));
goto invalid_cp;
@@ -1324,7 +1339,7 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (cpc->reason & CP_UMOUNT) {
if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
- NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) {
+ NM_I(sbi)->nat_bits_blocks > BLKS_PER_SEG(sbi)) {
clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
f2fs_notice(sbi, "Disable nat_bits due to no space");
} else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
@@ -1527,7 +1542,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
cp_ver |= ((__u64)crc32 << 32);
*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
- blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
+ blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++)
f2fs_update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
@@ -1587,8 +1602,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
*/
if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
f2fs_sb_has_compression(sbi))
- invalidate_mapping_pages(META_MAPPING(sbi),
- MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
+ f2fs_bug_on(sbi,
+ invalidate_inode_pages2_range(META_MAPPING(sbi),
+ MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1));
f2fs_release_ino_entry(sbi, false);
@@ -1730,9 +1746,9 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
im->ino_num = 0;
}
- sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
+ sbi->max_orphans = (BLKS_PER_SEG(sbi) - F2FS_CP_PACKS -
NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
- F2FS_ORPHANS_PER_BLOCK;
+ F2FS_ORPHANS_PER_BLOCK;
}
int __init f2fs_create_checkpoint_caches(void)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 531517dac079..8892c8262141 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -512,8 +512,8 @@ static int lzorle_compress_pages(struct compress_ctx *cc)
ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata,
&cc->clen, cc->private);
if (ret != LZO_E_OK) {
- printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n",
- KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret);
+ f2fs_err_ratelimited(F2FS_I_SB(cc->inode),
+ "lzo-rle compress failed, ret:%d", ret);
return -EIO;
}
return 0;
@@ -780,9 +780,9 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
if (provided != calculated) {
if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) {
set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT);
- printk_ratelimited(
- "%sF2FS-fs (%s): checksum invalid, nid = %lu, %x vs %x",
- KERN_INFO, sbi->sb->s_id, dic->inode->i_ino,
+ f2fs_info_ratelimited(sbi,
+ "checksum invalid, nid = %lu, %x vs %x",
+ dic->inode->i_ino,
provided, calculated);
}
set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
struct f2fs_sb_info *sbi = bio->bi_private;
struct compress_io_ctx *cic =
(struct compress_io_ctx *)page_private(page);
+ enum count_type type = WB_DATA_TYPE(page,
+ f2fs_is_compressed_page(page));
int i;
if (unlikely(bio->bi_status))
@@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
f2fs_compress_free_page(page);
- dec_page_count(sbi, F2FS_WB_DATA);
+ dec_page_count(sbi, type);
if (atomic_dec_return(&cic->pending_pages))
return;
@@ -1441,12 +1443,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
}
static int f2fs_write_raw_pages(struct compress_ctx *cc,
- int *submitted,
+ int *submitted_p,
struct writeback_control *wbc,
enum iostat_type io_type)
{
struct address_space *mapping = cc->inode->i_mapping;
- int _submitted, compr_blocks, ret, i;
+ struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ int submitted, compr_blocks, i;
+ int ret = 0;
compr_blocks = f2fs_compressed_blocks(cc);
@@ -1461,6 +1465,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
if (compr_blocks < 0)
return compr_blocks;
+ /* overwrite compressed cluster w/ normal cluster */
+ if (compr_blocks > 0)
+ f2fs_lock_op(sbi);
+
for (i = 0; i < cc->cluster_size; i++) {
if (!cc->rpages[i])
continue;
@@ -1485,7 +1493,7 @@ continue_unlock:
if (!clear_page_dirty_for_io(cc->rpages[i]))
goto continue_unlock;
- ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
+ ret = f2fs_write_single_data_page(cc->rpages[i], &submitted,
NULL, NULL, wbc, io_type,
compr_blocks, false);
if (ret) {
@@ -1493,26 +1501,29 @@ continue_unlock:
unlock_page(cc->rpages[i]);
ret = 0;
} else if (ret == -EAGAIN) {
+ ret = 0;
/*
* for quota file, just redirty left pages to
* avoid deadlock caused by cluster update race
* from foreground operation.
*/
if (IS_NOQUOTA(cc->inode))
- return 0;
- ret = 0;
+ goto out;
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
goto retry_write;
}
- return ret;
+ goto out;
}
- *submitted += _submitted;
+ *submitted_p += submitted;
}
- f2fs_balance_fs(F2FS_M_SB(mapping), true);
+out:
+ if (compr_blocks > 0)
+ f2fs_unlock_op(sbi);
- return 0;
+ f2fs_balance_fs(sbi, true);
+ return ret;
}
int f2fs_write_multi_pages(struct compress_ctx *cc,
@@ -1806,16 +1817,18 @@ void f2fs_put_page_dic(struct page *page, bool in_task)
* check whether cluster blocks are contiguous, and add extent cache entry
* only if cluster blocks are logically and physically contiguous.
*/
-unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn)
+unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn,
+ unsigned int ofs_in_node)
{
- bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR;
+ bool compressed = data_blkaddr(dn->inode, dn->node_page,
+ ofs_in_node) == COMPRESS_ADDR;
int i = compressed ? 1 : 0;
block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
- dn->ofs_in_node + i);
+ ofs_in_node + i);
for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) {
block_t blkaddr = data_blkaddr(dn->inode, dn->node_page,
- dn->ofs_in_node + i);
+ ofs_in_node + i);
if (!__is_valid_data_blkaddr(blkaddr))
break;
@@ -1878,12 +1891,8 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
set_page_private_data(cpage, ino);
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ))
- goto out;
-
memcpy(page_address(cpage), page_address(page), PAGE_SIZE);
SetPageUptodate(cpage);
-out:
f2fs_put_page(cpage, 1);
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 26e317696b33..d9494b5fc7c1 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
bioset_exit(&f2fs_bioset);
}
-static bool __is_cp_guaranteed(struct page *page)
+bool f2fs_is_cp_guaranteed(struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode;
@@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
S_ISDIR(inode->i_mode))
return true;
- if (f2fs_is_compressed_page(page))
- return false;
if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
page_private_gcing(page))
return true;
@@ -338,18 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
- enum count_type type = WB_DATA_TYPE(page);
-
- if (page_private_dummy(page)) {
- clear_page_private_dummy(page);
- unlock_page(page);
- mempool_free(page, sbi->write_io_dummy);
-
- if (unlikely(bio->bi_status))
- f2fs_stop_checkpoint(sbi, true,
- STOP_CP_REASON_WRITE_FAIL);
- continue;
- }
+ enum count_type type = WB_DATA_TYPE(page, false);
fscrypt_finalize_bounce_page(&page);
@@ -524,50 +511,13 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
submit_bio(bio);
}
-static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio)
-{
- unsigned int start =
- (bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi);
-
- if (start == 0)
- return;
-
- /* fill dummy pages */
- for (; start < F2FS_IO_SIZE(sbi); start++) {
- struct page *page =
- mempool_alloc(sbi->write_io_dummy,
- GFP_NOIO | __GFP_NOFAIL);
- f2fs_bug_on(sbi, !page);
-
- lock_page(page);
-
- zero_user_segment(page, 0, PAGE_SIZE);
- set_page_private_dummy(page);
-
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
- f2fs_bug_on(sbi, 1);
- }
-}
-
static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
enum page_type type)
{
WARN_ON_ONCE(is_read_io(bio_op(bio)));
- if (type == DATA || type == NODE) {
- if (f2fs_lfs_mode(sbi) && current->plug)
- blk_finish_plug(current->plug);
-
- if (F2FS_IO_ALIGNED(sbi)) {
- f2fs_align_write_bio(sbi, bio);
- /*
- * In the NODE case, we lose next block address chain.
- * So, we need to do checkpoint in f2fs_sync_file.
- */
- if (type == NODE)
- set_sbi_flag(sbi, SBI_NEED_CP);
- }
- }
+ if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type))
+ blk_finish_plug(current->plug);
trace_f2fs_submit_write_bio(sbi->sb, type, bio);
iostat_update_submit_ctx(bio, type);
@@ -740,10 +690,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
fio->is_por ? META_POR : (__is_meta_io(fio) ?
- META_GENERIC : DATA_GENERIC_ENHANCE))) {
- f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
+ META_GENERIC : DATA_GENERIC_ENHANCE)))
return -EFSCORRUPTED;
- }
trace_f2fs_submit_page_bio(page, fio);
@@ -762,7 +710,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
- __read_io_type(page) : WB_DATA_TYPE(fio->page));
+ __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
if (is_read_io(bio_op(bio)))
f2fs_submit_read_bio(fio->sbi, bio, fio->type);
@@ -796,16 +744,6 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
block_t last_blkaddr,
block_t cur_blkaddr)
{
- if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
- unsigned int filled_blocks =
- F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
- unsigned int io_size = F2FS_IO_SIZE(sbi);
- unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
-
- /* IOs in bio is aligned and left space of vectors is not enough */
- if (!(filled_blocks % io_size) && left_vecs < io_size)
- return false;
- }
if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
return false;
return io_type_is_mergeable(io, fio);
@@ -948,10 +886,8 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
fio->encrypted_page : fio->page;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
- __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
- f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
return -EFSCORRUPTED;
- }
trace_f2fs_submit_page_bio(page, fio);
@@ -973,7 +909,7 @@ alloc_new:
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
- inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@@ -1007,11 +943,12 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
struct page *bio_page;
+ enum count_type type;
f2fs_bug_on(sbi, is_read_io(fio->op));
f2fs_down_write(&io->io_rwsem);
-
+next:
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
wait_for_completion_io(&io->zone_wait);
@@ -1021,7 +958,6 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
}
#endif
-next:
if (fio->in_list) {
spin_lock(&io->io_lock);
if (list_empty(&io->io_list)) {
@@ -1046,7 +982,8 @@ next:
/* set submitted = true as a return value */
fio->submitted = 1;
- inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+ type = WB_DATA_TYPE(bio_page, fio->compressed_page);
+ inc_page_count(sbi, type);
if (io->bio &&
(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
@@ -1056,13 +993,6 @@ next:
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
- if (F2FS_IO_ALIGNED(sbi) &&
- (fio->type == DATA || fio->type == NODE) &&
- fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
- dec_page_count(sbi, WB_DATA_TYPE(bio_page));
- fio->retry = 1;
- goto skip;
- }
io->bio = __bio_alloc(fio, BIO_MAX_VECS);
f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
bio_page->index, fio, GFP_NOIO);
@@ -1080,10 +1010,6 @@ alloc_new:
io->last_block_in_bio = fio->new_blkaddr;
trace_f2fs_submit_page_write(fio->page, fio);
-skip:
- if (fio->in_list)
- goto next;
-out:
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
@@ -1096,6 +1022,9 @@ out:
__submit_merged_bio(io);
}
#endif
+ if (fio->in_list)
+ goto next;
+out:
if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
!f2fs_is_checkpoint_ready(sbi))
__submit_merged_bio(io);
@@ -1218,7 +1147,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
- if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+ err = inc_valid_block_count(sbi, dn->inode, &count, true);
+ if (unlikely(err))
return err;
trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
@@ -1285,8 +1215,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(F2FS_I_SB(inode),
- ERROR_INVALID_BLKADDR);
goto put_err;
}
goto got_it;
@@ -1312,8 +1240,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
dn.data_blkaddr,
DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(F2FS_I_SB(inode),
- ERROR_INVALID_BLKADDR);
goto put_err;
}
got_it:
@@ -1475,15 +1401,18 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
dn->data_blkaddr = f2fs_data_blkaddr(dn);
if (dn->data_blkaddr == NULL_ADDR) {
- err = inc_valid_block_count(sbi, dn->inode, &count);
+ err = inc_valid_block_count(sbi, dn->inode, &count, true);
if (unlikely(err))
return err;
}
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
old_blkaddr = dn->data_blkaddr;
- f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
- &sum, seg_type, NULL);
+ err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
+ &dn->data_blkaddr, &sum, seg_type, NULL);
+ if (err)
+ return err;
+
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
f2fs_invalidate_internal_cache(sbi, old_blkaddr);
@@ -1641,7 +1570,6 @@ next_block:
if (!is_hole &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto sync_out;
}
@@ -2165,8 +2093,6 @@ got_it:
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC_ENHANCE_READ)) {
ret = -EFSCORRUPTED;
- f2fs_handle_error(F2FS_I_SB(inode),
- ERROR_INVALID_BLKADDR);
goto out;
}
} else {
@@ -2668,8 +2594,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
if (fio) {
if (page_private_gcing(fio->page))
return true;
- if (page_private_dummy(fio->page))
- return true;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
return true;
@@ -2706,11 +2630,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
f2fs_lookup_read_extent_cache_block(inode, page->index,
&fio->old_blkaddr)) {
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC_ENHANCE)) {
- f2fs_handle_error(fio->sbi,
- ERROR_INVALID_BLKADDR);
+ DATA_GENERIC_ENHANCE))
return -EFSCORRUPTED;
- }
ipu_force = true;
fio->need_lock = LOCK_DONE;
@@ -2738,7 +2659,6 @@ got_it:
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
goto out_writepage;
}
@@ -2838,7 +2758,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
.encrypted_page = NULL,
.submitted = 0,
.compr_blocks = compr_blocks,
- .need_lock = LOCK_RETRY,
+ .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
.post_read = f2fs_post_read_required(inode) ? 1 : 0,
.io_type = io_type,
.io_wbc = wbc,
@@ -2919,6 +2839,7 @@ write:
if (err == -EAGAIN) {
err = f2fs_do_write_data_page(&fio);
if (err == -EAGAIN) {
+ f2fs_bug_on(sbi, compr_blocks);
fio.need_lock = LOCK_REQ;
err = f2fs_do_write_data_page(&fio);
}
@@ -3704,7 +3625,6 @@ repeat:
if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto fail;
}
err = f2fs_submit_page_read(use_cow ?
@@ -3905,26 +3825,36 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int blkofs;
unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
+ unsigned int end_blk = start_blk + blkcnt - 1;
unsigned int secidx = start_blk / blk_per_sec;
- unsigned int end_sec = secidx + blkcnt / blk_per_sec;
+ unsigned int end_sec;
int ret = 0;
+ if (!blkcnt)
+ return 0;
+ end_sec = end_blk / blk_per_sec;
+
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
set_inode_flag(inode, FI_ALIGNED_WRITE);
set_inode_flag(inode, FI_OPU_WRITE);
- for (; secidx < end_sec; secidx++) {
+ for (; secidx <= end_sec; secidx++) {
+ unsigned int blkofs_end = secidx == end_sec ?
+ end_blk % blk_per_sec : blk_per_sec - 1;
+
f2fs_down_write(&sbi->pin_sem);
- f2fs_lock_op(sbi);
- f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
- f2fs_unlock_op(sbi);
+ ret = f2fs_allocate_pinning_section(sbi);
+ if (ret) {
+ f2fs_up_write(&sbi->pin_sem);
+ break;
+ }
set_inode_flag(inode, FI_SKIP_WRITES);
- for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
+ for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
struct page *page;
unsigned int blkidx = secidx * blk_per_sec + blkofs;
@@ -4013,27 +3943,34 @@ retry:
nr_pblocks = map.m_len;
if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
- nr_pblocks & sec_blks_mask) {
+ nr_pblocks & sec_blks_mask ||
+ !f2fs_valid_pinned_area(sbi, pblock)) {
+ bool last_extent = false;
+
not_aligned++;
nr_pblocks = roundup(nr_pblocks, blks_per_sec);
if (cur_lblock + nr_pblocks > sis->max)
nr_pblocks -= blks_per_sec;
+ /* this extent is last one */
if (!nr_pblocks) {
- /* this extent is last one */
- nr_pblocks = map.m_len;
- f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
- goto next;
+ nr_pblocks = last_lblock - cur_lblock;
+ last_extent = true;
}
ret = f2fs_migrate_blocks(inode, cur_lblock,
nr_pblocks);
- if (ret)
+ if (ret) {
+ if (ret == -ENOENT)
+ ret = -EINVAL;
goto out;
- goto retry;
+ }
+
+ if (!last_extent)
+ goto retry;
}
-next:
+
if (cur_lblock + nr_pblocks >= sis->max)
nr_pblocks = sis->max - cur_lblock;
@@ -4071,17 +4008,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
struct inode *inode = file_inode(file);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ if (f2fs_readonly(sbi->sb))
return -EROFS;
- if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
- f2fs_err(F2FS_I_SB(inode),
- "Swapfile not supported in LFS mode");
+ if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Swapfile not supported in LFS mode");
return -EINVAL;
}
@@ -4092,6 +4029,10 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (!f2fs_disable_compressed_file(inode))
return -EINVAL;
+ ret = filemap_fdatawrite(inode->i_mapping);
+ if (ret < 0)
+ return ret;
+
f2fs_precache_extents(inode);
ret = check_swap_activate(sis, file, span);
@@ -4100,7 +4041,7 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
stat_inc_swapfile_inode(inode);
set_inode_flag(inode, FI_PIN_FILE);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ f2fs_update_time(sbi, REQ_TIME);
return ret;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fdbf994f1271..8b0e1e71b667 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -41,7 +41,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi)
total_vblocks = 0;
blks_per_sec = CAP_BLKS_PER_SEC(sbi);
hblks_per_sec = blks_per_sec / 2;
- for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+ for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
vblocks = get_valid_blocks(sbi, segno, true);
dist = abs(vblocks - hblks_per_sec);
bimodal += dist * dist;
@@ -135,7 +135,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->cur_ckpt_time = sbi->cprc_info.cur_time;
si->peak_ckpt_time = sbi->cprc_info.peak_time;
spin_unlock(&sbi->cprc_info.stat_lock);
- si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
+ si->total_count = BLKS_TO_SEGS(sbi, (int)sbi->user_block_count);
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
si->valid_count = valid_user_blocks(sbi);
@@ -176,11 +176,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->io_skip_bggc = sbi->io_skip_bggc;
si->other_skip_bggc = sbi->other_skip_bggc;
- si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
+ si->util_free = (int)(BLKS_TO_SEGS(sbi, free_user_blocks(sbi)))
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
- si->util_valid = (int)(written_block_count(sbi) >>
- sbi->log_blocks_per_seg)
+ si->util_valid = (int)(BLKS_TO_SEGS(sbi, written_block_count(sbi)))
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
si->util_invalid = 50 - si->util_free - si->util_valid;
@@ -208,7 +207,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
if (!blks)
continue;
- if (blks == sbi->blocks_per_seg)
+ if (blks == BLKS_PER_SEG(sbi))
si->full_seg[type]++;
else
si->dirty_seg[type]++;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 042593aed1ec..02c9355176d3 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -830,13 +830,14 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
return err;
}
-int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir,
+ struct f2fs_filename *fname)
{
struct page *page;
int err = 0;
f2fs_down_write(&F2FS_I(inode)->i_sem);
- page = f2fs_init_inode_metadata(inode, dir, NULL, NULL);
+ page = f2fs_init_inode_metadata(inode, dir, fname, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -995,9 +996,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
de = &d->dentry[bit_pos];
if (de->name_len == 0) {
if (found_valid_dirent || !bit_pos) {
- printk_ratelimited(
- "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
- KERN_WARNING, sbi->sb->s_id,
+ f2fs_warn_ratelimited(sbi,
+ "invalid namelen(0), ino:%u, run fsck to fix.",
le32_to_cpu(de->ino));
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index ad8dfac73bd4..48048fa36427 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -43,7 +43,6 @@ bool sanity_check_extent_cache(struct inode *inode)
if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
DATA_GENERIC_ENHANCE)) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
__func__, inode->i_ino,
ei->blk, ei->fofs, ei->len);
@@ -856,10 +855,8 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
goto out;
if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
- f2fs_bug_on(sbi, 1);
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
return -EINVAL;
- }
out:
/*
* init block age with zero, this can happen when the block age extent
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 3ff428bee958..fced2b7652f4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -61,7 +61,9 @@ enum {
FAULT_SLAB_ALLOC,
FAULT_DQUOT_INIT,
FAULT_LOCK_OP,
- FAULT_BLKADDR,
+ FAULT_BLKADDR_VALIDITY,
+ FAULT_BLKADDR_CONSISTENCE,
+ FAULT_NO_SEGMENT,
FAULT_MAX,
};
@@ -76,6 +78,11 @@ struct f2fs_fault_info {
extern const char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type))
+
+/* maximum retry count for injected failure */
+#define DEFAULT_FAILURE_RETRY_COUNT 8
+#else
+#define DEFAULT_FAILURE_RETRY_COUNT 1
#endif
/*
@@ -143,7 +150,6 @@ struct f2fs_rwsem {
struct f2fs_mount_info {
unsigned int opt;
- int write_io_size_bits; /* Write IO size bits */
block_t root_reserved_blocks; /* root reserved blocks */
kuid_t s_resuid; /* reserved blocks for uid */
kgid_t s_resgid; /* reserved blocks for gid */
@@ -1081,7 +1087,8 @@ struct f2fs_sm_info {
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
-#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
+#define WB_DATA_TYPE(p, f) \
+ (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
@@ -1111,6 +1118,7 @@ enum count_type {
* ... Only can be used with META.
*/
#define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
+#define PAGE_TYPE_ON_MAIN(type) ((type) == DATA || (type) == NODE)
enum page_type {
DATA = 0,
NODE = 1, /* should not change this */
@@ -1205,7 +1213,6 @@ struct f2fs_io_info {
unsigned int submitted:1; /* indicate IO submission */
unsigned int in_list:1; /* indicate fio is in io_list */
unsigned int is_por:1; /* indicate IO is from recovery or not */
- unsigned int retry:1; /* need to reallocate block address */
unsigned int encrypted:1; /* indicate file is encrypted */
unsigned int post_read:1; /* require post read */
enum iostat_type io_type; /* io type */
@@ -1407,18 +1414,16 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr);
* Layout A: lowest bit should be 1
* | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... |
* bit 0 PAGE_PRIVATE_NOT_POINTER
- * bit 1 PAGE_PRIVATE_DUMMY_WRITE
- * bit 2 PAGE_PRIVATE_ONGOING_MIGRATION
- * bit 3 PAGE_PRIVATE_INLINE_INODE
- * bit 4 PAGE_PRIVATE_REF_RESOURCE
- * bit 5- f2fs private data
+ * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION
+ * bit 2 PAGE_PRIVATE_INLINE_INODE
+ * bit 3 PAGE_PRIVATE_REF_RESOURCE
+ * bit 4- f2fs private data
*
* Layout B: lowest bit should be 0
* page.private is a wrapped pointer.
*/
enum {
PAGE_PRIVATE_NOT_POINTER, /* private contains non-pointer data */
- PAGE_PRIVATE_DUMMY_WRITE, /* data page for padding aligned IO */
PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */
PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */
PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */
@@ -1565,7 +1570,6 @@ struct f2fs_sb_info {
struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */
/* keep migration IO order for LFS mode */
struct f2fs_rwsem io_order_lock;
- mempool_t *write_io_dummy; /* Dummy pages */
pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */
int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */
@@ -1811,6 +1815,37 @@ struct f2fs_sb_info {
#endif
};
+/* Definitions to access f2fs_sb_info */
+#define SEGS_TO_BLKS(sbi, segs) \
+ ((segs) << (sbi)->log_blocks_per_seg)
+#define BLKS_TO_SEGS(sbi, blks) \
+ ((blks) >> (sbi)->log_blocks_per_seg)
+
+#define BLKS_PER_SEG(sbi) ((sbi)->blocks_per_seg)
+#define BLKS_PER_SEC(sbi) (SEGS_TO_BLKS(sbi, (sbi)->segs_per_sec))
+#define SEGS_PER_SEC(sbi) ((sbi)->segs_per_sec)
+
+__printf(3, 4)
+void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...);
+
+#define f2fs_err(sbi, fmt, ...) \
+ f2fs_printk(sbi, false, KERN_ERR fmt, ##__VA_ARGS__)
+#define f2fs_warn(sbi, fmt, ...) \
+ f2fs_printk(sbi, false, KERN_WARNING fmt, ##__VA_ARGS__)
+#define f2fs_notice(sbi, fmt, ...) \
+ f2fs_printk(sbi, false, KERN_NOTICE fmt, ##__VA_ARGS__)
+#define f2fs_info(sbi, fmt, ...) \
+ f2fs_printk(sbi, false, KERN_INFO fmt, ##__VA_ARGS__)
+#define f2fs_debug(sbi, fmt, ...) \
+ f2fs_printk(sbi, false, KERN_DEBUG fmt, ##__VA_ARGS__)
+
+#define f2fs_err_ratelimited(sbi, fmt, ...) \
+ f2fs_printk(sbi, true, KERN_ERR fmt, ##__VA_ARGS__)
+#define f2fs_warn_ratelimited(sbi, fmt, ...) \
+ f2fs_printk(sbi, true, KERN_WARNING fmt, ##__VA_ARGS__)
+#define f2fs_info_ratelimited(sbi, fmt, ...) \
+ f2fs_printk(sbi, true, KERN_INFO fmt, ##__VA_ARGS__)
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define time_to_inject(sbi, type) __time_to_inject(sbi, type, __func__, \
__builtin_return_address(0))
@@ -1828,9 +1863,8 @@ static inline bool __time_to_inject(struct f2fs_sb_info *sbi, int type,
atomic_inc(&ffi->inject_ops);
if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
atomic_set(&ffi->inject_ops, 0);
- printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n",
- KERN_INFO, sbi->sb->s_id, f2fs_fault_name[type],
- func, parent_func);
+ f2fs_info_ratelimited(sbi, "inject %s in %s of %pS",
+ f2fs_fault_name[type], func, parent_func);
return true;
}
return false;
@@ -2250,9 +2284,30 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
return false;
}
+static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi,
+ struct inode *inode, bool cap)
+{
+ block_t avail_user_block_count;
+
+ avail_user_block_count = sbi->user_block_count -
+ sbi->current_reserved_blocks;
+
+ if (!__allow_reserved_blocks(sbi, inode, cap))
+ avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (avail_user_block_count > sbi->unusable_block_count)
+ avail_user_block_count -= sbi->unusable_block_count;
+ else
+ avail_user_block_count = 0;
+ }
+
+ return avail_user_block_count;
+}
+
static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
- struct inode *inode, blkcnt_t *count)
+ struct inode *inode, blkcnt_t *count, bool partial)
{
blkcnt_t diff = 0, release = 0;
block_t avail_user_block_count;
@@ -2275,23 +2330,14 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
sbi->total_valid_block_count += (block_t)(*count);
- avail_user_block_count = sbi->user_block_count -
- sbi->current_reserved_blocks;
-
- if (!__allow_reserved_blocks(sbi, inode, true))
- avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
- if (F2FS_IO_ALIGNED(sbi))
- avail_user_block_count -= sbi->blocks_per_seg *
- SM_I(sbi)->additional_reserved_segments;
+ avail_user_block_count = get_available_block_count(sbi, inode, true);
- if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
- if (avail_user_block_count > sbi->unusable_block_count)
- avail_user_block_count -= sbi->unusable_block_count;
- else
- avail_user_block_count = 0;
- }
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
+ if (!partial) {
+ spin_unlock(&sbi->stat_lock);
+ goto enospc;
+ }
+
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
diff = *count;
@@ -2319,20 +2365,6 @@ release_quota:
return -ENOSPC;
}
-__printf(2, 3)
-void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...);
-
-#define f2fs_err(sbi, fmt, ...) \
- f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__)
-#define f2fs_warn(sbi, fmt, ...) \
- f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__)
-#define f2fs_notice(sbi, fmt, ...) \
- f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__)
-#define f2fs_info(sbi, fmt, ...) \
- f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__)
-#define f2fs_debug(sbi, fmt, ...) \
- f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__)
-
#define PAGE_PRIVATE_GET_FUNC(name, flagname) \
static inline bool page_private_##name(struct page *page) \
{ \
@@ -2361,17 +2393,14 @@ static inline void clear_page_private_##name(struct page *page) \
PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER);
PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE);
PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE);
PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE);
PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE);
PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE);
PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE);
PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE);
PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE);
static inline unsigned long get_page_private_data(struct page *page)
{
@@ -2505,11 +2534,8 @@ static inline int get_dirty_pages(struct inode *inode)
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
{
- unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
- unsigned int segs = (get_pages(sbi, block_type) + pages_per_sec - 1) >>
- sbi->log_blocks_per_seg;
-
- return segs / sbi->segs_per_sec;
+ return div_u64(get_pages(sbi, block_type) + BLKS_PER_SEC(sbi) - 1,
+ BLKS_PER_SEC(sbi));
}
static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
@@ -2573,7 +2599,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
if (sbi->cur_cp_pack == 2)
- start_addr += sbi->blocks_per_seg;
+ start_addr += BLKS_PER_SEG(sbi);
return start_addr;
}
@@ -2582,7 +2608,7 @@ static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi)
block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
if (sbi->cur_cp_pack == 1)
- start_addr += sbi->blocks_per_seg;
+ start_addr += BLKS_PER_SEG(sbi);
return start_addr;
}
@@ -2601,7 +2627,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode, bool is_inode)
{
block_t valid_block_count;
- unsigned int valid_node_count, user_block_count;
+ unsigned int valid_node_count;
+ unsigned int avail_user_block_count;
int err;
if (is_inode) {
@@ -2621,21 +2648,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
- valid_block_count = sbi->total_valid_block_count +
- sbi->current_reserved_blocks + 1;
-
- if (!__allow_reserved_blocks(sbi, inode, false))
- valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+ valid_block_count = sbi->total_valid_block_count + 1;
+ avail_user_block_count = get_available_block_count(sbi, inode, false);
- if (F2FS_IO_ALIGNED(sbi))
- valid_block_count += sbi->blocks_per_seg *
- SM_I(sbi)->additional_reserved_segments;
-
- user_block_count = sbi->user_block_count;
- if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
- user_block_count -= sbi->unusable_block_count;
-
- if (unlikely(valid_block_count > user_block_count)) {
+ if (unlikely(valid_block_count > avail_user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
@@ -3022,6 +3038,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
case FI_INLINE_DOTS:
case FI_PIN_FILE:
case FI_COMPRESS_RELEASED:
+ case FI_ATOMIC_COMMITTED:
f2fs_mark_inode_dirty_sync(inode, true);
}
}
@@ -3445,7 +3462,7 @@ static inline __le32 *get_dnode_addr(struct inode *inode,
sizeof((f2fs_inode)->field)) \
<= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \
-#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
+#define __is_large_section(sbi) (SEGS_PER_SEC(sbi) > 1)
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
@@ -3454,11 +3471,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type))
f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
blkaddr, type);
- f2fs_bug_on(sbi, 1);
- }
}
static inline bool __is_valid_data_blkaddr(block_t blkaddr)
@@ -3560,7 +3575,8 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode);
void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct inode *dir, struct inode *inode);
-int f2fs_do_tmpfile(struct inode *inode, struct inode *dir);
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir,
+ struct f2fs_filename *fname);
bool f2fs_empty_dir(struct inode *dir);
static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
@@ -3675,15 +3691,14 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno);
-void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
-void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
- unsigned int *newseg, bool new_sec, int dir);
-void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
-void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
+int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
+int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
+int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
@@ -3704,7 +3719,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
block_t old_addr, block_t new_addr,
unsigned char version, bool recover_curseg,
bool recover_newaddr);
-void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio);
@@ -3754,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
@@ -3794,6 +3811,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
*/
int __init f2fs_init_bioset(void);
void f2fs_destroy_bioset(void);
+bool f2fs_is_cp_guaranteed(struct page *page);
int f2fs_init_bio_entry_cache(void);
void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
@@ -3857,6 +3875,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+int f2fs_gc_range(struct f2fs_sb_info *sbi,
+ unsigned int start_seg, unsigned int end_seg,
+ bool dry_run, unsigned int dry_run_sections);
int f2fs_resize_fs(struct file *filp, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
void f2fs_destroy_garbage_collection_cache(void);
@@ -4277,7 +4298,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed,
bool in_task);
void f2fs_put_page_dic(struct page *page, bool in_task);
-unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn);
+unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn,
+ unsigned int ofs_in_node);
int f2fs_init_compress_ctx(struct compress_ctx *cc);
void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
@@ -4334,7 +4356,8 @@ static inline void f2fs_put_page_dic(struct page *page, bool in_task)
{
WARN_ON_ONCE(1);
}
-static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; }
+static inline unsigned int f2fs_cluster_blocks_are_contiguous(
+ struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; }
static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; }
static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { }
@@ -4391,15 +4414,24 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
- if (!f2fs_compressed_file(inode))
+ f2fs_down_write(&F2FS_I(inode)->i_sem);
+
+ if (!f2fs_compressed_file(inode)) {
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
return true;
- if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))
+ }
+ if (f2fs_is_mmap_file(inode) ||
+ (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) {
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
return false;
+ }
fi->i_flags &= ~F2FS_COMPR_FL;
stat_dec_compr_inode(inode);
clear_inode_flag(inode, FI_COMPRESSED_FILE);
f2fs_mark_inode_dirty_sync(inode, true);
+
+ f2fs_up_write(&F2FS_I(inode)->i_sem);
return true;
}
@@ -4502,6 +4534,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
}
+static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ int devi = f2fs_target_device_index(sbi, blkaddr);
+
+ return !bdev_is_zoned(FDEV(devi).bdev);
+ }
+ return true;
+}
+
static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
{
return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
@@ -4603,10 +4646,36 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
}
+static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
+ block_t blkaddr, unsigned int cnt)
+{
+ bool need_submit = false;
+ int i = 0;
+
+ do {
+ struct page *page;
+
+ page = find_get_page(META_MAPPING(sbi), blkaddr + i);
+ if (page) {
+ if (PageWriteback(page))
+ need_submit = true;
+ f2fs_put_page(page, 0);
+ }
+ } while (++i < cnt && !need_submit);
+
+ if (need_submit)
+ f2fs_submit_merged_write_cond(sbi, sbi->meta_inode,
+ NULL, 0, DATA);
+
+ truncate_inode_pages_range(META_MAPPING(sbi),
+ F2FS_BLK_TO_BYTES((loff_t)blkaddr),
+ F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1)));
+}
+
static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
block_t blkaddr)
{
- invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr);
+ f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1);
f2fs_invalidate_compress_page(sbi, blkaddr);
}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b58ab1157b7e..1761ad125f97 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -39,6 +39,7 @@
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
+ vm_flags_t flags = vmf->vma->vm_flags;
vm_fault_t ret;
ret = filemap_fault(vmf);
@@ -46,7 +47,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_MAPPED_READ_IO, F2FS_BLKSIZE);
- trace_f2fs_filemap_fault(inode, vmf->pgoff, vmf->vma->vm_flags, ret);
+ trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);
return ret;
}
@@ -394,9 +395,20 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return f2fs_do_sync_file(file, start, end, datasync, false);
}
-static bool __found_offset(struct address_space *mapping, block_t blkaddr,
- pgoff_t index, int whence)
+static bool __found_offset(struct address_space *mapping,
+ struct dnode_of_data *dn, pgoff_t index, int whence)
{
+ block_t blkaddr = f2fs_data_blkaddr(dn);
+ struct inode *inode = mapping->host;
+ bool compressed_cluster = false;
+
+ if (f2fs_compressed_file(inode)) {
+ block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
+
+ compressed_cluster = first_blkaddr == COMPRESS_ADDR;
+ }
+
switch (whence) {
case SEEK_DATA:
if (__is_valid_data_blkaddr(blkaddr))
@@ -404,8 +416,12 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr,
if (blkaddr == NEW_ADDR &&
xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
return true;
+ if (compressed_cluster)
+ return true;
break;
case SEEK_HOLE:
+ if (compressed_cluster)
+ return false;
if (blkaddr == NULL_ADDR)
return true;
break;
@@ -474,7 +490,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto fail;
}
- if (__found_offset(file->f_mapping, blkaddr,
+ if (__found_offset(file->f_mapping, &dn,
pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
@@ -590,8 +606,10 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
f2fs_set_data_blkaddr(dn, NULL_ADDR);
if (__is_valid_data_blkaddr(blkaddr)) {
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))
+ if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
+ continue;
+ if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE))
continue;
if (compressed_cluster)
valid_blocks++;
@@ -818,8 +836,6 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw)
*/
if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE))
return true;
- if (f2fs_lfs_mode(sbi) && rw == WRITE && F2FS_IO_ALIGNED(sbi))
- return true;
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
return true;
@@ -1192,7 +1208,6 @@ next_dnode:
!f2fs_is_valid_blkaddr(sbi, *blkaddr,
DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
}
@@ -1478,7 +1493,6 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
DATA_GENERIC_ENHANCE)) {
ret = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
break;
}
@@ -1662,10 +1676,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
}
filemap_invalidate_unlock(mapping);
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (ret)
+ return ret;
/* write out all moved pages, if possible */
filemap_invalidate_lock(mapping);
- filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
+ ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
filemap_invalidate_unlock(mapping);
@@ -1731,9 +1747,11 @@ next_alloc:
f2fs_down_write(&sbi->pin_sem);
- f2fs_lock_op(sbi);
- f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
- f2fs_unlock_op(sbi);
+ err = f2fs_allocate_pinning_section(sbi);
+ if (err) {
+ f2fs_up_write(&sbi->pin_sem);
+ goto out_err;
+ }
map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
@@ -2066,7 +2084,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
inode_lock(inode);
- if (!f2fs_disable_compressed_file(inode)) {
+ if (!f2fs_disable_compressed_file(inode) ||
+ f2fs_is_pinned_file(inode)) {
ret = -EINVAL;
goto out;
}
@@ -2243,8 +2262,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
case F2FS_GOING_DOWN_METASYNC:
/* do checkpoint only */
ret = f2fs_sync_fs(sb, 1);
- if (ret)
+ if (ret) {
+ if (ret == -EIO)
+ ret = 0;
goto out;
+ }
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
@@ -2260,6 +2282,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
set_sbi_flag(sbi, SBI_IS_DIRTY);
/* do checkpoint only */
ret = f2fs_sync_fs(sb, 1);
+ if (ret == -EIO)
+ ret = 0;
goto out;
default:
ret = -EINVAL;
@@ -2578,7 +2602,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
.m_may_create = false };
struct extent_info ei = {};
pgoff_t pg_start, pg_end, next_pgofs;
- unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
block_t blk_end = 0;
bool fragmented = false;
@@ -2687,7 +2710,8 @@ do_map:
set_inode_flag(inode, FI_SKIP_WRITES);
idx = map.m_lblk;
- while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
+ while (idx < map.m_lblk + map.m_len &&
+ cnt < BLKS_PER_SEG(sbi)) {
struct page *page;
page = f2fs_get_lock_data_page(inode, idx, true);
@@ -2707,7 +2731,7 @@ do_map:
map.m_lblk = idx;
check:
- if (map.m_lblk < pg_end && cnt < blk_per_seg)
+ if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
goto do_map;
clear_inode_flag(inode, FI_SKIP_WRITES);
@@ -2976,8 +3000,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
__is_large_section(sbi)) {
- f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1",
- range.dev_num, sbi->s_ndevs, sbi->segs_per_sec);
+ f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
+ range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
return -EINVAL;
}
@@ -3183,6 +3207,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
__u32 pin;
int ret = 0;
@@ -3192,7 +3217,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ if (f2fs_readonly(sbi->sb))
return -EROFS;
ret = mnt_want_write_file(filp);
@@ -3205,9 +3230,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
clear_inode_flag(inode, FI_PIN_FILE);
f2fs_i_gc_failures_write(inode, 0);
goto done;
+ } else if (f2fs_is_pinned_file(inode)) {
+ goto done;
}
- if (f2fs_should_update_outplace(inode, NULL)) {
+ if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
+ ret = -EFBIG;
+ goto out;
+ }
+
+ /* Let's allow file pinning on zoned device. */
+ if (!f2fs_sb_has_blkzoned(sbi) &&
+ f2fs_should_update_outplace(inode, NULL)) {
ret = -EINVAL;
goto out;
}
@@ -3229,7 +3263,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
set_inode_flag(inode, FI_PIN_FILE);
ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
done:
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ f2fs_update_time(sbi, REQ_TIME);
out:
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -3438,10 +3472,8 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
if (!__is_valid_data_blkaddr(blkaddr))
continue;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))) {
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+ DATA_GENERIC_ENHANCE)))
return -EFSCORRUPTED;
- }
}
while (count) {
@@ -3588,10 +3620,10 @@ out:
return ret;
}
-static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
+static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
+ unsigned int *reserved_blocks)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
- unsigned int reserved_blocks = 0;
int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
block_t blkaddr;
int i;
@@ -3603,10 +3635,8 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
if (!__is_valid_data_blkaddr(blkaddr))
continue;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))) {
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+ DATA_GENERIC_ENHANCE)))
return -EFSCORRUPTED;
- }
}
while (count) {
@@ -3614,40 +3644,53 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
blkcnt_t reserved;
int ret;
- for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
- blkaddr = f2fs_data_blkaddr(dn);
+ for (i = 0; i < cluster_size; i++) {
+ blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ dn->ofs_in_node + i);
if (i == 0) {
- if (blkaddr == COMPRESS_ADDR)
- continue;
- dn->ofs_in_node += cluster_size;
- goto next;
+ if (blkaddr != COMPRESS_ADDR) {
+ dn->ofs_in_node += cluster_size;
+ goto next;
+ }
+ continue;
}
- if (__is_valid_data_blkaddr(blkaddr)) {
+ /*
+ * compressed cluster was not released due to it
+ * fails in release_compress_blocks(), so NEW_ADDR
+ * is a possible case.
+ */
+ if (blkaddr == NEW_ADDR ||
+ __is_valid_data_blkaddr(blkaddr)) {
compr_blocks++;
continue;
}
-
- f2fs_set_data_blkaddr(dn, NEW_ADDR);
}
reserved = cluster_size - compr_blocks;
- ret = inc_valid_block_count(sbi, dn->inode, &reserved);
- if (ret)
+
+ /* for the case all blocks in cluster were reserved */
+ if (reserved == 1)
+ goto next;
+
+ ret = inc_valid_block_count(sbi, dn->inode, &reserved, false);
+ if (unlikely(ret))
return ret;
- if (reserved != cluster_size - compr_blocks)
- return -ENOSPC;
+ for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+ if (f2fs_data_blkaddr(dn) == NULL_ADDR)
+ f2fs_set_data_blkaddr(dn, NEW_ADDR);
+ }
f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
- reserved_blocks += reserved;
+ *reserved_blocks += reserved;
next:
count -= cluster_size;
}
- return reserved_blocks;
+ return 0;
}
static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
@@ -3671,9 +3714,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
return ret;
- if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
- goto out;
-
f2fs_balance_fs(sbi, true);
inode_lock(inode);
@@ -3683,6 +3723,9 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
goto unlock_inode;
}
+ if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
+ goto unlock_inode;
+
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
@@ -3708,7 +3751,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
count = round_up(count, F2FS_I(inode)->i_cluster_size);
- ret = reserve_compress_blocks(&dn, count);
+ ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
f2fs_put_dnode(&dn);
@@ -3716,23 +3759,21 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
break;
page_idx += count;
- reserved_blocks += ret;
}
filemap_invalidate_unlock(inode->i_mapping);
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- if (ret >= 0) {
+ if (!ret) {
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
unlock_inode:
inode_unlock(inode);
-out:
mnt_drop_write_file(filp);
- if (ret >= 0) {
+ if (!ret) {
ret = put_user(reserved_blocks, (u64 __user *)arg);
} else if (reserved_blocks &&
atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
@@ -3877,8 +3918,6 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
DATA_GENERIC_ENHANCE)) {
ret = -EFSCORRUPTED;
f2fs_put_dnode(&dn);
- f2fs_handle_error(sbi,
- ERROR_INVALID_BLKADDR);
goto out;
}
@@ -3981,16 +4020,20 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
sizeof(option)))
return -EFAULT;
- if (!f2fs_compressed_file(inode) ||
- option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
- option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
- option.algorithm >= COMPRESS_MAX)
+ if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
+ option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
+ option.algorithm >= COMPRESS_MAX)
return -EINVAL;
file_start_write(filp);
inode_lock(inode);
f2fs_down_write(&F2FS_I(inode)->i_sem);
+ if (!f2fs_compressed_file(inode)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
ret = -EBUSY;
goto out;
@@ -4066,7 +4109,6 @@ static int f2fs_ioc_decompress_file(struct file *filp)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
pgoff_t page_idx = 0, last_idx;
- unsigned int blk_per_seg = sbi->blocks_per_seg;
int cluster_size = fi->i_cluster_size;
int count, ret;
@@ -4110,7 +4152,7 @@ static int f2fs_ioc_decompress_file(struct file *filp)
if (ret < 0)
break;
- if (get_dirty_pages(inode) >= blk_per_seg) {
+ if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
ret = filemap_fdatawrite(inode->i_mapping);
if (ret < 0)
break;
@@ -4145,7 +4187,6 @@ static int f2fs_ioc_compress_file(struct file *filp)
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t page_idx = 0, last_idx;
- unsigned int blk_per_seg = sbi->blocks_per_seg;
int cluster_size = F2FS_I(inode)->i_cluster_size;
int count, ret;
@@ -4188,7 +4229,7 @@ static int f2fs_ioc_compress_file(struct file *filp)
if (ret < 0)
break;
- if (get_dirty_pages(inode) >= blk_per_seg) {
+ if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
ret = filemap_fdatawrite(inode->i_mapping);
if (ret < 0)
break;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a079eebfb080..8852814dab7f 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -259,7 +259,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->ofs_unit = 1;
} else {
p->gc_mode = select_gc_type(sbi, gc_type);
- p->ofs_unit = sbi->segs_per_sec;
+ p->ofs_unit = SEGS_PER_SEC(sbi);
if (__is_large_section(sbi)) {
p->dirty_bitmap = dirty_i->dirty_secmap;
p->max_search = count_bits(p->dirty_bitmap,
@@ -280,11 +280,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
- /* let's select beginning hot/small space first in no_heap mode*/
+ /* let's select beginning hot/small space first. */
if (f2fs_need_rand_seg(sbi))
- p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
- else if (test_opt(sbi, NOHEAP) &&
- (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
+ p->offset = get_random_u32_below(MAIN_SECS(sbi) *
+ SEGS_PER_SEC(sbi));
+ else if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
p->offset = 0;
else
p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
@@ -295,13 +295,13 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
{
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
- return sbi->blocks_per_seg;
+ return BLKS_PER_SEG(sbi);
else if (p->alloc_mode == AT_SSR)
return UINT_MAX;
/* LFS */
if (p->gc_mode == GC_GREEDY)
- return 2 * sbi->blocks_per_seg * p->ofs_unit;
+ return SEGS_TO_BLKS(sbi, 2 * p->ofs_unit);
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else if (p->gc_mode == GC_AT)
@@ -348,7 +348,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
mtime = div_u64(mtime, usable_segs_per_sec);
vblocks = div_u64(vblocks, usable_segs_per_sec);
- u = (vblocks * 100) >> sbi->log_blocks_per_seg;
+ u = BLKS_TO_SEGS(sbi, vblocks * 100);
/* Handle if the system time has changed by the user */
if (mtime < sit_i->min_mtime)
@@ -496,9 +496,9 @@ static void add_victim_entry(struct f2fs_sb_info *sbi,
return;
}
- for (i = 0; i < sbi->segs_per_sec; i++)
+ for (i = 0; i < SEGS_PER_SEC(sbi); i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
- mtime = div_u64(mtime, sbi->segs_per_sec);
+ mtime = div_u64(mtime, SEGS_PER_SEC(sbi));
/* Handle if the system time has changed by the user */
if (mtime < sit_i->min_mtime)
@@ -599,7 +599,6 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
unsigned long long age;
unsigned long long max_mtime = sit_i->dirty_max_mtime;
unsigned long long min_mtime = sit_i->dirty_min_mtime;
- unsigned int seg_blocks = sbi->blocks_per_seg;
unsigned int vblocks;
unsigned int dirty_threshold = max(am->max_candidate_count,
am->candidate_ratio *
@@ -629,7 +628,7 @@ next_node:
f2fs_bug_on(sbi, !vblocks);
/* rare case */
- if (vblocks == seg_blocks)
+ if (vblocks == BLKS_PER_SEG(sbi))
goto skip_node;
iter++;
@@ -755,7 +754,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
int ret = 0;
mutex_lock(&dirty_i->seglist_lock);
- last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
+ last_segment = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi);
p.alloc_mode = alloc_mode;
p.age = age;
@@ -896,7 +895,7 @@ next:
else
sm->last_victim[p.gc_mode] = segno + p.ofs_unit;
sm->last_victim[p.gc_mode] %=
- (MAIN_SECS(sbi) * sbi->segs_per_sec);
+ (MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
break;
}
}
@@ -1184,7 +1183,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
.op_flags = 0,
.encrypted_page = NULL,
.in_list = 0,
- .retry = 0,
};
int err;
@@ -1197,7 +1195,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ))) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto put_page;
}
goto got_it;
@@ -1216,7 +1213,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE))) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto put_page;
}
got_it:
@@ -1273,7 +1269,6 @@ static int move_data_block(struct inode *inode, block_t bidx,
.op_flags = 0,
.encrypted_page = NULL,
.in_list = 0,
- .retry = 0,
};
struct dnode_of_data dn;
struct f2fs_summary sum;
@@ -1364,8 +1359,13 @@ static int move_data_block(struct inode *inode, block_t bidx,
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* allocate block address */
- f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
+ err = f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, type, NULL);
+ if (err) {
+ f2fs_put_page(mpage, 1);
+ /* filesystem should shutdown, no need to recovery block */
+ goto up_out;
+ }
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -1393,18 +1393,12 @@ static int move_data_block(struct inode *inode, block_t bidx,
fio.op_flags = REQ_SYNC;
fio.new_blkaddr = newaddr;
f2fs_submit_page_write(&fio);
- if (fio.retry) {
- err = -EAGAIN;
- if (PageWriteback(fio.encrypted_page))
- end_page_writeback(fio.encrypted_page);
- goto put_page_out;
- }
f2fs_update_iostat(fio.sbi, NULL, FS_GC_DATA_IO, F2FS_BLKSIZE);
f2fs_update_data_blkaddr(&dn, newaddr);
set_inode_flag(inode, FI_APPEND_WRITE);
-put_page_out:
+
f2fs_put_page(fio.encrypted_page, 1);
recover_block:
if (err)
@@ -1678,7 +1672,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct f2fs_summary_block *sum;
struct blk_plug plug;
unsigned int segno = start_segno;
- unsigned int end_segno = start_segno + sbi->segs_per_sec;
+ unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi);
int seg_freed = 0, migrated = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
@@ -1686,7 +1680,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
int submitted = 0;
if (__is_large_section(sbi))
- end_segno = rounddown(end_segno, sbi->segs_per_sec);
+ end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi));
/*
* zone-capacity can be less than zone-size in zoned devices,
@@ -1694,7 +1688,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
* calculate the end segno in the zone which can be garbage collected
*/
if (f2fs_sb_has_blkzoned(sbi))
- end_segno -= sbi->segs_per_sec -
+ end_segno -= SEGS_PER_SEC(sbi) -
f2fs_usable_segs_in_sec(sbi, segno);
sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
@@ -1983,10 +1977,43 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
init_atgc_management(sbi);
}
+int f2fs_gc_range(struct f2fs_sb_info *sbi,
+ unsigned int start_seg, unsigned int end_seg,
+ bool dry_run, unsigned int dry_run_sections)
+{
+ unsigned int segno;
+ unsigned int gc_secs = dry_run_sections;
+
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
+ for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
+ };
+
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC,
+ dry_run_sections == 0);
+ put_gc_inode(&gc_list);
+
+ if (!dry_run && get_valid_blocks(sbi, segno, true))
+ return -EAGAIN;
+ if (dry_run && dry_run_sections &&
+ !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
+ break;
+
+ if (fatal_signal_pending(current))
+ return -ERESTARTSYS;
+ }
+
+ return 0;
+}
+
static int free_segment_range(struct f2fs_sb_info *sbi,
- unsigned int secs, bool gc_only)
+ unsigned int secs, bool dry_run)
{
- unsigned int segno, next_inuse, start, end;
+ unsigned int next_inuse, start, end;
struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
int gc_mode, gc_type;
int err = 0;
@@ -1994,7 +2021,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
/* Force block allocation for GC */
MAIN_SECS(sbi) -= secs;
- start = MAIN_SECS(sbi) * sbi->segs_per_sec;
+ start = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi);
end = MAIN_SEGS(sbi) - 1;
mutex_lock(&DIRTY_I(sbi)->seglist_lock);
@@ -2008,29 +2035,15 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
/* Move out cursegs from the target range */
- for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
- f2fs_allocate_segment_for_resize(sbi, type, start, end);
-
- /* do GC to move out valid blocks in the range */
- for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
- struct gc_inode_list gc_list = {
- .ilist = LIST_HEAD_INIT(gc_list.ilist),
- .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
- };
-
- do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
- put_gc_inode(&gc_list);
-
- if (!gc_only && get_valid_blocks(sbi, segno, true)) {
- err = -EAGAIN;
- goto out;
- }
- if (fatal_signal_pending(current)) {
- err = -ERESTARTSYS;
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) {
+ err = f2fs_allocate_segment_for_resize(sbi, type, start, end);
+ if (err)
goto out;
- }
}
- if (gc_only)
+
+ /* do GC to move out valid blocks in the range */
+ err = f2fs_gc_range(sbi, start, end, dry_run, 0);
+ if (err || dry_run)
goto out;
stat_inc_cp_call_count(sbi, TOTAL_CALL);
@@ -2056,7 +2069,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
int segment_count;
int segment_count_main;
long long block_count;
- int segs = secs * sbi->segs_per_sec;
+ int segs = secs * SEGS_PER_SEC(sbi);
f2fs_down_write(&sbi->sb_lock);
@@ -2069,7 +2082,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
raw_sb->segment_count = cpu_to_le32(segment_count + segs);
raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
raw_sb->block_count = cpu_to_le64(block_count +
- (long long)segs * sbi->blocks_per_seg);
+ (long long)SEGS_TO_BLKS(sbi, segs));
if (f2fs_is_multi_device(sbi)) {
int last_dev = sbi->s_ndevs - 1;
int dev_segs =
@@ -2084,8 +2097,8 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
{
- int segs = secs * sbi->segs_per_sec;
- long long blks = (long long)segs * sbi->blocks_per_seg;
+ int segs = secs * SEGS_PER_SEC(sbi);
+ long long blks = SEGS_TO_BLKS(sbi, segs);
long long user_block_count =
le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
@@ -2127,7 +2140,7 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count)
int last_dev = sbi->s_ndevs - 1;
__u64 last_segs = FDEV(last_dev).total_segments;
- if (block_count + last_segs * sbi->blocks_per_seg <=
+ if (block_count + SEGS_TO_BLKS(sbi, last_segs) <=
old_block_count)
return -EINVAL;
}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 28a00942802c..9c0d06c4d19a 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -96,7 +96,7 @@ static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
if (f2fs_sb_has_blkzoned(sbi))
return free_segs_blk_count_zoned(sbi);
- return free_segments(sbi) << sbi->log_blocks_per_seg;
+ return SEGS_TO_BLKS(sbi, free_segments(sbi));
}
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
@@ -104,7 +104,7 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
block_t free_blks, ovp_blks;
free_blks = free_segs_blk_count(sbi);
- ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+ ovp_blks = SEGS_TO_BLKS(sbi, overprovision_segments(sbi));
if (free_blks < ovp_blks)
return 0;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index f7f63a567d86..e54f8c08bda8 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -851,7 +851,7 @@ out:
static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode, bool is_whiteout,
- struct inode **new_inode)
+ struct inode **new_inode, struct f2fs_filename *fname)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
@@ -879,7 +879,7 @@ static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
if (err)
goto out;
- err = f2fs_do_tmpfile(inode, dir);
+ err = f2fs_do_tmpfile(inode, dir, fname);
if (err)
goto release_out;
@@ -930,22 +930,24 @@ static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
- err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL);
+ err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL, NULL);
return finish_open_simple(file, err);
}
static int f2fs_create_whiteout(struct mnt_idmap *idmap,
- struct inode *dir, struct inode **whiteout)
+ struct inode *dir, struct inode **whiteout,
+ struct f2fs_filename *fname)
{
- return __f2fs_tmpfile(idmap, dir, NULL,
- S_IFCHR | WHITEOUT_MODE, true, whiteout);
+ return __f2fs_tmpfile(idmap, dir, NULL, S_IFCHR | WHITEOUT_MODE,
+ true, whiteout, fname);
}
int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct inode **new_inode)
{
- return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, false, new_inode);
+ return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG,
+ false, new_inode, NULL);
}
static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
@@ -989,7 +991,14 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}
if (flags & RENAME_WHITEOUT) {
- err = f2fs_create_whiteout(idmap, old_dir, &whiteout);
+ struct f2fs_filename fname;
+
+ err = f2fs_setup_filename(old_dir, &old_dentry->d_name,
+ 0, &fname);
+ if (err)
+ return err;
+
+ err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname);
if (err)
return err;
}
@@ -1104,14 +1113,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
iput(whiteout);
}
- if (old_is_dir) {
- if (old_dir_entry)
- f2fs_set_link(old_inode, old_dir_entry,
- old_dir_page, new_dir);
- else
- f2fs_put_page(old_dir_page, 0);
+ if (old_dir_entry)
+ f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
+ if (old_is_dir)
f2fs_i_links_write(old_dir, false);
- }
+
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) {
f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
if (S_ISDIR(old_inode->i_mode))
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9b546fd21010..b3de6d6cdb02 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -852,21 +852,29 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) &&
f2fs_sb_has_readonly(sbi)) {
- unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn);
+ unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+ unsigned int ofs_in_node = dn->ofs_in_node;
+ pgoff_t fofs = index;
+ unsigned int c_len;
block_t blkaddr;
+ /* should align fofs and ofs_in_node to cluster_size */
+ if (fofs % cluster_size) {
+ fofs = round_down(fofs, cluster_size);
+ ofs_in_node = round_down(ofs_in_node, cluster_size);
+ }
+
+ c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node);
if (!c_len)
goto out;
- blkaddr = f2fs_data_blkaddr(dn);
+ blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node);
if (blkaddr == COMPRESS_ADDR)
blkaddr = data_blkaddr(dn->inode, dn->node_page,
- dn->ofs_in_node + 1);
+ ofs_in_node + 1);
f2fs_update_read_extent_tree_range_compressed(dn->inode,
- index, blkaddr,
- F2FS_I(dn->inode)->i_cluster_size,
- c_len);
+ fofs, blkaddr, cluster_size, c_len);
}
out:
return 0;
@@ -1919,7 +1927,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
for (i = 0; i < nr_folios; i++) {
struct page *page = &fbatch.folios[i]->page;
- if (!IS_DNODE(page))
+ if (!IS_INODE(page))
continue;
lock_page(page);
@@ -2841,7 +2849,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
int i, idx, last_offset, nrpages;
/* scan the node segment */
- last_offset = sbi->blocks_per_seg;
+ last_offset = BLKS_PER_SEG(sbi);
addr = START_BLOCK(sbi, segno);
sum_entry = &sum->entries[0];
@@ -3158,7 +3166,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
return 0;
- nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
+ nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
struct page *page;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 5bd16a95eef8..6aea13024ac1 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -208,10 +208,10 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
block_addr = (pgoff_t)(nm_i->nat_blkaddr +
(block_off << 1) -
- (block_off & (sbi->blocks_per_seg - 1)));
+ (block_off & (BLKS_PER_SEG(sbi) - 1)));
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
- block_addr += sbi->blocks_per_seg;
+ block_addr += BLKS_PER_SEG(sbi);
return block_addr;
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index d0f24ccbd1ac..e7bf15b8240a 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -354,7 +354,7 @@ static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
if (blkaddr + 1 == next_blkaddr)
ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
ra_blocks * 2);
- else if (next_blkaddr % sbi->blocks_per_seg)
+ else if (next_blkaddr % BLKS_PER_SEG(sbi))
ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS,
ra_blocks / 2);
return ra_blocks;
@@ -611,6 +611,19 @@ truncate_out:
return 0;
}
+static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn)
+{
+ int i, err = 0;
+
+ for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) {
+ err = f2fs_reserve_new_block(dn);
+ if (!err)
+ break;
+ }
+
+ return err;
+}
+
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct page *page)
{
@@ -680,14 +693,12 @@ retry_dn:
if (__is_valid_data_blkaddr(src) &&
!f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto err;
}
if (__is_valid_data_blkaddr(dest) &&
!f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto err;
}
@@ -712,14 +723,8 @@ retry_dn:
*/
if (dest == NEW_ADDR) {
f2fs_truncate_data_blocks_range(&dn, 1);
- do {
- err = f2fs_reserve_new_block(&dn);
- if (err == -ENOSPC) {
- f2fs_bug_on(sbi, 1);
- break;
- }
- } while (err &&
- IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION));
+
+ err = f2fs_reserve_new_block_retry(&dn);
if (err)
goto err;
continue;
@@ -727,16 +732,8 @@ retry_dn:
/* dest is valid block, try to recover from src to dest */
if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
-
if (src == NULL_ADDR) {
- do {
- err = f2fs_reserve_new_block(&dn);
- if (err == -ENOSPC) {
- f2fs_bug_on(sbi, 1);
- break;
- }
- } while (err &&
- IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION));
+ err = f2fs_reserve_new_block_retry(&dn);
if (err)
goto err;
}
@@ -756,8 +753,6 @@ retry_prev:
f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
dest, inode->i_ino, dn.ofs_in_node);
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi,
- ERROR_INVALID_BLKADDR);
goto err;
}
@@ -852,7 +847,7 @@ next:
f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
}
if (!err)
- f2fs_allocate_new_segments(sbi);
+ err = f2fs_allocate_new_segments(sbi);
return err;
}
@@ -864,7 +859,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
- bool fix_curseg_write_pointer = false;
if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
f2fs_info(sbi, "recover fsync data on readonly fs");
@@ -895,8 +889,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
else
f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
skip:
- fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
-
destroy_fsync_dnodes(&inode_list, err);
destroy_fsync_dnodes(&tmp_inode_list, err);
@@ -914,11 +906,13 @@ skip:
* and the f2fs is not read only, check and fix zoned block devices'
* write pointer consistency.
*/
- if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) &&
- f2fs_sb_has_blkzoned(sbi)) {
- err = f2fs_fix_curseg_write_pointer(sbi);
- if (!err)
- err = f2fs_check_write_pointer(sbi);
+ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sbi->sb)) {
+ int err2 = f2fs_fix_curseg_write_pointer(sbi);
+
+ if (!err2)
+ err2 = f2fs_check_write_pointer(sbi);
+ if (err2)
+ err = err2;
ret = err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index e1065ba70207..4fd76e867e0a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -192,6 +192,9 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
if (!f2fs_is_atomic_file(inode))
return;
+ if (clean)
+ truncate_inode_pages_final(inode->i_mapping);
+
release_atomic_write_cnt(inode);
clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
clear_inode_flag(inode, FI_ATOMIC_REPLACE);
@@ -201,7 +204,6 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
F2FS_I(inode)->atomic_write_task = NULL;
if (clean) {
- truncate_inode_pages_final(inode->i_mapping);
f2fs_i_size_write(inode, fi->original_i_size);
fi->original_i_size = 0;
}
@@ -248,7 +250,7 @@ retry:
} else {
blkcnt_t count = 1;
- err = inc_valid_block_count(sbi, inode, &count);
+ err = inc_valid_block_count(sbi, inode, &count, true);
if (err) {
f2fs_put_dnode(&dn);
return err;
@@ -334,8 +336,6 @@ static int __f2fs_commit_atomic_write(struct inode *inode)
DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
ret = -EFSCORRUPTED;
- f2fs_handle_error(sbi,
- ERROR_INVALID_BLKADDR);
goto out;
}
@@ -400,6 +400,9 @@ int f2fs_commit_atomic_write(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
+ if (f2fs_cp_error(sbi))
+ return;
+
if (time_to_inject(sbi, FAULT_CHECKPOINT))
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
@@ -448,8 +451,8 @@ static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
- unsigned int threshold = sbi->blocks_per_seg * factor *
- DEFAULT_DIRTY_THRESHOLD;
+ unsigned int threshold =
+ SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD));
unsigned int global_threshold = threshold * 3 / 2;
if (dents >= threshold || qdata >= threshold ||
@@ -872,7 +875,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
int ovp_hole_segs =
(overprovision_segments(sbi) - reserved_segments(sbi));
- block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
+ block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
block_t holes[2] = {0, 0}; /* DATA and NODE */
block_t unusable;
@@ -901,11 +904,16 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
int ovp_hole_segs =
(overprovision_segments(sbi) - reserved_segments(sbi));
+
+ if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
+ return 0;
if (unusable > F2FS_OPTION(sbi).unusable_cap)
return -EAGAIN;
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
dirty_segments(sbi) > ovp_hole_segs)
return -EAGAIN;
+ if (has_not_enough_free_secs(sbi, 0, 0))
+ return -EAGAIN;
return 0;
}
@@ -1132,8 +1140,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
struct seg_entry *sentry;
unsigned int segno;
block_t blk = start;
- unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
- unsigned long *map;
+ unsigned long offset, size, *map;
while (blk < end) {
segno = GET_SEGNO(sbi, blk);
@@ -1143,7 +1150,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
if (end < START_BLOCK(sbi, segno + 1))
size = GET_BLKOFF_FROM_SEG0(sbi, end);
else
- size = max_blocks;
+ size = BLKS_PER_SEG(sbi);
map = (unsigned long *)(sentry->cur_valid_map);
offset = __find_rev_next_bit(map, size, offset);
f2fs_bug_on(sbi, offset != size);
@@ -2048,7 +2055,6 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
bool check_only)
{
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
- int max_blocks = sbi->blocks_per_seg;
struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
@@ -2060,8 +2066,9 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
int i;
- if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
- !f2fs_block_unit_discard(sbi))
+ if (se->valid_blocks == BLKS_PER_SEG(sbi) ||
+ !f2fs_hw_support_discard(sbi) ||
+ !f2fs_block_unit_discard(sbi))
return false;
if (!force) {
@@ -2078,13 +2085,14 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
while (force || SM_I(sbi)->dcc_info->nr_discards <=
SM_I(sbi)->dcc_info->max_discards) {
- start = __find_rev_next_bit(dmap, max_blocks, end + 1);
- if (start >= max_blocks)
+ start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1);
+ if (start >= BLKS_PER_SEG(sbi))
break;
- end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
- if (force && start && end != max_blocks
- && (end - start) < cpc->trim_minlen)
+ end = __find_rev_next_zero_bit(dmap,
+ BLKS_PER_SEG(sbi), start + 1);
+ if (force && start && end != BLKS_PER_SEG(sbi) &&
+ (end - start) < cpc->trim_minlen)
continue;
if (check_only)
@@ -2166,8 +2174,8 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
start + 1);
if (section_alignment) {
- start = rounddown(start, sbi->segs_per_sec);
- end = roundup(end, sbi->segs_per_sec);
+ start = rounddown(start, SEGS_PER_SEC(sbi));
+ end = roundup(end, SEGS_PER_SEC(sbi));
}
for (i = start; i < end; i++) {
@@ -2186,7 +2194,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
if (!f2fs_sb_has_blkzoned(sbi) &&
(!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
- (end - start) << sbi->log_blocks_per_seg);
+ SEGS_TO_BLKS(sbi, end - start));
continue;
}
next:
@@ -2195,9 +2203,9 @@ next:
if (!IS_CURSEC(sbi, secno) &&
!get_valid_blocks(sbi, start, true))
f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
- sbi->segs_per_sec << sbi->log_blocks_per_seg);
+ BLKS_PER_SEC(sbi));
- start = start_segno + sbi->segs_per_sec;
+ start = start_segno + SEGS_PER_SEC(sbi);
if (start < end)
goto next;
else
@@ -2216,7 +2224,7 @@ next:
find_next:
if (is_valid) {
next_pos = find_next_zero_bit_le(entry->discard_map,
- sbi->blocks_per_seg, cur_pos);
+ BLKS_PER_SEG(sbi), cur_pos);
len = next_pos - cur_pos;
if (f2fs_sb_has_blkzoned(sbi) ||
@@ -2228,13 +2236,13 @@ find_next:
total_len += len;
} else {
next_pos = find_next_bit_le(entry->discard_map,
- sbi->blocks_per_seg, cur_pos);
+ BLKS_PER_SEG(sbi), cur_pos);
}
skip:
cur_pos = next_pos;
is_valid = !is_valid;
- if (cur_pos < sbi->blocks_per_seg)
+ if (cur_pos < BLKS_PER_SEG(sbi))
goto find_next;
release_discard_addr(entry);
@@ -2251,6 +2259,12 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
int err = 0;
+ if (f2fs_sb_has_readonly(sbi)) {
+ f2fs_info(sbi,
+ "Skip to start discard thread for readonly image");
+ return 0;
+ }
+
if (!f2fs_realtime_discard_enable(sbi))
return 0;
@@ -2283,7 +2297,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
- dcc->discard_granularity = sbi->blocks_per_seg;
+ dcc->discard_granularity = BLKS_PER_SEG(sbi);
else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
dcc->discard_granularity = BLKS_PER_SEC(sbi);
@@ -2297,7 +2311,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
atomic_set(&dcc->queued_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
- dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+ dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi));
dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
@@ -2405,6 +2419,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
#endif
segno = GET_SEGNO(sbi, blkaddr);
+ if (segno == NULL_SEGNO)
+ return;
se = get_seg_entry(sbi, segno);
new_vblocks = se->valid_blocks + del;
@@ -2546,7 +2562,7 @@ static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int typ
struct curseg_info *curseg = CURSEG_I(sbi, type);
if (sbi->ckpt->alloc_type[type] == SSR)
- return sbi->blocks_per_seg;
+ return BLKS_PER_SEG(sbi);
return curseg->next_blkoff;
}
@@ -2634,7 +2650,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
unsigned int segno = curseg->segno + 1;
struct free_segmap_info *free_i = FREE_I(sbi);
- if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
+ if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi))
return !test_bit(segno, free_i->free_segmap);
return 0;
}
@@ -2643,54 +2659,51 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
* Find a new segment from the free segments bitmap to right order
* This function should be returned with success, otherwise BUG
*/
-static void get_new_segment(struct f2fs_sb_info *sbi,
- unsigned int *newseg, bool new_sec, int dir)
+static int get_new_segment(struct f2fs_sb_info *sbi,
+ unsigned int *newseg, bool new_sec, bool pinning)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno, secno, zoneno;
unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
- unsigned int left_start = hint;
bool init = true;
- int go_left = 0;
int i;
+ int ret = 0;
spin_lock(&free_i->segmap_lock);
- if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
+ if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
+ if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
segno = find_next_zero_bit(free_i->free_segmap,
GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
goto got_it;
}
+
+ /*
+ * If we format f2fs on zoned storage, let's try to get pinned sections
+ * from beginning of the storage, which should be a conventional one.
+ */
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
+ hint = GET_SEC_FROM_SEG(sbi, segno);
+ }
+
find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
if (secno >= MAIN_SECS(sbi)) {
- if (dir == ALLOC_RIGHT) {
- secno = find_first_zero_bit(free_i->free_secmap,
+ secno = find_first_zero_bit(free_i->free_secmap,
MAIN_SECS(sbi));
- f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
- } else {
- go_left = 1;
- left_start = hint - 1;
+ if (secno >= MAIN_SECS(sbi)) {
+ ret = -ENOSPC;
+ goto out_unlock;
}
}
- if (go_left == 0)
- goto skip_left;
-
- while (test_bit(left_start, free_i->free_secmap)) {
- if (left_start > 0) {
- left_start--;
- continue;
- }
- left_start = find_first_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi));
- f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
- break;
- }
- secno = left_start;
-skip_left:
segno = GET_SEG_FROM_SEC(sbi, secno);
zoneno = GET_ZONE_FROM_SEC(sbi, secno);
@@ -2701,21 +2714,13 @@ skip_left:
goto got_it;
if (zoneno == old_zoneno)
goto got_it;
- if (dir == ALLOC_LEFT) {
- if (!go_left && zoneno + 1 >= total_zones)
- goto got_it;
- if (go_left && zoneno == 0)
- goto got_it;
- }
for (i = 0; i < NR_CURSEG_TYPE; i++)
if (CURSEG_I(sbi, i)->zone == zoneno)
break;
if (i < NR_CURSEG_TYPE) {
/* zone is in user, try another */
- if (go_left)
- hint = zoneno * sbi->secs_per_zone - 1;
- else if (zoneno + 1 >= total_zones)
+ if (zoneno + 1 >= total_zones)
hint = 0;
else
hint = (zoneno + 1) * sbi->secs_per_zone;
@@ -2725,9 +2730,23 @@ skip_left:
got_it:
/* set it as dirty segment in free segmap */
f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
+
+ /* no free section in conventional zone */
+ if (new_sec && pinning &&
+ !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
__set_inuse(sbi, segno);
*newseg = segno;
+out_unlock:
spin_unlock(&free_i->segmap_lock);
+
+ if (ret == -ENOSPC) {
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
+ f2fs_bug_on(sbi, 1);
+ }
+ return ret;
}
static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
@@ -2736,6 +2755,10 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
struct summary_footer *sum_footer;
unsigned short seg_type = curseg->seg_type;
+ /* only happen when get_new_segment() fails */
+ if (curseg->next_segno == NULL_SEGNO)
+ return;
+
curseg->inited = true;
curseg->segno = curseg->next_segno;
curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
@@ -2761,9 +2784,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
sanity_check_seg_type(sbi, seg_type);
if (f2fs_need_rand_seg(sbi))
- return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
+ return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
- /* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
return curseg->segno;
@@ -2774,8 +2796,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;
- if (test_opt(sbi, NOHEAP) &&
- (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
+ if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))
return 0;
if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2792,30 +2813,31 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
* Allocate a current working segment.
* This function always allocates a free segment in LFS manner.
*/
-static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
+static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
- unsigned short seg_type = curseg->seg_type;
unsigned int segno = curseg->segno;
- int dir = ALLOC_LEFT;
+ bool pinning = type == CURSEG_COLD_DATA_PINNED;
+ int ret;
if (curseg->inited)
- write_sum_page(sbi, curseg->sum_blk,
- GET_SUM_BLOCK(sbi, segno));
- if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
- dir = ALLOC_RIGHT;
-
- if (test_opt(sbi, NOHEAP))
- dir = ALLOC_RIGHT;
+ write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
segno = __get_next_segno(sbi, type);
- get_new_segment(sbi, &segno, new_sec, dir);
+ ret = get_new_segment(sbi, &segno, new_sec, pinning);
+ if (ret) {
+ if (ret == -ENOSPC)
+ curseg->segno = NULL_SEGNO;
+ return ret;
+ }
+
curseg->next_segno = segno;
reset_curseg(sbi, type, 1);
curseg->alloc_type = LFS;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
curseg->fragment_remained_chunk =
get_random_u32_inclusive(1, sbi->max_fragment_chunk);
+ return 0;
}
static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2831,7 +2853,7 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi,
for (i = 0; i < entries; i++)
target_map[i] = ckpt_map[i] | cur_map[i];
- return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
+ return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start);
}
static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
@@ -2842,14 +2864,14 @@ static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
{
- return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
+ return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi);
}
/*
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
*/
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static int change_curseg(struct f2fs_sb_info *sbi, int type)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2874,21 +2896,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
if (IS_ERR(sum_page)) {
/* GC won't be able to use stale summary pages by cp_error */
memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
- return;
+ return PTR_ERR(sum_page);
}
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
+ return 0;
}
static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
int alloc_mode, unsigned long long age);
-static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
int target_type, int alloc_mode,
unsigned long long age)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ int ret = 0;
curseg->seg_type = target_type;
@@ -2896,38 +2920,41 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
curseg->seg_type = se->type;
- change_curseg(sbi, type);
+ ret = change_curseg(sbi, type);
} else {
/* allocate cold segment by default */
curseg->seg_type = CURSEG_COLD_DATA;
- new_curseg(sbi, type, true);
+ ret = new_curseg(sbi, type, true);
}
stat_inc_seg_type(sbi, curseg);
+ return ret;
}
-static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+ int ret = 0;
if (!sbi->am.atgc_enabled)
- return;
+ return 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&SIT_I(sbi)->sentry_lock);
- get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+ ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC,
+ CURSEG_COLD_DATA, SSR, 0);
up_write(&SIT_I(sbi)->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
-
+ return ret;
}
-void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
{
- __f2fs_init_atgc_curseg(sbi);
+ return __f2fs_init_atgc_curseg(sbi);
}
static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
@@ -3055,11 +3082,12 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
return false;
}
-void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno;
+ int ret = 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
@@ -3070,9 +3098,9 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
goto unlock;
if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
- change_curseg(sbi, type);
+ ret = change_curseg(sbi, type);
else
- new_curseg(sbi, type, true);
+ ret = new_curseg(sbi, type, true);
stat_inc_seg_type(sbi, curseg);
@@ -3086,45 +3114,84 @@ unlock:
mutex_unlock(&curseg->curseg_mutex);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+ return ret;
}
-static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
+static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
bool new_sec, bool force)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
+ int err = 0;
+
+ if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited)
+ goto allocate;
if (!force && curseg->inited &&
!curseg->next_blkoff &&
!get_valid_blocks(sbi, curseg->segno, new_sec) &&
!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
- return;
+ return 0;
+allocate:
old_segno = curseg->segno;
- new_curseg(sbi, type, true);
+ err = new_curseg(sbi, type, true);
+ if (err)
+ return err;
stat_inc_seg_type(sbi, curseg);
locate_dirty_segment(sbi, old_segno);
+ return 0;
}
-void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
+int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
{
+ int ret;
+
f2fs_down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
- __allocate_new_segment(sbi, type, true, force);
+ ret = __allocate_new_segment(sbi, type, true, force);
up_write(&SIT_I(sbi)->sentry_lock);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+
+ return ret;
}
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
+int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
+{
+ int err;
+ bool gc_required = true;
+
+retry:
+ f2fs_lock_op(sbi);
+ err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
+ f2fs_unlock_op(sbi);
+
+ if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
+ f2fs_down_write(&sbi->gc_lock);
+ err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
+ f2fs_up_write(&sbi->gc_lock);
+
+ gc_required = false;
+ if (!err)
+ goto retry;
+ }
+
+ return err;
+}
+
+int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
int i;
+ int err = 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
- __allocate_new_segment(sbi, i, false, false);
+ err += __allocate_new_segment(sbi, i, false, false);
up_write(&SIT_I(sbi)->sentry_lock);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+
+ return err;
}
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3242,8 +3309,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
if (need_align) {
- start_segno = rounddown(start_segno, sbi->segs_per_sec);
- end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
+ start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi));
+ end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1;
}
cpc.reason = CP_DISCARD;
@@ -3416,7 +3483,14 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
get_random_u32_inclusive(1, sbi->max_fragment_hole);
}
-void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+static void reset_curseg_fields(struct curseg_info *curseg)
+{
+ curseg->inited = false;
+ curseg->segno = NULL_SEGNO;
+ curseg->next_segno = 0;
+}
+
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio)
@@ -3427,12 +3501,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
struct seg_entry *se = NULL;
bool segment_full = false;
+ int ret = 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&sit_i->sentry_lock);
+ if (curseg->segno == NULL_SEGNO) {
+ ret = -ENOSPC;
+ goto out_err;
+ }
+
if (from_gc) {
f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
@@ -3441,7 +3521,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
}
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
- f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
+ f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi));
f2fs_wait_discard_bio(sbi, *new_blkaddr);
@@ -3470,25 +3550,35 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
* since SSR needs latest valid block information.
*/
update_sit_entry(sbi, *new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
- update_sit_entry(sbi, old_blkaddr, -1);
+ update_sit_entry(sbi, old_blkaddr, -1);
/*
* If the current segment is full, flush it out and replace it with a
* new segment.
*/
if (segment_full) {
+ if (type == CURSEG_COLD_DATA_PINNED &&
+ !((curseg->segno + 1) % sbi->segs_per_sec)) {
+ reset_curseg_fields(curseg);
+ goto skip_new_segment;
+ }
+
if (from_gc) {
- get_atssr_segment(sbi, type, se->type,
+ ret = get_atssr_segment(sbi, type, se->type,
AT_SSR, se->mtime);
} else {
if (need_new_seg(sbi, type))
- new_curseg(sbi, type, false);
+ ret = new_curseg(sbi, type, false);
else
- change_curseg(sbi, type);
+ ret = change_curseg(sbi, type);
stat_inc_seg_type(sbi, curseg);
}
+
+ if (ret)
+ goto out_err;
}
+
+skip_new_segment:
/*
* segment dirty status should be updated after segment allocation,
* so we just need to update status only one time after previous
@@ -3497,12 +3587,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
- if (IS_DATASEG(type))
+ if (IS_DATASEG(curseg->seg_type))
atomic64_inc(&sbi->allocated_data_blocks);
up_write(&sit_i->sentry_lock);
- if (page && IS_NODESEG(type)) {
+ if (page && IS_NODESEG(curseg->seg_type)) {
fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
f2fs_inode_chksum_set(sbi, page);
@@ -3511,9 +3601,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
if (fio) {
struct f2fs_bio_info *io;
- if (F2FS_IO_ALIGNED(sbi))
- fio->retry = 0;
-
INIT_LIST_HEAD(&fio->list);
fio->in_list = 1;
io = sbi->write_io[fio->type] + fio->temp;
@@ -3523,8 +3610,15 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
}
mutex_unlock(&curseg->curseg_mutex);
-
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+ return 0;
+out_err:
+ *new_blkaddr = NULL_ADDR;
+ up_write(&sit_i->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
+ return ret;
+
}
void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
@@ -3561,21 +3655,25 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
if (keep_order)
f2fs_down_read(&fio->sbi->io_order_lock);
-reallocate:
- f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
- &fio->new_blkaddr, sum, type, fio);
+
+ if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
+ &fio->new_blkaddr, sum, type, fio)) {
+ if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host))
+ fscrypt_finalize_bounce_page(&fio->encrypted_page);
+ if (PageWriteback(fio->page))
+ end_page_writeback(fio->page);
+ if (f2fs_in_warm_node_list(fio->sbi, fio->page))
+ f2fs_del_fsync_node_entry(fio->sbi, fio->page);
+ goto out;
+ }
if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
/* writeout dirty page into bdev */
f2fs_submit_page_write(fio);
- if (fio->retry) {
- fio->old_blkaddr = fio->new_blkaddr;
- goto reallocate;
- }
f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
-
+out:
if (keep_order)
f2fs_up_read(&fio->sbi->io_order_lock);
}
@@ -3659,8 +3757,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
}
if (fio->post_read)
- invalidate_mapping_pages(META_MAPPING(sbi),
- fio->new_blkaddr, fio->new_blkaddr);
+ f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
stat_inc_inplace_blocks(fio->sbi);
@@ -3749,7 +3846,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
/* change the current segment */
if (segno != curseg->segno) {
curseg->next_segno = segno;
- change_curseg(sbi, type);
+ if (change_curseg(sbi, type))
+ goto out_unlock;
}
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
@@ -3775,12 +3873,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (recover_curseg) {
if (old_cursegno != curseg->segno) {
curseg->next_segno = old_cursegno;
- change_curseg(sbi, type);
+ if (change_curseg(sbi, type))
+ goto out_unlock;
}
curseg->next_blkoff = old_blkoff;
curseg->alloc_type = old_alloc_type;
}
+out_unlock:
up_write(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
f2fs_up_write(&SM_I(sbi)->curseg_lock);
@@ -3850,7 +3950,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
for (i = 0; i < len; i++)
f2fs_wait_on_block_writeback(inode, blkaddr + i);
- invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
+ f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
}
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
@@ -3892,7 +3992,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
seg_i->next_blkoff = blk_off;
if (seg_i->alloc_type == SSR)
- blk_off = sbi->blocks_per_seg;
+ blk_off = BLKS_PER_SEG(sbi);
for (j = 0; j < blk_off; j++) {
struct f2fs_summary *s;
@@ -3960,7 +4060,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
struct f2fs_summary *ns = &sum->entries[0];
int i;
- for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
+ for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) {
ns->version = 0;
ns->ofs_in_node = 0;
}
@@ -4466,7 +4566,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
#endif
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
- sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
+ sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs);
sit_i->written_valid_blocks = 0;
sit_i->bitmap_size = sit_bitmap_size;
sit_i->dirty_sentries = 0;
@@ -4539,9 +4639,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
array[i].seg_type = CURSEG_COLD_DATA;
else if (i == CURSEG_ALL_DATA_ATGC)
array[i].seg_type = CURSEG_COLD_DATA;
- array[i].segno = NULL_SEGNO;
- array[i].next_blkoff = 0;
- array[i].inited = false;
+ reset_curseg_fields(&array[i]);
}
return restore_curseg_summaries(sbi);
}
@@ -4593,21 +4691,20 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
- if (f2fs_block_unit_discard(sbi)) {
- /* build discard map only one time */
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
+ if (!f2fs_block_unit_discard(sbi))
+ goto init_discard_map_done;
+
+ /* build discard map only one time */
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff,
SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map,
- se->cur_valid_map,
+ goto init_discard_map_done;
+ }
+ memcpy(se->discard_map, se->cur_valid_map,
SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks +=
- sbi->blocks_per_seg -
+ sbi->discard_blks += BLKS_PER_SEG(sbi) -
se->valid_blocks;
- }
- }
-
+init_discard_map_done:
if (__is_large_section(sbi))
get_sec_entry(sbi, start)->valid_blocks +=
se->valid_blocks;
@@ -4747,7 +4844,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
return;
mutex_lock(&dirty_i->seglist_lock);
- for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+ for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
valid_blocks = get_valid_blocks(sbi, segno, true);
secno = GET_SEC_FROM_SEG(sbi, segno);
@@ -4846,7 +4943,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
if (curseg->alloc_type == SSR)
continue;
- for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
+ for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) {
if (!f2fs_test_bit(blkofs, se->cur_valid_map))
continue;
out:
@@ -4862,6 +4959,16 @@ out:
}
#ifdef CONFIG_BLK_DEV_ZONED
+static const char *f2fs_zone_status[BLK_ZONE_COND_OFFLINE + 1] = {
+ [BLK_ZONE_COND_NOT_WP] = "NOT_WP",
+ [BLK_ZONE_COND_EMPTY] = "EMPTY",
+ [BLK_ZONE_COND_IMP_OPEN] = "IMPLICIT_OPEN",
+ [BLK_ZONE_COND_EXP_OPEN] = "EXPLICIT_OPEN",
+ [BLK_ZONE_COND_CLOSED] = "CLOSED",
+ [BLK_ZONE_COND_READONLY] = "READONLY",
+ [BLK_ZONE_COND_FULL] = "FULL",
+ [BLK_ZONE_COND_OFFLINE] = "OFFLINE",
+};
static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
struct f2fs_dev_info *fdev,
@@ -4883,14 +4990,19 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
* Skip check of zones cursegs point to, since
* fix_curseg_write_pointer() checks them.
*/
- if (zone_segno >= MAIN_SEGS(sbi) ||
- IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno)))
+ if (zone_segno >= MAIN_SEGS(sbi))
return 0;
/*
* Get # of valid block of the zone.
*/
valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
+ if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
+ f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
+ zone_segno, valid_block_cnt,
+ f2fs_zone_status[zone->cond]);
+ return 0;
+ }
if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
(valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
@@ -4898,8 +5010,8 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
if (!valid_block_cnt) {
f2fs_notice(sbi, "Zone without valid block has non-zero write "
- "pointer. Reset the write pointer: cond[0x%x]",
- zone->cond);
+ "pointer. Reset the write pointer: cond[%s]",
+ f2fs_zone_status[zone->cond]);
ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
zone->len >> log_sectors_per_block);
if (ret)
@@ -4916,8 +5028,8 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
* selected for write operation until it get discarded.
*/
f2fs_notice(sbi, "Valid blocks are not aligned with write "
- "pointer: valid block[0x%x,0x%x] cond[0x%x]",
- zone_segno, valid_block_cnt, zone->cond);
+ "pointer: valid block[0x%x,0x%x] cond[%s]",
+ zone_segno, valid_block_cnt, f2fs_zone_status[zone->cond]);
nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
@@ -5128,7 +5240,7 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(
unsigned int secno;
if (!sbi->unusable_blocks_per_sec)
- return sbi->blocks_per_seg;
+ return BLKS_PER_SEG(sbi);
secno = GET_SEC_FROM_SEG(sbi, segno);
seg_start = START_BLOCK(sbi, segno);
@@ -5143,10 +5255,10 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(
*/
if (seg_start >= sec_cap_blkaddr)
return 0;
- if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
+ if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr)
return sec_cap_blkaddr - seg_start;
- return sbi->blocks_per_seg;
+ return BLKS_PER_SEG(sbi);
}
#else
int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
@@ -5172,7 +5284,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
if (f2fs_sb_has_blkzoned(sbi))
return f2fs_usable_zone_blks_in_seg(sbi, segno);
- return sbi->blocks_per_seg;
+ return BLKS_PER_SEG(sbi);
}
unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
@@ -5181,7 +5293,7 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
if (f2fs_sb_has_blkzoned(sbi))
return CAP_SEGS_PER_SEC(sbi);
- return sbi->segs_per_sec;
+ return SEGS_PER_SEC(sbi);
}
/*
@@ -5196,14 +5308,14 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
sit_i->min_mtime = ULLONG_MAX;
- for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+ for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
unsigned int i;
unsigned long long mtime = 0;
- for (i = 0; i < sbi->segs_per_sec; i++)
+ for (i = 0; i < SEGS_PER_SEC(sbi); i++)
mtime += get_seg_entry(sbi, segno + i)->mtime;
- mtime = div_u64(mtime, sbi->segs_per_sec);
+ mtime = div_u64(mtime, SEGS_PER_SEC(sbi));
if (sit_i->min_mtime > mtime)
sit_i->min_mtime = mtime;
@@ -5242,7 +5354,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
- sm_info->min_seq_blocks = sbi->blocks_per_seg;
+ sm_info->min_seq_blocks = BLKS_PER_SEG(sbi);
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 8129be788bd5..e1c0f418aa11 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -48,21 +48,21 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
#define IS_CURSEC(sbi, secno) \
(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
- (sbi)->segs_per_sec) || \
+ SEGS_PER_SEC(sbi)) || \
((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
- (sbi)->segs_per_sec) || \
+ SEGS_PER_SEC(sbi)) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
- (sbi)->segs_per_sec) || \
+ SEGS_PER_SEC(sbi)) || \
((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
- (sbi)->segs_per_sec) || \
+ SEGS_PER_SEC(sbi)) || \
((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
- (sbi)->segs_per_sec) || \
+ SEGS_PER_SEC(sbi)) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
- (sbi)->segs_per_sec) || \
+ SEGS_PER_SEC(sbi)) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
- (sbi)->segs_per_sec) || \
+ SEGS_PER_SEC(sbi)) || \
((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
- (sbi)->segs_per_sec))
+ SEGS_PER_SEC(sbi)))
#define MAIN_BLKADDR(sbi) \
(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
@@ -77,40 +77,37 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
#define TOTAL_SEGS(sbi) \
(SM_I(sbi) ? SM_I(sbi)->segment_count : \
le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
-#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
+#define TOTAL_BLKS(sbi) (SEGS_TO_BLKS(sbi, TOTAL_SEGS(sbi)))
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
#define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \
(sbi)->log_blocks_per_seg))
#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \
- (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg))
+ (SEGS_TO_BLKS(sbi, GET_R2L_SEGNO(FREE_I(sbi), segno))))
#define NEXT_FREE_BLKADDR(sbi, curseg) \
(START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff)
#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi))
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
- (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg)
+ (BLKS_TO_SEGS(sbi, GET_SEGOFF_FROM_SEG0(sbi, blk_addr)))
#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
- (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
+ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (BLKS_PER_SEG(sbi) - 1))
#define GET_SEGNO(sbi, blk_addr) \
((!__is_valid_data_blkaddr(blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
-#define BLKS_PER_SEC(sbi) \
- ((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
#define CAP_BLKS_PER_SEC(sbi) \
- ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \
- (sbi)->unusable_blocks_per_sec)
+ (BLKS_PER_SEC(sbi) - (sbi)->unusable_blocks_per_sec)
#define CAP_SEGS_PER_SEC(sbi) \
- ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\
- (sbi)->log_blocks_per_seg))
+ (SEGS_PER_SEC(sbi) - \
+ BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec))
#define GET_SEC_FROM_SEG(sbi, segno) \
- (((segno) == -1) ? -1 : (segno) / (sbi)->segs_per_sec)
+ (((segno) == -1) ? -1 : (segno) / SEGS_PER_SEC(sbi))
#define GET_SEG_FROM_SEC(sbi, secno) \
- ((secno) * (sbi)->segs_per_sec)
+ ((secno) * SEGS_PER_SEC(sbi))
#define GET_ZONE_FROM_SEC(sbi, secno) \
(((secno) == -1) ? -1 : (secno) / (sbi)->secs_per_zone)
#define GET_ZONE_FROM_SEG(sbi, segno) \
@@ -139,16 +136,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK)
/*
- * indicate a block allocation direction: RIGHT and LEFT.
- * RIGHT means allocating new sections towards the end of volume.
- * LEFT means the opposite direction.
- */
-enum {
- ALLOC_RIGHT = 0,
- ALLOC_LEFT
-};
-
-/*
* In the victim_sel_policy->alloc_mode, there are three block allocation modes.
* LFS writes data sequentially with cleaning operations.
* SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
@@ -364,7 +351,7 @@ static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
unsigned int blocks = 0;
int i;
- for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) {
+ for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) {
struct seg_entry *se = get_seg_entry(sbi, start_segno);
blocks += se->ckpt_valid_blocks;
@@ -449,7 +436,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
free_i->free_segments++;
next = find_next_bit(free_i->free_segmap,
- start_segno + sbi->segs_per_sec, start_segno);
+ start_segno + SEGS_PER_SEC(sbi), start_segno);
if (next >= start_segno + usable_segs) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
@@ -485,7 +472,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (!inmem && IS_CURSEC(sbi, secno))
goto skip_free;
next = find_next_bit(free_i->free_segmap,
- start_segno + sbi->segs_per_sec, start_segno);
+ start_segno + SEGS_PER_SEC(sbi), start_segno);
if (next >= start_segno + usable_segs) {
if (test_and_clear_bit(secno, free_i->free_secmap))
free_i->free_sections++;
@@ -573,23 +560,22 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
unsigned int node_blocks, unsigned int dent_blocks)
{
- unsigned int segno, left_blocks;
+ unsigned segno, left_blocks;
int i;
- /* check current node segment */
+ /* check current node sections in the worst case. */
for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
segno = CURSEG_I(sbi, i)->segno;
- left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
- get_seg_entry(sbi, segno)->ckpt_valid_blocks;
-
+ left_blocks = CAP_BLKS_PER_SEC(sbi) -
+ get_ckpt_valid_blocks(sbi, segno, true);
if (node_blocks > left_blocks)
return false;
}
- /* check current data segment */
+ /* check current data section for dentry blocks. */
segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
- left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
- get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+ left_blocks = CAP_BLKS_PER_SEC(sbi) -
+ get_ckpt_valid_blocks(sbi, segno, true);
if (dent_blocks > left_blocks)
return false;
return true;
@@ -638,7 +624,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
if (free_secs > upper_secs)
return false;
- else if (free_secs <= lower_secs)
+ if (free_secs <= lower_secs)
return true;
return !curseg_space;
}
@@ -793,10 +779,10 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
- if (usable_blks_per_seg < sbi->blocks_per_seg)
+ if (usable_blks_per_seg < BLKS_PER_SEG(sbi))
f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
- usable_blks_per_seg) != sbi->blocks_per_seg);
+ BLKS_PER_SEG(sbi),
+ usable_blks_per_seg) != BLKS_PER_SEG(sbi));
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
@@ -915,9 +901,9 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
return 0;
if (type == DATA)
- return sbi->blocks_per_seg;
+ return BLKS_PER_SEG(sbi);
else if (type == NODE)
- return 8 * sbi->blocks_per_seg;
+ return SEGS_TO_BLKS(sbi, 8);
else if (type == META)
return 8 * BIO_MAX_VECS;
else
@@ -969,3 +955,13 @@ wake_up:
dcc->discard_wake = true;
wake_up_interruptible_all(&dcc->discard_wait_queue);
}
+
+static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
+{
+ int devi;
+
+ for (devi = 0; devi < sbi->s_ndevs; devi++)
+ if (bdev_is_zoned(FDEV(devi).bdev))
+ return GET_SEGNO(sbi, FDEV(devi).start_blk);
+ return 0;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f6ffbfe75653..a6867f26f141 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -44,24 +44,26 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
const char *f2fs_fault_name[FAULT_MAX] = {
- [FAULT_KMALLOC] = "kmalloc",
- [FAULT_KVMALLOC] = "kvmalloc",
- [FAULT_PAGE_ALLOC] = "page alloc",
- [FAULT_PAGE_GET] = "page get",
- [FAULT_ALLOC_NID] = "alloc nid",
- [FAULT_ORPHAN] = "orphan",
- [FAULT_BLOCK] = "no more block",
- [FAULT_DIR_DEPTH] = "too big dir depth",
- [FAULT_EVICT_INODE] = "evict_inode fail",
- [FAULT_TRUNCATE] = "truncate fail",
- [FAULT_READ_IO] = "read IO error",
- [FAULT_CHECKPOINT] = "checkpoint error",
- [FAULT_DISCARD] = "discard error",
- [FAULT_WRITE_IO] = "write IO error",
- [FAULT_SLAB_ALLOC] = "slab alloc",
- [FAULT_DQUOT_INIT] = "dquot initialize",
- [FAULT_LOCK_OP] = "lock_op",
- [FAULT_BLKADDR] = "invalid blkaddr",
+ [FAULT_KMALLOC] = "kmalloc",
+ [FAULT_KVMALLOC] = "kvmalloc",
+ [FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_NID] = "alloc nid",
+ [FAULT_ORPHAN] = "orphan",
+ [FAULT_BLOCK] = "no more block",
+ [FAULT_DIR_DEPTH] = "too big dir depth",
+ [FAULT_EVICT_INODE] = "evict_inode fail",
+ [FAULT_TRUNCATE] = "truncate fail",
+ [FAULT_READ_IO] = "read IO error",
+ [FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
+ [FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
+ [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr",
+ [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr",
+ [FAULT_NO_SEGMENT] = "no free segment",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -137,7 +139,6 @@ enum {
Opt_resgid,
Opt_resuid,
Opt_mode,
- Opt_io_size_bits,
Opt_fault_injection,
Opt_fault_type,
Opt_lazytime,
@@ -216,7 +217,6 @@ static match_table_t f2fs_tokens = {
{Opt_resgid, "resgid=%u"},
{Opt_resuid, "resuid=%u"},
{Opt_mode, "mode=%s"},
- {Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
{Opt_fault_type, "fault_type=%u"},
{Opt_lazytime, "lazytime"},
@@ -263,7 +263,8 @@ static match_table_t f2fs_tokens = {
{Opt_err, NULL},
};
-void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
+void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate,
+ const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -274,8 +275,12 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
level = printk_get_level(fmt);
vaf.fmt = printk_skip_level(fmt);
vaf.va = &args;
- printk("%c%cF2FS-fs (%s): %pV\n",
- KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ if (limit_rate)
+ printk_ratelimited("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+ else
+ printk("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
va_end(args);
}
@@ -343,46 +348,6 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).s_resgid));
}
-static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
-{
- unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
- unsigned int avg_vblocks;
- unsigned int wanted_reserved_segments;
- block_t avail_user_block_count;
-
- if (!F2FS_IO_ALIGNED(sbi))
- return 0;
-
- /* average valid block count in section in worst case */
- avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
-
- /*
- * we need enough free space when migrating one section in worst case
- */
- wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
- reserved_segments(sbi);
- wanted_reserved_segments -= reserved_segments(sbi);
-
- avail_user_block_count = sbi->user_block_count -
- sbi->current_reserved_blocks -
- F2FS_OPTION(sbi).root_reserved_blocks;
-
- if (wanted_reserved_segments * sbi->blocks_per_seg >
- avail_user_block_count) {
- f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
- wanted_reserved_segments,
- avail_user_block_count >> sbi->log_blocks_per_seg);
- return -ENOSPC;
- }
-
- SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
-
- f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
- wanted_reserved_segments);
-
- return 0;
-}
-
static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
{
if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@ -663,7 +628,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
#ifdef CONFIG_F2FS_FS_ZSTD
static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
{
- unsigned int level;
+ int level;
int len = 4;
if (strlen(str) == len) {
@@ -677,9 +642,15 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
return -EINVAL;
}
- if (kstrtouint(str + 1, 10, &level))
+ if (kstrtoint(str + 1, 10, &level))
return -EINVAL;
+ /* f2fs does not support negative compress level now */
+ if (level < 0) {
+ f2fs_info(sbi, "do not support negative compress level: %d", level);
+ return -ERANGE;
+ }
+
if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) {
f2fs_info(sbi, "invalid zstd compress level: %d", level);
return -EINVAL;
@@ -763,10 +734,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
clear_opt(sbi, DISCARD);
break;
case Opt_noheap:
- set_opt(sbi, NOHEAP);
- break;
case Opt_heap:
- clear_opt(sbi, NOHEAP);
+ f2fs_warn(sbi, "heap/no_heap options were deprecated");
break;
#ifdef CONFIG_F2FS_FS_XATTR
case Opt_user_xattr:
@@ -913,16 +882,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
}
kfree(name);
break;
- case Opt_io_size_bits:
- if (args->from && match_int(args, &arg))
- return -EINVAL;
- if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) {
- f2fs_warn(sbi, "Not support %ld, larger than %d",
- BIT(arg), BIO_MAX_VECS);
- return -EINVAL;
- }
- F2FS_OPTION(sbi).write_io_size_bits = arg;
- break;
#ifdef CONFIG_F2FS_FAULT_INJECTION
case Opt_fault_injection:
if (args->from && match_int(args, &arg))
@@ -1392,12 +1351,6 @@ default_check:
}
#endif
- if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) {
- f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO",
- F2FS_IO_SIZE_KB(sbi));
- return -EINVAL;
- }
-
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
int min_size, max_size;
@@ -1718,7 +1671,6 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_page_array_cache(sbi);
f2fs_destroy_xattr_caches(sbi);
- mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
kfree(F2FS_OPTION(sbi).s_qf_names[i]);
@@ -2009,10 +1961,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
} else {
seq_puts(seq, ",nodiscard");
}
- if (test_opt(sbi, NOHEAP))
- seq_puts(seq, ",no_heap");
- else
- seq_puts(seq, ",heap");
#ifdef CONFIG_F2FS_FS_XATTR
if (test_opt(sbi, XATTR_USER))
seq_puts(seq, ",user_xattr");
@@ -2078,9 +2026,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
F2FS_OPTION(sbi).s_resuid),
from_kgid_munged(&init_user_ns,
F2FS_OPTION(sbi).s_resgid));
- if (F2FS_IO_SIZE_BITS(sbi))
- seq_printf(seq, ",io_bits=%u",
- F2FS_OPTION(sbi).write_io_size_bits);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
@@ -2192,7 +2137,6 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount)
set_opt(sbi, INLINE_XATTR);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
- set_opt(sbi, NOHEAP);
set_opt(sbi, MERGE_CHECKPOINT);
F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
@@ -2247,6 +2191,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
.err_gc_skipped = true,
+ .no_bg_gc = true,
.nr_free_secs = 1 };
f2fs_down_write(&sbi->gc_lock);
@@ -2332,7 +2277,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
- bool no_io_align = !F2FS_IO_ALIGNED(sbi);
bool no_atgc = !test_opt(sbi, ATGC);
bool no_discard = !test_opt(sbi, DISCARD);
bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
@@ -2440,12 +2384,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
- err = -EINVAL;
- f2fs_warn(sbi, "switch io_bits option is not allowed");
- goto restore_opts;
- }
-
if (no_compress_cache == !!test_opt(sbi, COMPRESS_CACHE)) {
err = -EINVAL;
f2fs_warn(sbi, "switch compress_cache option is not allowed");
@@ -3706,7 +3644,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
}
main_segs = le32_to_cpu(raw_super->segment_count_main);
- blocks_per_seg = sbi->blocks_per_seg;
+ blocks_per_seg = BLKS_PER_SEG(sbi);
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
@@ -3818,9 +3756,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
sbi->total_sections = le32_to_cpu(raw_super->section_count);
- sbi->total_node_count =
- (le32_to_cpu(raw_super->segment_count_nat) / 2)
- * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
+ sbi->total_node_count = SEGS_TO_BLKS(sbi,
+ ((le32_to_cpu(raw_super->segment_count_nat) / 2) *
+ NAT_ENTRY_PER_BLOCK));
F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
@@ -3829,7 +3767,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
- sbi->migration_granularity = sbi->segs_per_sec;
+ sbi->migration_granularity = SEGS_PER_SEC(sbi);
sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
@@ -3930,11 +3868,6 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
return 0;
zone_sectors = bdev_zone_sectors(bdev);
- if (!is_power_of_2(zone_sectors)) {
- f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n");
- return -EINVAL;
- }
-
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
SECTOR_TO_BLOCK(zone_sectors))
return -EINVAL;
@@ -4090,7 +4023,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
f2fs_up_write(&sbi->sb_lock);
if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_commit_super fails to record stop_reason, err:%d",
+ err);
}
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
@@ -4133,8 +4068,9 @@ static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error)
err = f2fs_commit_super(sbi, false);
if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d",
- error, err);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_commit_super fails to record errors:%u, err:%d",
+ error, err);
out_unlock:
f2fs_up_write(&sbi->sb_lock);
}
@@ -4259,14 +4195,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
if (i == 0) {
FDEV(i).start_blk = 0;
FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1 +
- le32_to_cpu(raw_super->segment0_blkaddr);
+ SEGS_TO_BLKS(sbi,
+ FDEV(i).total_segments) - 1 +
+ le32_to_cpu(raw_super->segment0_blkaddr);
} else {
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1;
+ SEGS_TO_BLKS(sbi,
+ FDEV(i).total_segments) - 1;
FDEV(i).bdev_file = bdev_file_open_by_path(
FDEV(i).path, mode, sbi->sb, NULL);
}
@@ -4305,8 +4241,6 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
FDEV(i).total_segments,
FDEV(i).start_blk, FDEV(i).end_blk);
}
- f2fs_info(sbi,
- "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
@@ -4519,19 +4453,10 @@ try_onemore:
if (err)
goto free_iostat;
- if (F2FS_IO_ALIGNED(sbi)) {
- sbi->write_io_dummy =
- mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
- if (!sbi->write_io_dummy) {
- err = -ENOMEM;
- goto free_percpu;
- }
- }
-
/* init per sbi slab cache */
err = f2fs_init_xattr_caches(sbi);
if (err)
- goto free_io_dummy;
+ goto free_percpu;
err = f2fs_init_page_array_cache(sbi);
if (err)
goto free_xattr_cache;
@@ -4619,10 +4544,6 @@ try_onemore:
goto free_nm;
}
- err = adjust_reserved_segment(sbi);
- if (err)
- goto free_nm;
-
/* For write statistics */
sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
@@ -4749,13 +4670,20 @@ reset_checkpoint:
* If the f2fs is not readonly and fsync data recovery succeeds,
* check zoned block devices' write pointer consistency.
*/
- if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) {
- err = f2fs_check_write_pointer(sbi);
- if (err)
- goto free_meta;
+ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sb)) {
+ int err2;
+
+ f2fs_notice(sbi, "Checking entire write pointers");
+ err2 = f2fs_check_write_pointer(sbi);
+ if (err2)
+ err = err2;
}
+ if (err)
+ goto free_meta;
- f2fs_init_inmem_curseg(sbi);
+ err = f2fs_init_inmem_curseg(sbi);
+ if (err)
+ goto sync_free_meta;
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -4854,8 +4782,6 @@ free_page_array_cache:
f2fs_destroy_page_array_cache(sbi);
free_xattr_cache:
f2fs_destroy_xattr_caches(sbi);
-free_io_dummy:
- mempool_destroy(sbi->write_io_dummy);
free_percpu:
destroy_percpu_info(sbi);
free_iostat:
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index a7ec55c7bb20..a568ce96cf56 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -493,8 +493,8 @@ out:
spin_lock(&sbi->stat_lock);
if (t > (unsigned long)(sbi->user_block_count -
F2FS_OPTION(sbi).root_reserved_blocks -
- sbi->blocks_per_seg *
- SM_I(sbi)->additional_reserved_segments)) {
+ SEGS_TO_BLKS(sbi,
+ SM_I(sbi)->additional_reserved_segments))) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
@@ -551,7 +551,7 @@ out:
}
if (!strcmp(a->attr.name, "migration_granularity")) {
- if (t == 0 || t > sbi->segs_per_sec)
+ if (t == 0 || t > SEGS_PER_SEC(sbi))
return -EINVAL;
}
@@ -1492,6 +1492,50 @@ static int __maybe_unused discard_plist_seq_show(struct seq_file *seq,
return 0;
}
+static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
+ void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int i;
+
+ seq_printf(seq, "Address Layout : %5luB Block address (# of Segments)\n",
+ F2FS_BLKSIZE);
+ seq_printf(seq, " SB : %12s\n", "0/1024B");
+ seq_printf(seq, " seg0_blkaddr : 0x%010x\n", SEG0_BLKADDR(sbi));
+ seq_printf(seq, " Checkpoint : 0x%010x (%10d)\n",
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr), 2);
+ seq_printf(seq, " SIT : 0x%010x (%10d)\n",
+ SIT_I(sbi)->sit_base_addr,
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_sit));
+ seq_printf(seq, " NAT : 0x%010x (%10d)\n",
+ NM_I(sbi)->nat_blkaddr,
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_nat));
+ seq_printf(seq, " SSA : 0x%010x (%10d)\n",
+ SM_I(sbi)->ssa_blkaddr,
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_ssa));
+ seq_printf(seq, " Main : 0x%010x (%10d)\n",
+ SM_I(sbi)->main_blkaddr,
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main));
+ seq_printf(seq, " # of Sections : %12d\n",
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
+ seq_printf(seq, " Segs/Sections : %12d\n",
+ SEGS_PER_SEC(sbi));
+ seq_printf(seq, " Section size : %12d MB\n",
+ SEGS_PER_SEC(sbi) << 1);
+
+ if (!f2fs_is_multi_device(sbi))
+ return 0;
+
+ seq_puts(seq, "\nDisk Map for multi devices:\n");
+ for (i = 0; i < sbi->s_ndevs; i++)
+ seq_printf(seq, "Disk:%2d (zoned=%d): 0x%010x - 0x%010x on %s\n",
+ i, bdev_is_zoned(FDEV(i).bdev),
+ FDEV(i).start_blk, FDEV(i).end_blk,
+ FDEV(i).path);
+ return 0;
+}
+
int __init f2fs_init_sysfs(void)
{
int ret;
@@ -1573,6 +1617,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
victim_bits_seq_show, sb);
proc_create_single_data("discard_plist_info", 0444, sbi->s_proc,
discard_plist_seq_show, sb);
+ proc_create_single_data("disk_map", 0444, sbi->s_proc,
+ disk_map_seq_show, sb);
return 0;
put_feature_list_kobj:
kobject_put(&sbi->s_feature_list_kobj);
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 4fc95f353a7a..f7bb0c54502c 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -258,21 +258,23 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
{
- struct page *page;
+ struct folio *folio;
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
- page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
- if (!page || !PageUptodate(page)) {
+ folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0);
+ if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
- if (page)
- put_page(page);
+ if (!IS_ERR(folio))
+ folio_put(folio);
else if (num_ra_pages > 1)
page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
- page = read_mapping_page(inode->i_mapping, index, NULL);
+ folio = read_mapping_folio(inode->i_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
}
- return page;
+ return folio_file_page(folio, index);
}
static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index bce1d7ac95ca..458519e416fe 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -529,6 +529,20 @@ void kernfs_get(struct kernfs_node *kn)
}
EXPORT_SYMBOL_GPL(kernfs_get);
+static void kernfs_free_rcu(struct rcu_head *rcu)
+{
+ struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu);
+
+ kfree_const(kn->name);
+
+ if (kn->iattr) {
+ simple_xattrs_free(&kn->iattr->xattrs, NULL);
+ kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
+ }
+
+ kmem_cache_free(kernfs_node_cache, kn);
+}
+
/**
* kernfs_put - put a reference count on a kernfs_node
* @kn: the target kernfs_node
@@ -557,16 +571,11 @@ void kernfs_put(struct kernfs_node *kn)
if (kernfs_type(kn) == KERNFS_LINK)
kernfs_put(kn->symlink.target_kn);
- kfree_const(kn->name);
-
- if (kn->iattr) {
- simple_xattrs_free(&kn->iattr->xattrs, NULL);
- kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
- }
spin_lock(&kernfs_idr_lock);
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
spin_unlock(&kernfs_idr_lock);
- kmem_cache_free(kernfs_node_cache, kn);
+
+ call_rcu(&kn->rcu, kernfs_free_rcu);
kn = parent;
if (kn) {
@@ -575,7 +584,7 @@ void kernfs_put(struct kernfs_node *kn)
} else {
/* just released the root kn, free @root too */
idr_destroy(&root->ino_idr);
- kfree(root);
+ kfree_rcu(root, rcu);
}
}
EXPORT_SYMBOL_GPL(kernfs_put);
@@ -715,7 +724,7 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
ino_t ino = kernfs_id_ino(id);
u32 gen = kernfs_id_gen(id);
- spin_lock(&kernfs_idr_lock);
+ rcu_read_lock();
kn = idr_find(&root->ino_idr, (u32)ino);
if (!kn)
@@ -739,10 +748,10 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
goto err_unlock;
- spin_unlock(&kernfs_idr_lock);
+ rcu_read_unlock();
return kn;
err_unlock:
- spin_unlock(&kernfs_idr_lock);
+ rcu_read_unlock();
return NULL;
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index ffa4565c275a..e9df2f87072c 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -483,9 +483,11 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
goto out_put;
rc = 0;
- of->mmapped = true;
- of_on(of)->nr_mmapped++;
- of->vm_ops = vma->vm_ops;
+ if (!of->mmapped) {
+ of->mmapped = true;
+ of_on(of)->nr_mmapped++;
+ of->vm_ops = vma->vm_ops;
+ }
vma->vm_ops = &kernfs_vm_ops;
out_put:
kernfs_put_active(of->kn);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 237f2764b941..b42ee6547cdc 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -49,6 +49,8 @@ struct kernfs_root {
struct rw_semaphore kernfs_rwsem;
struct rw_semaphore kernfs_iattr_rwsem;
struct rw_semaphore kernfs_supers_rwsem;
+
+ struct rcu_head rcu;
};
/* +1 to avoid triggering overflow warning when negating it */
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index ad572f7ee897..43a651ed8264 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -83,8 +83,10 @@ static int fscache_begin_operation(struct netfs_cache_resources *cres,
cres->debug_id = cookie->debug_id;
cres->inval_counter = cookie->inval_counter;
- if (!fscache_begin_cookie_access(cookie, why))
+ if (!fscache_begin_cookie_access(cookie, why)) {
+ cres->cache_priv = NULL;
return -ENOBUFS;
+ }
again:
spin_lock(&cookie->lock);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index e545e92c4408..1cd2076210b1 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -104,7 +104,7 @@ TRACE_EVENT(nfsd_compound,
TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->opcnt = opcnt;
- __assign_str_len(tag, tag, taglen);
+ __assign_str(tag, tag);
),
TP_printk("xid=0x%08x opcnt=%u tag=%s",
__entry->xid, __entry->opcnt, __get_str(tag)
@@ -485,7 +485,7 @@ TRACE_EVENT(nfsd_dirent,
TP_fast_assign(
__entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
__entry->ino = ino;
- __assign_str_len(name, name, namlen)
+ __assign_str(name, name);
),
TP_printk("fh_hash=0x%08x ino=%llu name=%s",
__entry->fh_hash, __entry->ino, __get_str(name)
@@ -896,7 +896,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
__field(unsigned long, flavor)
__array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
- __string_len(name, name, clp->cl_name.len)
+ __string_len(name, clp->cl_name.data, clp->cl_name.len)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
@@ -906,7 +906,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__entry->flavor = clp->cl_cred.cr_flavor;
memcpy(__entry->verifier, (void *)&clp->cl_verifier,
NFS4_VERIFIER_SIZE);
- __assign_str_len(name, clp->cl_name.data, clp->cl_name.len);
+ __assign_str(name, clp->cl_name.data);
),
TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x",
__entry->addr, __get_str(name),
@@ -1976,7 +1976,7 @@ TRACE_EVENT(nfsd_ctl_time,
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->time = time;
- __assign_str_len(name, name, namelen);
+ __assign_str(name, name);
),
TP_printk("file=%s time=%d\n",
__get_str(name), __entry->time
diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c
index 3b6982bf6bcf..e75e173a9186 100644
--- a/fs/orangefs/orangefs-cache.c
+++ b/fs/orangefs/orangefs-cache.c
@@ -22,7 +22,7 @@ int op_cache_initialize(void)
op_cache = kmem_cache_create("orangefs_op_cache",
sizeof(struct orangefs_kernel_op_s),
0,
- ORANGEFS_CACHE_CREATE_FLAGS,
+ 0,
NULL);
if (!op_cache) {
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 926d9c0a428a..e2df7eeadc7a 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -93,16 +93,6 @@ enum orangefs_vfs_op_states {
OP_VFS_STATE_GIVEN_UP = 16,
};
-/*
- * orangefs kernel memory related flags
- */
-
-#if (defined CONFIG_DEBUG_SLAB)
-#define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
-#else
-#define ORANGEFS_CACHE_CREATE_FLAGS 0
-#endif
-
extern const struct xattr_handler * const orangefs_xattr_handlers[];
extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu);
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 5254256a224d..34849b4a3243 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -527,7 +527,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL);
if (!ORANGEFS_SB(sb)) {
d = ERR_PTR(-ENOMEM);
- goto free_sb_and_op;
+ goto free_op;
}
ret = orangefs_fill_sb(sb,
@@ -644,7 +644,7 @@ int orangefs_inode_cache_initialize(void)
"orangefs_inode_cache",
sizeof(struct orangefs_inode_s),
0,
- ORANGEFS_CACHE_CREATE_FLAGS,
+ 0,
offsetof(struct orangefs_inode_s,
link_target),
sizeof_field(struct orangefs_inode_s,
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 8586e2f5d243..0762575a1e70 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -234,11 +234,11 @@ static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
{
loff_t tmp;
- if (WARN_ON_ONCE(pos != pos2))
+ if (pos != pos2)
return -EIO;
- if (WARN_ON_ONCE(pos < 0 || len < 0 || totlen < 0))
+ if (pos < 0 || len < 0 || totlen < 0)
return -EIO;
- if (WARN_ON_ONCE(check_add_overflow(pos, len, &tmp)))
+ if (check_add_overflow(pos, len, &tmp))
return -EIO;
return 0;
}
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 3de5047a7ff9..a0017724d523 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -239,7 +239,8 @@ replay_again:
.tcon = tcon,
.path = path,
.create_options = cifs_create_options(cifs_sb, CREATE_NOT_FILE),
- .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES,
+ .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES |
+ FILE_READ_EA,
.disposition = FILE_OPEN,
.fid = pfid,
.replay = !!(retries),
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 81d9aafd2210..aa6f1ecb7c0e 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -151,10 +151,6 @@ MODULE_PARM_DESC(disable_legacy_dialects, "To improve security it may be "
"vers=1.0 (CIFS/SMB1) and vers=2.0 are weaker"
" and less secure. Default: n/N/0");
-extern mempool_t *cifs_sm_req_poolp;
-extern mempool_t *cifs_req_poolp;
-extern mempool_t *cifs_mid_poolp;
-
struct workqueue_struct *cifsiod_wq;
struct workqueue_struct *decrypt_wq;
struct workqueue_struct *fileinfo_put_wq;
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 8be62ed053a2..7ed9d05f6890 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -355,6 +355,9 @@ struct smb_version_operations {
/* informational QFS call */
void (*qfs_tcon)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *);
+ /* query for server interfaces */
+ int (*query_server_interfaces)(const unsigned int, struct cifs_tcon *,
+ bool);
/* check if a path is accessible or not */
int (*is_path_accessible)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const char *);
@@ -2104,6 +2107,8 @@ extern struct workqueue_struct *cifsoplockd_wq;
extern struct workqueue_struct *deferredclose_wq;
extern __u32 cifs_lock_secret;
+extern mempool_t *cifs_sm_req_poolp;
+extern mempool_t *cifs_req_poolp;
extern mempool_t *cifs_mid_poolp;
/* Operations for different SMB versions */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 86ae578904a2..9b85b5341822 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -52,9 +52,6 @@
#include "fs_context.h"
#include "cifs_swn.h"
-extern mempool_t *cifs_req_poolp;
-extern bool disable_legacy_dialects;
-
/* FIXME: should these be tunable? */
#define TLINK_ERROR_EXPIRE (1 * HZ)
#define TLINK_IDLE_EXPIRE (600 * HZ)
@@ -123,12 +120,16 @@ static void smb2_query_server_interfaces(struct work_struct *work)
struct cifs_tcon *tcon = container_of(work,
struct cifs_tcon,
query_interfaces.work);
+ struct TCP_Server_Info *server = tcon->ses->server;
/*
* query server network interfaces, in case they change
*/
+ if (!server->ops->query_server_interfaces)
+ return;
+
xid = get_xid();
- rc = SMB3_request_interfaces(xid, tcon, false);
+ rc = server->ops->query_server_interfaces(xid, tcon, false);
free_xid(xid);
if (rc) {
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index ec25d3c3e1ee..16aadce492b2 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -486,7 +486,6 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->uid = current_fsuid();
cfile->dentry = dget(dentry);
cfile->f_flags = file->f_flags;
- cfile->status_file_deleted = false;
cfile->invalidHandle = false;
cfile->deferred_close_scheduled = false;
cfile->tlink = cifs_get_tlink(tlink);
@@ -1073,6 +1072,19 @@ void smb2_deferred_work_close(struct work_struct *work)
_cifsFileInfo_put(cfile, true, false);
}
+static bool
+smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+
+ return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
+ (cinode->oplock == CIFS_CACHE_RHW_FLG ||
+ cinode->oplock == CIFS_CACHE_RH_FLG) &&
+ !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
+
+}
+
int cifs_close(struct inode *inode, struct file *file)
{
struct cifsFileInfo *cfile;
@@ -1086,10 +1098,8 @@ int cifs_close(struct inode *inode, struct file *file)
cfile = file->private_data;
file->private_data = NULL;
dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
- if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
- && cinode->lease_granted &&
- !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
- dclose && !(cfile->status_file_deleted)) {
+ if ((cfile->status_file_deleted == false) &&
+ (smb2_can_defer_close(inode, dclose))) {
if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
inode_set_mtime_to_ts(inode,
inode_set_ctime_current(inode));
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 8177ec59afee..d28ab0af6049 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -401,7 +401,6 @@ cifs_get_file_info_unix(struct file *filp)
cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb);
} else if (rc == -EREMOTE) {
cifs_create_junction_fattr(&fattr, inode->i_sb);
- rc = 0;
} else
goto cifs_gfiunix_out;
@@ -820,8 +819,10 @@ cifs_get_file_info(struct file *filp)
void *page = alloc_dentry_path();
const unsigned char *path;
- if (!server->ops->query_file_info)
+ if (!server->ops->query_file_info) {
+ free_dentry_path(page);
return -ENOSYS;
+ }
xid = get_xid();
rc = server->ops->query_file_info(xid, tcon, cfile, &data);
@@ -835,8 +836,8 @@ cifs_get_file_info(struct file *filp)
}
path = build_path_from_dentry(dentry, page);
if (IS_ERR(path)) {
- free_dentry_path(page);
- return PTR_ERR(path);
+ rc = PTR_ERR(path);
+ goto cgfi_exit;
}
cifs_open_info_to_fattr(&fattr, &data, inode->i_sb);
if (fattr.cf_flags & CIFS_FATTR_DELETE_PENDING)
@@ -844,7 +845,6 @@ cifs_get_file_info(struct file *filp)
break;
case -EREMOTE:
cifs_create_junction_fattr(&fattr, inode->i_sb);
- rc = 0;
break;
case -EOPNOTSUPP:
case -EINVAL:
@@ -1009,7 +1009,6 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
struct kvec rsp_iov, *iov = NULL;
int rsp_buftype = CIFS_NO_BUFFER;
u32 tag = data->reparse.tag;
- struct inode *inode = NULL;
int rc = 0;
if (!tag && server->ops->query_reparse_point) {
@@ -1049,12 +1048,8 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
if (tcon->posix_extensions)
smb311_posix_info_to_fattr(fattr, data, sb);
- else {
+ else
cifs_open_info_to_fattr(fattr, data, sb);
- inode = cifs_iget(sb, fattr);
- if (inode && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
- cifs_mark_open_handles_for_deleted_file(inode, full_path);
- }
out:
fattr->cf_cifstag = data->reparse.tag;
free_rsp_buf(rsp_buftype, rsp_iov.iov_base);
@@ -1109,9 +1104,9 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
full_path, fattr);
} else {
cifs_open_info_to_fattr(fattr, data, sb);
- if (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
- cifs_mark_open_handles_for_deleted_file(*inode, full_path);
}
+ if (!rc && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
+ cifs_mark_open_handles_for_deleted_file(*inode, full_path);
break;
case -EREMOTE:
/* DFS link, no metadata available on this server */
@@ -1340,6 +1335,8 @@ int smb311_posix_get_inode_info(struct inode **inode,
goto out;
rc = update_inode_info(sb, &fattr, inode);
+ if (!rc && fattr.cf_flags & CIFS_FATTR_DELETE_PENDING)
+ cifs_mark_open_handles_for_deleted_file(*inode, full_path);
out:
kfree(fattr.cf_symlink_target);
return rc;
@@ -1501,6 +1498,9 @@ iget_root:
goto out;
}
+ if (!rc && fattr.cf_flags & CIFS_FATTR_DELETE_PENDING)
+ cifs_mark_open_handles_for_deleted_file(inode, path);
+
if (rc && tcon->pipe) {
cifs_dbg(FYI, "ipc connection - fake read inode\n");
spin_lock(&inode->i_lock);
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 9428a0db7718..c3771fc81328 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -27,9 +27,6 @@
#include "fs_context.h"
#include "cached_dir.h"
-extern mempool_t *cifs_sm_req_poolp;
-extern mempool_t *cifs_req_poolp;
-
/* The xid serves as a useful identifier for each incoming vfs request,
in a similar way to the mid which is useful to track each sent smb,
and CurrentXid can also provide a running counter (although it
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 8f37373fd333..3216f786908f 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -230,7 +230,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
- cifs_dbg(VFS, "server %s does not advertise interfaces\n",
+ cifs_dbg(ONCE, "server %s does not advertise interfaces\n",
ses->server->hostname);
break;
}
@@ -396,7 +396,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
- cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+ cifs_dbg(ONCE, "server %s does not advertise interfaces\n", ses->server->hostname);
return;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 6ee22d0dbc00..2ed456948f34 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -5290,6 +5290,7 @@ struct smb_version_operations smb30_operations = {
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
.qfs_tcon = smb3_qfs_tcon,
+ .query_server_interfaces = SMB3_request_interfaces,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -5405,6 +5406,7 @@ struct smb_version_operations smb311_operations = {
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
.qfs_tcon = smb3_qfs_tcon,
+ .query_server_interfaces = SMB3_request_interfaces,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index e5e6b14f8cae..3ea688558e6c 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -409,14 +409,15 @@ skip_sess_setup:
spin_unlock(&ses->ses_lock);
if (!rc &&
- (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+ (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL) &&
+ server->ops->query_server_interfaces) {
mutex_unlock(&ses->session_mutex);
/*
* query server network interfaces, in case they change
*/
xid = get_xid();
- rc = SMB3_request_interfaces(xid, tcon, false);
+ rc = server->ops->query_server_interfaces(xid, tcon, false);
free_xid(xid);
if (rc == -EOPNOTSUPP && ses->chan_count > 1) {
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 20784f76a604..1b594307c9d5 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -227,7 +227,7 @@ struct smb2_compression_hdr {
__le32 OriginalCompressedSegmentSize;
__le16 CompressionAlgorithm;
__le16 Flags;
- __le16 Offset; /* this is the size of the uncompressed SMB2 header below */
+ __le32 Offset; /* this is the size of the uncompressed SMB2 header below */
/* uncompressed SMB2 header (READ or WRITE) goes here */
/* compressed data goes here */
} __packed;
@@ -280,15 +280,16 @@ struct smb3_blob_data {
#define SE_GROUP_RESOURCE 0x20000000
#define SE_GROUP_LOGON_ID 0xC0000000
-/* struct sid_attr_data is SidData array in BlobData format then le32 Attr */
-
struct sid_array_data {
__le16 SidAttrCount;
/* SidAttrList - array of sid_attr_data structs */
} __packed;
-struct luid_attr_data {
-
+/* struct sid_attr_data is SidData array in BlobData format then le32 Attr */
+struct sid_attr_data {
+ __le16 BlobSize;
+ __u8 BlobData[];
+ /* __le32 Attr */
} __packed;
/*
@@ -502,6 +503,7 @@ struct smb2_encryption_neg_context {
#define SMB3_COMPRESS_LZ77_HUFF cpu_to_le16(0x0003)
/* Pattern scanning algorithm See MS-SMB2 3.1.4.4.1 */
#define SMB3_COMPRESS_PATTERN cpu_to_le16(0x0004) /* Pattern_V1 */
+#define SMB3_COMPRESS_LZ4 cpu_to_le16(0x0005)
/* Compression Flags */
#define SMB2_COMPRESSION_CAPABILITIES_FLAG_NONE cpu_to_le32(0x00000000)
diff --git a/fs/smb/server/glob.h b/fs/smb/server/glob.h
index 5b8f3e0ebdb3..d528b20b37a8 100644
--- a/fs/smb/server/glob.h
+++ b/fs/smb/server/glob.h
@@ -12,8 +12,6 @@
#include "unicode.h"
#include "vfs_cache.h"
-#define KSMBD_VERSION "3.4.2"
-
extern int ksmbd_debug_types;
#define KSMBD_DEBUG_SMB BIT(0)
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index 0ebf91ffa236..8ca8a45c4c62 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -75,6 +75,7 @@ struct ksmbd_heartbeat {
#define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION BIT(1)
#define KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL BIT(2)
#define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF BIT(3)
+#define KSMBD_GLOBAL_FLAG_DURABLE_HANDLE BIT(4)
/*
* IPC request for ksmbd server startup
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 15f68ee05089..aec0a7a12405 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -156,7 +156,7 @@ void ksmbd_session_destroy(struct ksmbd_session *sess)
kfree(sess);
}
-static struct ksmbd_session *__session_lookup(unsigned long long id)
+struct ksmbd_session *__session_lookup(unsigned long long id)
{
struct ksmbd_session *sess;
@@ -305,6 +305,32 @@ struct preauth_session *ksmbd_preauth_session_alloc(struct ksmbd_conn *conn,
return sess;
}
+void destroy_previous_session(struct ksmbd_conn *conn,
+ struct ksmbd_user *user, u64 id)
+{
+ struct ksmbd_session *prev_sess;
+ struct ksmbd_user *prev_user;
+
+ down_write(&sessions_table_lock);
+ down_write(&conn->session_lock);
+ prev_sess = __session_lookup(id);
+ if (!prev_sess || prev_sess->state == SMB2_SESSION_EXPIRED)
+ goto out;
+
+ prev_user = prev_sess->user;
+ if (!prev_user ||
+ strcmp(user->name, prev_user->name) ||
+ user->passkey_sz != prev_user->passkey_sz ||
+ memcmp(user->passkey, prev_user->passkey, user->passkey_sz))
+ goto out;
+
+ ksmbd_destroy_file_table(&prev_sess->file_table);
+ prev_sess->state = SMB2_SESSION_EXPIRED;
+out:
+ up_write(&conn->session_lock);
+ up_write(&sessions_table_lock);
+}
+
static bool ksmbd_preauth_session_id_match(struct preauth_session *sess,
unsigned long long id)
{
diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h
index 63cb08fffde8..dc9fded2cd43 100644
--- a/fs/smb/server/mgmt/user_session.h
+++ b/fs/smb/server/mgmt/user_session.h
@@ -88,8 +88,11 @@ struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
int ksmbd_session_register(struct ksmbd_conn *conn,
struct ksmbd_session *sess);
void ksmbd_sessions_deregister(struct ksmbd_conn *conn);
+struct ksmbd_session *__session_lookup(unsigned long long id);
struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn,
unsigned long long id);
+void destroy_previous_session(struct ksmbd_conn *conn,
+ struct ksmbd_user *user, u64 id);
struct preauth_session *ksmbd_preauth_session_alloc(struct ksmbd_conn *conn,
u64 sess_id);
struct preauth_session *ksmbd_preauth_session_lookup(struct ksmbd_conn *conn,
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 53dfaac425c6..4978edfb15f9 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -159,7 +159,8 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info,
op_entry);
if (opinfo) {
- if (!atomic_inc_not_zero(&opinfo->refcount))
+ if (opinfo->conn == NULL ||
+ !atomic_inc_not_zero(&opinfo->refcount))
opinfo = NULL;
else {
atomic_inc(&opinfo->conn->r_count);
@@ -527,7 +528,7 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci,
*/
read_lock(&ci->m_lock);
list_for_each_entry(opinfo, &ci->m_op_list, op_entry) {
- if (!opinfo->is_lease)
+ if (!opinfo->is_lease || !opinfo->conn)
continue;
read_unlock(&ci->m_lock);
lease = opinfo->o_lease;
@@ -641,7 +642,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
struct smb2_hdr *rsp_hdr;
struct ksmbd_file *fp;
- fp = ksmbd_lookup_durable_fd(br_info->fid);
+ fp = ksmbd_lookup_global_fd(br_info->fid);
if (!fp)
goto out;
@@ -1106,7 +1107,7 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp,
read_lock(&p_ci->m_lock);
list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) {
- if (!opinfo->is_lease)
+ if (opinfo->conn == NULL || !opinfo->is_lease)
continue;
if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE &&
@@ -1142,7 +1143,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp)
opinfo = rcu_dereference(fp->f_opinfo);
rcu_read_unlock();
- if (!opinfo->is_lease || opinfo->o_lease->version != 2)
+ if (!opinfo || !opinfo->is_lease || opinfo->o_lease->version != 2)
return;
p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent);
@@ -1151,7 +1152,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp)
read_lock(&p_ci->m_lock);
list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) {
- if (!opinfo->is_lease)
+ if (opinfo->conn == NULL || !opinfo->is_lease)
continue;
if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE) {
@@ -1361,6 +1362,9 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
rcu_read_lock();
list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) {
+ if (brk_op->conn == NULL)
+ continue;
+
if (!atomic_inc_not_zero(&brk_op->refcount))
continue;
@@ -1496,11 +1500,10 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
/**
* parse_lease_state() - parse lease context containted in file open request
* @open_req: buffer containing smb2 file open(create) request
- * @is_dir: whether leasing file is directory
*
* Return: oplock state, -ENOENT if create lease context not found
*/
-struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir)
+struct lease_ctx_info *parse_lease_state(void *open_req)
{
struct create_context *cc;
struct smb2_create_req *req = (struct smb2_create_req *)open_req;
@@ -1518,12 +1521,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir)
struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
- if (is_dir) {
- lreq->req_state = lc->lcontext.LeaseState &
- ~SMB2_LEASE_WRITE_CACHING_LE;
- lreq->is_dir = true;
- } else
- lreq->req_state = lc->lcontext.LeaseState;
+ lreq->req_state = lc->lcontext.LeaseState;
lreq->flags = lc->lcontext.LeaseFlags;
lreq->epoch = lc->lcontext.Epoch;
lreq->duration = lc->lcontext.LeaseDuration;
@@ -1646,6 +1644,8 @@ void create_durable_v2_rsp_buf(char *cc, struct ksmbd_file *fp)
buf->Name[3] = 'Q';
buf->Timeout = cpu_to_le32(fp->durable_timeout);
+ if (fp->is_persistent)
+ buf->Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
}
/**
@@ -1813,3 +1813,71 @@ out:
read_unlock(&lease_list_lock);
return ret_op;
}
+
+int smb2_check_durable_oplock(struct ksmbd_conn *conn,
+ struct ksmbd_share_config *share,
+ struct ksmbd_file *fp,
+ struct lease_ctx_info *lctx,
+ char *name)
+{
+ struct oplock_info *opinfo = opinfo_get(fp);
+ int ret = 0;
+
+ if (!opinfo)
+ return 0;
+
+ if (opinfo->is_lease == false) {
+ if (lctx) {
+ pr_err("create context include lease\n");
+ ret = -EBADF;
+ goto out;
+ }
+
+ if (opinfo->level != SMB2_OPLOCK_LEVEL_BATCH) {
+ pr_err("oplock level is not equal to SMB2_OPLOCK_LEVEL_BATCH\n");
+ ret = -EBADF;
+ }
+
+ goto out;
+ }
+
+ if (memcmp(conn->ClientGUID, fp->client_guid,
+ SMB2_CLIENT_GUID_SIZE)) {
+ ksmbd_debug(SMB, "Client guid of fp is not equal to the one of connection\n");
+ ret = -EBADF;
+ goto out;
+ }
+
+ if (!lctx) {
+ ksmbd_debug(SMB, "create context does not include lease\n");
+ ret = -EBADF;
+ goto out;
+ }
+
+ if (memcmp(opinfo->o_lease->lease_key, lctx->lease_key,
+ SMB2_LEASE_KEY_SIZE)) {
+ ksmbd_debug(SMB,
+ "lease key of fp does not match lease key in create context\n");
+ ret = -EBADF;
+ goto out;
+ }
+
+ if (!(opinfo->o_lease->state & SMB2_LEASE_HANDLE_CACHING_LE)) {
+ ksmbd_debug(SMB, "lease state does not contain SMB2_LEASE_HANDLE_CACHING\n");
+ ret = -EBADF;
+ goto out;
+ }
+
+ if (opinfo->o_lease->version != lctx->version) {
+ ksmbd_debug(SMB,
+ "lease version of fp does not match the one in create context\n");
+ ret = -EBADF;
+ goto out;
+ }
+
+ if (!ksmbd_inode_pending_delete(fp))
+ ret = ksmbd_validate_name_reconnect(share, fp, name);
+out:
+ opinfo_put(opinfo);
+ return ret;
+}
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h
index 5b93ea9196c0..e9da63f25b20 100644
--- a/fs/smb/server/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -111,7 +111,7 @@ void opinfo_put(struct oplock_info *opinfo);
/* Lease related functions */
void create_lease_buf(u8 *rbuf, struct lease *lease);
-struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir);
+struct lease_ctx_info *parse_lease_state(void *open_req);
__u8 smb2_map_lease_to_oplock(__le32 lease_state);
int lease_read_to_write(struct oplock_info *opinfo);
@@ -130,4 +130,9 @@ void destroy_lease_table(struct ksmbd_conn *conn);
void smb_send_parent_lease_break_noti(struct ksmbd_file *fp,
struct lease_ctx_info *lctx);
void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp);
+int smb2_check_durable_oplock(struct ksmbd_conn *conn,
+ struct ksmbd_share_config *share,
+ struct ksmbd_file *fp,
+ struct lease_ctx_info *lctx,
+ char *name);
#endif /* __KSMBD_OPLOCK_H */
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index 3079e607c5fe..c0788188aa82 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -625,7 +625,6 @@ static void __exit ksmbd_server_exit(void)
}
MODULE_AUTHOR("Namjae Jeon <linkinjeon@kernel.org>");
-MODULE_VERSION(KSMBD_VERSION);
MODULE_DESCRIPTION("Linux kernel CIFS/SMB SERVER");
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: ecb");
diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c
index 03dded29a980..727cb49926ee 100644
--- a/fs/smb/server/smb2misc.c
+++ b/fs/smb/server/smb2misc.c
@@ -101,13 +101,17 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
*len = le16_to_cpu(((struct smb2_sess_setup_req *)hdr)->SecurityBufferLength);
break;
case SMB2_TREE_CONNECT:
- *off = le16_to_cpu(((struct smb2_tree_connect_req *)hdr)->PathOffset);
+ *off = max_t(unsigned short int,
+ le16_to_cpu(((struct smb2_tree_connect_req *)hdr)->PathOffset),
+ offsetof(struct smb2_tree_connect_req, Buffer));
*len = le16_to_cpu(((struct smb2_tree_connect_req *)hdr)->PathLength);
break;
case SMB2_CREATE:
{
unsigned short int name_off =
- le16_to_cpu(((struct smb2_create_req *)hdr)->NameOffset);
+ max_t(unsigned short int,
+ le16_to_cpu(((struct smb2_create_req *)hdr)->NameOffset),
+ offsetof(struct smb2_create_req, Buffer));
unsigned short int name_len =
le16_to_cpu(((struct smb2_create_req *)hdr)->NameLength);
@@ -128,11 +132,15 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
break;
}
case SMB2_QUERY_INFO:
- *off = le16_to_cpu(((struct smb2_query_info_req *)hdr)->InputBufferOffset);
+ *off = max_t(unsigned int,
+ le16_to_cpu(((struct smb2_query_info_req *)hdr)->InputBufferOffset),
+ offsetof(struct smb2_query_info_req, Buffer));
*len = le32_to_cpu(((struct smb2_query_info_req *)hdr)->InputBufferLength);
break;
case SMB2_SET_INFO:
- *off = le16_to_cpu(((struct smb2_set_info_req *)hdr)->BufferOffset);
+ *off = max_t(unsigned int,
+ le16_to_cpu(((struct smb2_set_info_req *)hdr)->BufferOffset),
+ offsetof(struct smb2_set_info_req, Buffer));
*len = le32_to_cpu(((struct smb2_set_info_req *)hdr)->BufferLength);
break;
case SMB2_READ:
@@ -142,7 +150,7 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
case SMB2_WRITE:
if (((struct smb2_write_req *)hdr)->DataOffset ||
((struct smb2_write_req *)hdr)->Length) {
- *off = max_t(unsigned int,
+ *off = max_t(unsigned short int,
le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset),
offsetof(struct smb2_write_req, Buffer));
*len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length);
@@ -153,7 +161,9 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
*len = le16_to_cpu(((struct smb2_write_req *)hdr)->WriteChannelInfoLength);
break;
case SMB2_QUERY_DIRECTORY:
- *off = le16_to_cpu(((struct smb2_query_directory_req *)hdr)->FileNameOffset);
+ *off = max_t(unsigned short int,
+ le16_to_cpu(((struct smb2_query_directory_req *)hdr)->FileNameOffset),
+ offsetof(struct smb2_query_directory_req, Buffer));
*len = le16_to_cpu(((struct smb2_query_directory_req *)hdr)->FileNameLength);
break;
case SMB2_LOCK:
@@ -168,7 +178,9 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
break;
}
case SMB2_IOCTL:
- *off = le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputOffset);
+ *off = max_t(unsigned int,
+ le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputOffset),
+ offsetof(struct smb2_ioctl_req, Buffer));
*len = le32_to_cpu(((struct smb2_ioctl_req *)hdr)->InputCount);
break;
default:
diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c
index 27a9dce3e03a..a45f7dca482e 100644
--- a/fs/smb/server/smb2ops.c
+++ b/fs/smb/server/smb2ops.c
@@ -256,6 +256,9 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_PERSISTENT_HANDLES;
}
/**
@@ -283,6 +286,9 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE)
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_PERSISTENT_HANDLES;
+
INIT_LIST_HEAD(&conn->preauth_sess_table);
return 0;
}
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 089527a8b4ff..d478fa0c57ab 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -607,30 +607,6 @@ int smb2_check_user_session(struct ksmbd_work *work)
return -ENOENT;
}
-static void destroy_previous_session(struct ksmbd_conn *conn,
- struct ksmbd_user *user, u64 id)
-{
- struct ksmbd_session *prev_sess = ksmbd_session_lookup_slowpath(id);
- struct ksmbd_user *prev_user;
- struct channel *chann;
- long index;
-
- if (!prev_sess)
- return;
-
- prev_user = prev_sess->user;
-
- if (!prev_user ||
- strcmp(user->name, prev_user->name) ||
- user->passkey_sz != prev_user->passkey_sz ||
- memcmp(user->passkey, prev_user->passkey, user->passkey_sz))
- return;
-
- prev_sess->state = SMB2_SESSION_EXPIRED;
- xa_for_each(&prev_sess->ksmbd_chann_list, index, chann)
- ksmbd_conn_set_exiting(chann->conn);
-}
-
/**
* smb2_get_name() - get filename string from on the wire smb format
* @src: source buffer
@@ -1951,7 +1927,7 @@ int smb2_tree_connect(struct ksmbd_work *work)
WORK_BUFFERS(work, req, rsp);
- treename = smb_strndup_from_utf16(req->Buffer,
+ treename = smb_strndup_from_utf16((char *)req + le16_to_cpu(req->PathOffset),
le16_to_cpu(req->PathLength), true,
conn->local_nls);
if (IS_ERR(treename)) {
@@ -2642,6 +2618,165 @@ static void ksmbd_acls_fattr(struct smb_fattr *fattr,
}
}
+enum {
+ DURABLE_RECONN_V2 = 1,
+ DURABLE_RECONN,
+ DURABLE_REQ_V2,
+ DURABLE_REQ,
+};
+
+struct durable_info {
+ struct ksmbd_file *fp;
+ unsigned short int type;
+ bool persistent;
+ bool reconnected;
+ unsigned int timeout;
+ char *CreateGuid;
+};
+
+static int parse_durable_handle_context(struct ksmbd_work *work,
+ struct smb2_create_req *req,
+ struct lease_ctx_info *lc,
+ struct durable_info *dh_info)
+{
+ struct ksmbd_conn *conn = work->conn;
+ struct create_context *context;
+ int dh_idx, err = 0;
+ u64 persistent_id = 0;
+ int req_op_level;
+ static const char * const durable_arr[] = {"DH2C", "DHnC", "DH2Q", "DHnQ"};
+
+ req_op_level = req->RequestedOplockLevel;
+ for (dh_idx = DURABLE_RECONN_V2; dh_idx <= ARRAY_SIZE(durable_arr);
+ dh_idx++) {
+ context = smb2_find_context_vals(req, durable_arr[dh_idx - 1], 4);
+ if (IS_ERR(context)) {
+ err = PTR_ERR(context);
+ goto out;
+ }
+ if (!context)
+ continue;
+
+ switch (dh_idx) {
+ case DURABLE_RECONN_V2:
+ {
+ struct create_durable_reconn_v2_req *recon_v2;
+
+ if (dh_info->type == DURABLE_RECONN ||
+ dh_info->type == DURABLE_REQ_V2) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ recon_v2 = (struct create_durable_reconn_v2_req *)context;
+ persistent_id = recon_v2->Fid.PersistentFileId;
+ dh_info->fp = ksmbd_lookup_durable_fd(persistent_id);
+ if (!dh_info->fp) {
+ ksmbd_debug(SMB, "Failed to get durable handle state\n");
+ err = -EBADF;
+ goto out;
+ }
+
+ if (memcmp(dh_info->fp->create_guid, recon_v2->CreateGuid,
+ SMB2_CREATE_GUID_SIZE)) {
+ err = -EBADF;
+ ksmbd_put_durable_fd(dh_info->fp);
+ goto out;
+ }
+
+ dh_info->type = dh_idx;
+ dh_info->reconnected = true;
+ ksmbd_debug(SMB,
+ "reconnect v2 Persistent-id from reconnect = %llu\n",
+ persistent_id);
+ break;
+ }
+ case DURABLE_RECONN:
+ {
+ struct create_durable_reconn_req *recon;
+
+ if (dh_info->type == DURABLE_RECONN_V2 ||
+ dh_info->type == DURABLE_REQ_V2) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ recon = (struct create_durable_reconn_req *)context;
+ persistent_id = recon->Data.Fid.PersistentFileId;
+ dh_info->fp = ksmbd_lookup_durable_fd(persistent_id);
+ if (!dh_info->fp) {
+ ksmbd_debug(SMB, "Failed to get durable handle state\n");
+ err = -EBADF;
+ goto out;
+ }
+
+ dh_info->type = dh_idx;
+ dh_info->reconnected = true;
+ ksmbd_debug(SMB, "reconnect Persistent-id from reconnect = %llu\n",
+ persistent_id);
+ break;
+ }
+ case DURABLE_REQ_V2:
+ {
+ struct create_durable_req_v2 *durable_v2_blob;
+
+ if (dh_info->type == DURABLE_RECONN ||
+ dh_info->type == DURABLE_RECONN_V2) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ durable_v2_blob =
+ (struct create_durable_req_v2 *)context;
+ ksmbd_debug(SMB, "Request for durable v2 open\n");
+ dh_info->fp = ksmbd_lookup_fd_cguid(durable_v2_blob->CreateGuid);
+ if (dh_info->fp) {
+ if (!memcmp(conn->ClientGUID, dh_info->fp->client_guid,
+ SMB2_CLIENT_GUID_SIZE)) {
+ if (!(req->hdr.Flags & SMB2_FLAGS_REPLAY_OPERATION)) {
+ err = -ENOEXEC;
+ goto out;
+ }
+
+ dh_info->fp->conn = conn;
+ dh_info->reconnected = true;
+ goto out;
+ }
+ }
+
+ if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) ||
+ req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) {
+ dh_info->CreateGuid =
+ durable_v2_blob->CreateGuid;
+ dh_info->persistent =
+ le32_to_cpu(durable_v2_blob->Flags);
+ dh_info->timeout =
+ le32_to_cpu(durable_v2_blob->Timeout);
+ dh_info->type = dh_idx;
+ }
+ break;
+ }
+ case DURABLE_REQ:
+ if (dh_info->type == DURABLE_RECONN)
+ goto out;
+ if (dh_info->type == DURABLE_RECONN_V2 ||
+ dh_info->type == DURABLE_REQ_V2) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) ||
+ req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) {
+ ksmbd_debug(SMB, "Request for durable open\n");
+ dh_info->type = dh_idx;
+ }
+ }
+ }
+
+out:
+ return err;
+}
+
/**
* smb2_open() - handler for smb file open request
* @work: smb work containing request buffer
@@ -2665,6 +2800,7 @@ int smb2_open(struct ksmbd_work *work)
struct lease_ctx_info *lc = NULL;
struct create_ea_buf_req *ea_buf = NULL;
struct oplock_info *opinfo;
+ struct durable_info dh_info = {0};
__le32 *next_ptr = NULL;
int req_op_level = 0, open_flags = 0, may_flags = 0, file_info = 0;
int rc = 0;
@@ -2704,7 +2840,7 @@ int smb2_open(struct ksmbd_work *work)
goto err_out2;
}
- name = smb2_get_name(req->Buffer,
+ name = smb2_get_name((char *)req + le16_to_cpu(req->NameOffset),
le16_to_cpu(req->NameLength),
work->conn->local_nls);
if (IS_ERR(name)) {
@@ -2745,6 +2881,49 @@ int smb2_open(struct ksmbd_work *work)
}
}
+ req_op_level = req->RequestedOplockLevel;
+
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE &&
+ req->CreateContextsOffset) {
+ lc = parse_lease_state(req);
+ rc = parse_durable_handle_context(work, req, lc, &dh_info);
+ if (rc) {
+ ksmbd_debug(SMB, "error parsing durable handle context\n");
+ goto err_out2;
+ }
+
+ if (dh_info.reconnected == true) {
+ rc = smb2_check_durable_oplock(conn, share, dh_info.fp, lc, name);
+ if (rc) {
+ ksmbd_put_durable_fd(dh_info.fp);
+ goto err_out2;
+ }
+
+ rc = ksmbd_reopen_durable_fd(work, dh_info.fp);
+ if (rc) {
+ ksmbd_put_durable_fd(dh_info.fp);
+ goto err_out2;
+ }
+
+ if (ksmbd_override_fsids(work)) {
+ rc = -ENOMEM;
+ ksmbd_put_durable_fd(dh_info.fp);
+ goto err_out2;
+ }
+
+ fp = dh_info.fp;
+ file_info = FILE_OPENED;
+
+ rc = ksmbd_vfs_getattr(&fp->filp->f_path, &stat);
+ if (rc)
+ goto err_out2;
+
+ ksmbd_put_durable_fd(fp);
+ goto reconnected_fp;
+ }
+ } else if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
+ lc = parse_lease_state(req);
+
if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE)) {
pr_err("Invalid impersonationlevel : 0x%x\n",
le32_to_cpu(req->ImpersonationLevel));
@@ -3207,10 +3386,6 @@ int smb2_open(struct ksmbd_work *work)
need_truncate = 1;
}
- req_op_level = req->RequestedOplockLevel;
- if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
- lc = parse_lease_state(req, S_ISDIR(file_inode(filp)->i_mode));
-
share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp);
if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) ||
(req_op_level == SMB2_OPLOCK_LEVEL_LEASE &&
@@ -3221,6 +3396,11 @@ int smb2_open(struct ksmbd_work *work)
}
} else {
if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
+ if (S_ISDIR(file_inode(filp)->i_mode)) {
+ lc->req_state &= ~SMB2_LEASE_WRITE_CACHING_LE;
+ lc->is_dir = true;
+ }
+
/*
* Compare parent lease using parent key. If there is no
* a lease that has same parent key, Send lease break
@@ -3317,6 +3497,24 @@ int smb2_open(struct ksmbd_work *work)
memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
+ if (dh_info.type == DURABLE_REQ_V2 || dh_info.type == DURABLE_REQ) {
+ if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent)
+ fp->is_persistent = true;
+ else
+ fp->is_durable = true;
+
+ if (dh_info.type == DURABLE_REQ_V2) {
+ memcpy(fp->create_guid, dh_info.CreateGuid,
+ SMB2_CREATE_GUID_SIZE);
+ if (dh_info.timeout)
+ fp->durable_timeout = min(dh_info.timeout,
+ 300000);
+ else
+ fp->durable_timeout = 60;
+ }
+ }
+
+reconnected_fp:
rsp->StructureSize = cpu_to_le16(89);
rcu_read_lock();
opinfo = rcu_dereference(fp->f_opinfo);
@@ -3403,6 +3601,33 @@ int smb2_open(struct ksmbd_work *work)
next_off = conn->vals->create_disk_id_size;
}
+ if (dh_info.type == DURABLE_REQ || dh_info.type == DURABLE_REQ_V2) {
+ struct create_context *durable_ccontext;
+
+ durable_ccontext = (struct create_context *)(rsp->Buffer +
+ le32_to_cpu(rsp->CreateContextsLength));
+ contxt_cnt++;
+ if (dh_info.type == DURABLE_REQ) {
+ create_durable_rsp_buf(rsp->Buffer +
+ le32_to_cpu(rsp->CreateContextsLength));
+ le32_add_cpu(&rsp->CreateContextsLength,
+ conn->vals->create_durable_size);
+ iov_len += conn->vals->create_durable_size;
+ } else {
+ create_durable_v2_rsp_buf(rsp->Buffer +
+ le32_to_cpu(rsp->CreateContextsLength),
+ fp);
+ le32_add_cpu(&rsp->CreateContextsLength,
+ conn->vals->create_durable_v2_size);
+ iov_len += conn->vals->create_durable_v2_size;
+ }
+
+ if (next_ptr)
+ *next_ptr = cpu_to_le32(next_off);
+ next_ptr = &durable_ccontext->Next;
+ next_off = conn->vals->create_durable_size;
+ }
+
if (posix_ctxt) {
contxt_cnt++;
create_posix_rsp_buf(rsp->Buffer +
@@ -3828,11 +4053,16 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv)
}
ksmbd_kstat.kstat = &kstat;
- if (priv->info_level != FILE_NAMES_INFORMATION)
- ksmbd_vfs_fill_dentry_attrs(priv->work,
- idmap,
- dent,
- &ksmbd_kstat);
+ if (priv->info_level != FILE_NAMES_INFORMATION) {
+ rc = ksmbd_vfs_fill_dentry_attrs(priv->work,
+ idmap,
+ dent,
+ &ksmbd_kstat);
+ if (rc) {
+ dput(dent);
+ continue;
+ }
+ }
rc = smb2_populate_readdir_entry(priv->work->conn,
priv->info_level,
@@ -4075,7 +4305,7 @@ int smb2_query_dir(struct ksmbd_work *work)
}
srch_flag = req->Flags;
- srch_ptr = smb_strndup_from_utf16(req->Buffer,
+ srch_ptr = smb_strndup_from_utf16((char *)req + le16_to_cpu(req->FileNameOffset),
le16_to_cpu(req->FileNameLength), 1,
conn->local_nls);
if (IS_ERR(srch_ptr)) {
@@ -4335,7 +4565,8 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
sizeof(struct smb2_ea_info_req))
return -EINVAL;
- ea_req = (struct smb2_ea_info_req *)req->Buffer;
+ ea_req = (struct smb2_ea_info_req *)((char *)req +
+ le16_to_cpu(req->InputBufferOffset));
} else {
/* need to send all EAs, if no specific EA is requested*/
if (le32_to_cpu(req->Flags) & SL_RETURN_SINGLE_ENTRY)
@@ -4480,6 +4711,7 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
struct smb2_file_basic_info *basic_info;
struct kstat stat;
u64 time;
+ int ret;
if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
pr_err("no right to read the attributes : 0x%x\n",
@@ -4487,9 +4719,12 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
return -EACCES;
}
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret)
+ return ret;
+
basic_info = (struct smb2_file_basic_info *)rsp->Buffer;
- generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
- file_inode(fp->filp), &stat);
basic_info->CreationTime = cpu_to_le64(fp->create_time);
time = ksmbd_UnixTimeToNT(stat.atime);
basic_info->LastAccessTime = cpu_to_le64(time);
@@ -4504,27 +4739,31 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
return 0;
}
-static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
- struct ksmbd_file *fp, void *rsp_org)
+static int get_file_standard_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
{
struct smb2_file_standard_info *sinfo;
unsigned int delete_pending;
- struct inode *inode;
struct kstat stat;
+ int ret;
- inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret)
+ return ret;
sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
delete_pending = ksmbd_inode_pending_delete(fp);
- sinfo->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
+ sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9);
sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending);
sinfo->DeletePending = delete_pending;
sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0;
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_standard_info));
+
+ return 0;
}
static void get_file_alignment_info(struct smb2_query_info_rsp *rsp,
@@ -4546,11 +4785,11 @@ static int get_file_all_info(struct ksmbd_work *work,
struct ksmbd_conn *conn = work->conn;
struct smb2_file_all_info *file_info;
unsigned int delete_pending;
- struct inode *inode;
struct kstat stat;
int conv_len;
char *filename;
u64 time;
+ int ret;
if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n",
@@ -4562,8 +4801,10 @@ static int get_file_all_info(struct ksmbd_work *work,
if (IS_ERR(filename))
return PTR_ERR(filename);
- inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret)
+ return ret;
ksmbd_debug(SMB, "filename = %s\n", filename);
delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4579,7 +4820,7 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->Attributes = fp->f_ci->m_fattr;
file_info->Pad1 = 0;
file_info->AllocationSize =
- cpu_to_le64(inode->i_blocks << 9);
+ cpu_to_le64(stat.blocks << 9);
file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
file_info->NumberOfLinks =
cpu_to_le32(get_nlink(&stat) - delete_pending);
@@ -4623,10 +4864,10 @@ static void get_file_alternate_info(struct ksmbd_work *work,
cpu_to_le32(sizeof(struct smb2_file_alt_name_info) + conv_len);
}
-static void get_file_stream_info(struct ksmbd_work *work,
- struct smb2_query_info_rsp *rsp,
- struct ksmbd_file *fp,
- void *rsp_org)
+static int get_file_stream_info(struct ksmbd_work *work,
+ struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp,
+ void *rsp_org)
{
struct ksmbd_conn *conn = work->conn;
struct smb2_file_stream_info *file_info;
@@ -4637,9 +4878,13 @@ static void get_file_stream_info(struct ksmbd_work *work,
int nbytes = 0, streamlen, stream_name_len, next, idx = 0;
int buf_free_len;
struct smb2_query_info_req *req = ksmbd_req_buf_next(work);
+ int ret;
+
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret)
+ return ret;
- generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
- file_inode(fp->filp), &stat);
file_info = (struct smb2_file_stream_info *)rsp->Buffer;
buf_free_len =
@@ -4720,29 +4965,37 @@ out:
kvfree(xattr_list);
rsp->OutputBufferLength = cpu_to_le32(nbytes);
+
+ return 0;
}
-static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
- struct ksmbd_file *fp, void *rsp_org)
+static int get_file_internal_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
{
struct smb2_file_internal_info *file_info;
struct kstat stat;
+ int ret;
+
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret)
+ return ret;
- generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
- file_inode(fp->filp), &stat);
file_info = (struct smb2_file_internal_info *)rsp->Buffer;
file_info->IndexNumber = cpu_to_le64(stat.ino);
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_internal_info));
+
+ return 0;
}
static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
struct ksmbd_file *fp, void *rsp_org)
{
struct smb2_file_ntwrk_info *file_info;
- struct inode *inode;
struct kstat stat;
u64 time;
+ int ret;
if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
pr_err("no right to read the attributes : 0x%x\n",
@@ -4750,10 +5003,12 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
return -EACCES;
}
- file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer;
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret)
+ return ret;
- inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
+ file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer;
file_info->CreationTime = cpu_to_le64(fp->create_time);
time = ksmbd_UnixTimeToNT(stat.atime);
@@ -4763,8 +5018,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
time = ksmbd_UnixTimeToNT(stat.ctime);
file_info->ChangeTime = cpu_to_le64(time);
file_info->Attributes = fp->f_ci->m_fattr;
- file_info->AllocationSize =
- cpu_to_le64(inode->i_blocks << 9);
+ file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
file_info->Reserved = cpu_to_le32(0);
rsp->OutputBufferLength =
@@ -4804,14 +5058,17 @@ static void get_file_mode_info(struct smb2_query_info_rsp *rsp,
cpu_to_le32(sizeof(struct smb2_file_mode_info));
}
-static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
- struct ksmbd_file *fp, void *rsp_org)
+static int get_file_compression_info(struct smb2_query_info_rsp *rsp,
+ struct ksmbd_file *fp, void *rsp_org)
{
struct smb2_file_comp_info *file_info;
struct kstat stat;
+ int ret;
- generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
- file_inode(fp->filp), &stat);
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret)
+ return ret;
file_info = (struct smb2_file_comp_info *)rsp->Buffer;
file_info->CompressedFileSize = cpu_to_le64(stat.blocks << 9);
@@ -4823,6 +5080,8 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
rsp->OutputBufferLength =
cpu_to_le32(sizeof(struct smb2_file_comp_info));
+
+ return 0;
}
static int get_file_attribute_tag_info(struct smb2_query_info_rsp *rsp,
@@ -4844,7 +5103,7 @@ static int get_file_attribute_tag_info(struct smb2_query_info_rsp *rsp,
return 0;
}
-static void find_file_posix_info(struct smb2_query_info_rsp *rsp,
+static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
struct ksmbd_file *fp, void *rsp_org)
{
struct smb311_posix_qinfo *file_info;
@@ -4852,24 +5111,31 @@ static void find_file_posix_info(struct smb2_query_info_rsp *rsp,
struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
+ struct kstat stat;
u64 time;
int out_buf_len = sizeof(struct smb311_posix_qinfo) + 32;
+ int ret;
+
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret)
+ return ret;
file_info = (struct smb311_posix_qinfo *)rsp->Buffer;
file_info->CreationTime = cpu_to_le64(fp->create_time);
- time = ksmbd_UnixTimeToNT(inode_get_atime(inode));
+ time = ksmbd_UnixTimeToNT(stat.atime);
file_info->LastAccessTime = cpu_to_le64(time);
- time = ksmbd_UnixTimeToNT(inode_get_mtime(inode));
+ time = ksmbd_UnixTimeToNT(stat.mtime);
file_info->LastWriteTime = cpu_to_le64(time);
- time = ksmbd_UnixTimeToNT(inode_get_ctime(inode));
+ time = ksmbd_UnixTimeToNT(stat.ctime);
file_info->ChangeTime = cpu_to_le64(time);
file_info->DosAttributes = fp->f_ci->m_fattr;
- file_info->Inode = cpu_to_le64(inode->i_ino);
- file_info->EndOfFile = cpu_to_le64(inode->i_size);
- file_info->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
- file_info->HardLinks = cpu_to_le32(inode->i_nlink);
- file_info->Mode = cpu_to_le32(inode->i_mode & 0777);
- file_info->DeviceId = cpu_to_le32(inode->i_rdev);
+ file_info->Inode = cpu_to_le64(stat.ino);
+ file_info->EndOfFile = cpu_to_le64(stat.size);
+ file_info->AllocationSize = cpu_to_le64(stat.blocks << 9);
+ file_info->HardLinks = cpu_to_le32(stat.nlink);
+ file_info->Mode = cpu_to_le32(stat.mode & 0777);
+ file_info->DeviceId = cpu_to_le32(stat.rdev);
/*
* Sids(32) contain two sids(Domain sid(16), UNIX group sid(16)).
@@ -4882,6 +5148,8 @@ static void find_file_posix_info(struct smb2_query_info_rsp *rsp,
SIDUNIX_GROUP, (struct smb_sid *)&file_info->Sids[16]);
rsp->OutputBufferLength = cpu_to_le32(out_buf_len);
+
+ return 0;
}
static int smb2_get_info_file(struct ksmbd_work *work,
@@ -4930,7 +5198,7 @@ static int smb2_get_info_file(struct ksmbd_work *work,
break;
case FILE_STANDARD_INFORMATION:
- get_file_standard_info(rsp, fp, work->response_buf);
+ rc = get_file_standard_info(rsp, fp, work->response_buf);
break;
case FILE_ALIGNMENT_INFORMATION:
@@ -4946,11 +5214,11 @@ static int smb2_get_info_file(struct ksmbd_work *work,
break;
case FILE_STREAM_INFORMATION:
- get_file_stream_info(work, rsp, fp, work->response_buf);
+ rc = get_file_stream_info(work, rsp, fp, work->response_buf);
break;
case FILE_INTERNAL_INFORMATION:
- get_file_internal_info(rsp, fp, work->response_buf);
+ rc = get_file_internal_info(rsp, fp, work->response_buf);
break;
case FILE_NETWORK_OPEN_INFORMATION:
@@ -4974,7 +5242,7 @@ static int smb2_get_info_file(struct ksmbd_work *work,
break;
case FILE_COMPRESSION_INFORMATION:
- get_file_compression_info(rsp, fp, work->response_buf);
+ rc = get_file_compression_info(rsp, fp, work->response_buf);
break;
case FILE_ATTRIBUTE_TAG_INFORMATION:
@@ -4985,7 +5253,7 @@ static int smb2_get_info_file(struct ksmbd_work *work,
pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
rc = -EOPNOTSUPP;
} else {
- find_file_posix_info(rsp, fp, work->response_buf);
+ rc = find_file_posix_info(rsp, fp, work->response_buf);
}
break;
default:
@@ -5398,7 +5666,6 @@ int smb2_close(struct ksmbd_work *work)
struct smb2_close_rsp *rsp;
struct ksmbd_conn *conn = work->conn;
struct ksmbd_file *fp;
- struct inode *inode;
u64 time;
int err = 0;
@@ -5453,24 +5720,33 @@ int smb2_close(struct ksmbd_work *work)
rsp->Reserved = 0;
if (req->Flags == SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB) {
+ struct kstat stat;
+ int ret;
+
fp = ksmbd_lookup_fd_fast(work, volatile_id);
if (!fp) {
err = -ENOENT;
goto out;
}
- inode = file_inode(fp->filp);
+ ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (ret) {
+ ksmbd_fd_put(work, fp);
+ goto out;
+ }
+
rsp->Flags = SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB;
- rsp->AllocationSize = S_ISDIR(inode->i_mode) ? 0 :
- cpu_to_le64(inode->i_blocks << 9);
- rsp->EndOfFile = cpu_to_le64(inode->i_size);
+ rsp->AllocationSize = S_ISDIR(stat.mode) ? 0 :
+ cpu_to_le64(stat.blocks << 9);
+ rsp->EndOfFile = cpu_to_le64(stat.size);
rsp->Attributes = fp->f_ci->m_fattr;
rsp->CreationTime = cpu_to_le64(fp->create_time);
- time = ksmbd_UnixTimeToNT(inode_get_atime(inode));
+ time = ksmbd_UnixTimeToNT(stat.atime);
rsp->LastAccessTime = cpu_to_le64(time);
- time = ksmbd_UnixTimeToNT(inode_get_mtime(inode));
+ time = ksmbd_UnixTimeToNT(stat.mtime);
rsp->LastWriteTime = cpu_to_le64(time);
- time = ksmbd_UnixTimeToNT(inode_get_ctime(inode));
+ time = ksmbd_UnixTimeToNT(stat.ctime);
rsp->ChangeTime = cpu_to_le64(time);
ksmbd_fd_put(work, fp);
} else {
@@ -5759,15 +6035,21 @@ static int set_file_allocation_info(struct ksmbd_work *work,
loff_t alloc_blks;
struct inode *inode;
+ struct kstat stat;
int rc;
if (!(fp->daccess & FILE_WRITE_DATA_LE))
return -EACCES;
+ rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
+ AT_STATX_SYNC_AS_STAT);
+ if (rc)
+ return rc;
+
alloc_blks = (le64_to_cpu(file_alloc_info->AllocationSize) + 511) >> 9;
inode = file_inode(fp->filp);
- if (alloc_blks > inode->i_blocks) {
+ if (alloc_blks > stat.blocks) {
smb_break_all_levII_oplock(work, fp, 1);
rc = vfs_fallocate(fp->filp, FALLOC_FL_KEEP_SIZE, 0,
alloc_blks * 512);
@@ -5775,7 +6057,7 @@ static int set_file_allocation_info(struct ksmbd_work *work,
pr_err("vfs_fallocate is failed : %d\n", rc);
return rc;
}
- } else if (alloc_blks < inode->i_blocks) {
+ } else if (alloc_blks < stat.blocks) {
loff_t size;
/*
@@ -5930,6 +6212,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
struct ksmbd_share_config *share)
{
unsigned int buf_len = le32_to_cpu(req->BufferLength);
+ char *buffer = (char *)req + le16_to_cpu(req->BufferOffset);
switch (req->FileInfoClass) {
case FILE_BASIC_INFORMATION:
@@ -5937,7 +6220,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
if (buf_len < sizeof(struct smb2_file_basic_info))
return -EINVAL;
- return set_file_basic_info(fp, (struct smb2_file_basic_info *)req->Buffer, share);
+ return set_file_basic_info(fp, (struct smb2_file_basic_info *)buffer, share);
}
case FILE_ALLOCATION_INFORMATION:
{
@@ -5945,7 +6228,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
return -EINVAL;
return set_file_allocation_info(work, fp,
- (struct smb2_file_alloc_info *)req->Buffer);
+ (struct smb2_file_alloc_info *)buffer);
}
case FILE_END_OF_FILE_INFORMATION:
{
@@ -5953,7 +6236,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
return -EINVAL;
return set_end_of_file_info(work, fp,
- (struct smb2_file_eof_info *)req->Buffer);
+ (struct smb2_file_eof_info *)buffer);
}
case FILE_RENAME_INFORMATION:
{
@@ -5961,7 +6244,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
return -EINVAL;
return set_rename_info(work, fp,
- (struct smb2_file_rename_info *)req->Buffer,
+ (struct smb2_file_rename_info *)buffer,
buf_len);
}
case FILE_LINK_INFORMATION:
@@ -5970,7 +6253,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
return -EINVAL;
return smb2_create_link(work, work->tcon->share_conf,
- (struct smb2_file_link_info *)req->Buffer,
+ (struct smb2_file_link_info *)buffer,
buf_len, fp->filp,
work->conn->local_nls);
}
@@ -5980,7 +6263,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
return -EINVAL;
return set_file_disposition_info(fp,
- (struct smb2_file_disposition_info *)req->Buffer);
+ (struct smb2_file_disposition_info *)buffer);
}
case FILE_FULL_EA_INFORMATION:
{
@@ -5993,7 +6276,7 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
if (buf_len < sizeof(struct smb2_ea_info))
return -EINVAL;
- return smb2_set_ea((struct smb2_ea_info *)req->Buffer,
+ return smb2_set_ea((struct smb2_ea_info *)buffer,
buf_len, &fp->filp->f_path, true);
}
case FILE_POSITION_INFORMATION:
@@ -6001,14 +6284,14 @@ static int smb2_set_info_file(struct ksmbd_work *work, struct ksmbd_file *fp,
if (buf_len < sizeof(struct smb2_file_pos_info))
return -EINVAL;
- return set_file_position_info(fp, (struct smb2_file_pos_info *)req->Buffer);
+ return set_file_position_info(fp, (struct smb2_file_pos_info *)buffer);
}
case FILE_MODE_INFORMATION:
{
if (buf_len < sizeof(struct smb2_file_mode_info))
return -EINVAL;
- return set_file_mode_info(fp, (struct smb2_file_mode_info *)req->Buffer);
+ return set_file_mode_info(fp, (struct smb2_file_mode_info *)buffer);
}
}
@@ -6089,7 +6372,7 @@ int smb2_set_info(struct ksmbd_work *work)
}
rc = smb2_set_info_sec(fp,
le32_to_cpu(req->AdditionalInformation),
- req->Buffer,
+ (char *)req + le16_to_cpu(req->BufferOffset),
le32_to_cpu(req->BufferLength));
ksmbd_revert_fsids(work);
break;
@@ -7535,7 +7818,7 @@ static int fsctl_pipe_transceive(struct ksmbd_work *work, u64 id,
struct smb2_ioctl_rsp *rsp)
{
struct ksmbd_rpc_command *rpc_resp;
- char *data_buf = (char *)&req->Buffer[0];
+ char *data_buf = (char *)req + le32_to_cpu(req->InputOffset);
int nbytes = 0;
rpc_resp = ksmbd_rpc_ioctl(work->sess, id, data_buf,
@@ -7648,6 +7931,7 @@ int smb2_ioctl(struct ksmbd_work *work)
u64 id = KSMBD_NO_FID;
struct ksmbd_conn *conn = work->conn;
int ret = 0;
+ char *buffer;
if (work->next_smb2_rcv_hdr_off) {
req = ksmbd_req_buf_next(work);
@@ -7670,6 +7954,8 @@ int smb2_ioctl(struct ksmbd_work *work)
goto out;
}
+ buffer = (char *)req + le32_to_cpu(req->InputOffset);
+
cnt_code = le32_to_cpu(req->CtlCode);
ret = smb2_calc_max_out_buf_len(work, 48,
le32_to_cpu(req->MaxOutputResponse));
@@ -7727,7 +8013,7 @@ int smb2_ioctl(struct ksmbd_work *work)
}
ret = fsctl_validate_negotiate_info(conn,
- (struct validate_negotiate_info_req *)&req->Buffer[0],
+ (struct validate_negotiate_info_req *)buffer,
(struct validate_negotiate_info_rsp *)&rsp->Buffer[0],
in_buf_len);
if (ret < 0)
@@ -7780,7 +8066,7 @@ int smb2_ioctl(struct ksmbd_work *work)
rsp->VolatileFileId = req->VolatileFileId;
rsp->PersistentFileId = req->PersistentFileId;
fsctl_copychunk(work,
- (struct copychunk_ioctl_req *)&req->Buffer[0],
+ (struct copychunk_ioctl_req *)buffer,
le32_to_cpu(req->CtlCode),
le32_to_cpu(req->InputCount),
req->VolatileFileId,
@@ -7793,8 +8079,7 @@ int smb2_ioctl(struct ksmbd_work *work)
goto out;
}
- ret = fsctl_set_sparse(work, id,
- (struct file_sparse *)&req->Buffer[0]);
+ ret = fsctl_set_sparse(work, id, (struct file_sparse *)buffer);
if (ret < 0)
goto out;
break;
@@ -7817,7 +8102,7 @@ int smb2_ioctl(struct ksmbd_work *work)
}
zero_data =
- (struct file_zero_data_information *)&req->Buffer[0];
+ (struct file_zero_data_information *)buffer;
off = le64_to_cpu(zero_data->FileOffset);
bfz = le64_to_cpu(zero_data->BeyondFinalZero);
@@ -7848,7 +8133,7 @@ int smb2_ioctl(struct ksmbd_work *work)
}
ret = fsctl_query_allocated_ranges(work, id,
- (struct file_allocated_range_buffer *)&req->Buffer[0],
+ (struct file_allocated_range_buffer *)buffer,
(struct file_allocated_range_buffer *)&rsp->Buffer[0],
out_buf_len /
sizeof(struct file_allocated_range_buffer), &nbytes);
@@ -7892,7 +8177,7 @@ int smb2_ioctl(struct ksmbd_work *work)
goto out;
}
- dup_ext = (struct duplicate_extents_to_file *)&req->Buffer[0];
+ dup_ext = (struct duplicate_extents_to_file *)buffer;
fp_in = ksmbd_lookup_fd_slow(work, dup_ext->VolatileFileHandle,
dup_ext->PersistentFileHandle);
diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h
index d12cfd3b0927..bd1d2a0e9203 100644
--- a/fs/smb/server/smb2pdu.h
+++ b/fs/smb/server/smb2pdu.h
@@ -72,6 +72,18 @@ struct create_durable_req_v2 {
__u8 CreateGuid[16];
} __packed;
+struct create_durable_reconn_req {
+ struct create_context ccontext;
+ __u8 Name[8];
+ union {
+ __u8 Reserved[16];
+ struct {
+ __u64 PersistentFileId;
+ __u64 VolatileFileId;
+ } Fid;
+ } Data;
+} __packed;
+
struct create_durable_reconn_v2_req {
struct create_context ccontext;
__u8 Name[8];
@@ -98,6 +110,9 @@ struct create_durable_rsp {
} Data;
} __packed;
+/* See MS-SMB2 2.2.13.2.11 */
+/* Flags */
+#define SMB2_DHANDLE_FLAG_PERSISTENT 0x00000002
struct create_durable_v2_rsp {
struct create_context ccontext;
__u8 Name[8];
diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c
index 7c98bf699772..fcaf373cc008 100644
--- a/fs/smb/server/smb_common.c
+++ b/fs/smb/server/smb_common.c
@@ -457,10 +457,13 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
}
ksmbd_kstat.kstat = &kstat;
- ksmbd_vfs_fill_dentry_attrs(work,
- idmap,
- dentry,
- &ksmbd_kstat);
+ rc = ksmbd_vfs_fill_dentry_attrs(work,
+ idmap,
+ dentry,
+ &ksmbd_kstat);
+ if (rc)
+ break;
+
rc = fn(conn, info_level, d_info, &ksmbd_kstat);
if (rc)
break;
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index c487e834331a..22f0f3db3ac9 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -1682,11 +1682,19 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
struct dentry *dentry,
struct ksmbd_kstat *ksmbd_kstat)
{
+ struct ksmbd_share_config *share_conf = work->tcon->share_conf;
u64 time;
int rc;
+ struct path path = {
+ .mnt = share_conf->vfs_path.mnt,
+ .dentry = dentry,
+ };
- generic_fillattr(idmap, STATX_BASIC_STATS, d_inode(dentry),
- ksmbd_kstat->kstat);
+ rc = vfs_getattr(&path, ksmbd_kstat->kstat,
+ STATX_BASIC_STATS | STATX_BTIME,
+ AT_STATX_SYNC_AS_STAT);
+ if (rc)
+ return rc;
time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
ksmbd_kstat->create_time = time;
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 4e82ff627d12..030f70700036 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -305,7 +305,8 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp)
fd_limit_close();
__ksmbd_remove_durable_fd(fp);
- __ksmbd_remove_fd(ft, fp);
+ if (ft)
+ __ksmbd_remove_fd(ft, fp);
close_id_del_oplock(fp);
filp = fp->filp;
@@ -465,11 +466,32 @@ struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
return fp;
}
-struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id)
+struct ksmbd_file *ksmbd_lookup_global_fd(unsigned long long id)
{
return __ksmbd_lookup_fd(&global_ft, id);
}
+struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id)
+{
+ struct ksmbd_file *fp;
+
+ fp = __ksmbd_lookup_fd(&global_ft, id);
+ if (fp && fp->conn) {
+ ksmbd_put_durable_fd(fp);
+ fp = NULL;
+ }
+
+ return fp;
+}
+
+void ksmbd_put_durable_fd(struct ksmbd_file *fp)
+{
+ if (!atomic_dec_and_test(&fp->refcount))
+ return;
+
+ __ksmbd_close_fd(NULL, fp);
+}
+
struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid)
{
struct ksmbd_file *fp = NULL;
@@ -639,6 +661,32 @@ __close_file_table_ids(struct ksmbd_file_table *ft,
return num;
}
+static inline bool is_reconnectable(struct ksmbd_file *fp)
+{
+ struct oplock_info *opinfo = opinfo_get(fp);
+ bool reconn = false;
+
+ if (!opinfo)
+ return false;
+
+ if (opinfo->op_state != OPLOCK_STATE_NONE) {
+ opinfo_put(opinfo);
+ return false;
+ }
+
+ if (fp->is_resilient || fp->is_persistent)
+ reconn = true;
+ else if (fp->is_durable && opinfo->is_lease &&
+ opinfo->o_lease->state & SMB2_LEASE_HANDLE_CACHING_LE)
+ reconn = true;
+
+ else if (fp->is_durable && opinfo->level == SMB2_OPLOCK_LEVEL_BATCH)
+ reconn = true;
+
+ opinfo_put(opinfo);
+ return reconn;
+}
+
static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon,
struct ksmbd_file *fp)
{
@@ -648,7 +696,28 @@ static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon,
static bool session_fd_check(struct ksmbd_tree_connect *tcon,
struct ksmbd_file *fp)
{
- return false;
+ struct ksmbd_inode *ci;
+ struct oplock_info *op;
+ struct ksmbd_conn *conn;
+
+ if (!is_reconnectable(fp))
+ return false;
+
+ conn = fp->conn;
+ ci = fp->f_ci;
+ write_lock(&ci->m_lock);
+ list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) {
+ if (op->conn != conn)
+ continue;
+ op->conn = NULL;
+ }
+ write_unlock(&ci->m_lock);
+
+ fp->conn = NULL;
+ fp->tcon = NULL;
+ fp->volatile_id = KSMBD_NO_FID;
+
+ return true;
}
void ksmbd_close_tree_conn_fds(struct ksmbd_work *work)
@@ -687,6 +756,68 @@ void ksmbd_free_global_file_table(void)
ksmbd_destroy_file_table(&global_ft);
}
+int ksmbd_validate_name_reconnect(struct ksmbd_share_config *share,
+ struct ksmbd_file *fp, char *name)
+{
+ char *pathname, *ab_pathname;
+ int ret = 0;
+
+ pathname = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!pathname)
+ return -EACCES;
+
+ ab_pathname = d_path(&fp->filp->f_path, pathname, PATH_MAX);
+ if (IS_ERR(ab_pathname)) {
+ kfree(pathname);
+ return -EACCES;
+ }
+
+ if (name && strcmp(&ab_pathname[share->path_sz + 1], name)) {
+ ksmbd_debug(SMB, "invalid name reconnect %s\n", name);
+ ret = -EINVAL;
+ }
+
+ kfree(pathname);
+
+ return ret;
+}
+
+int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp)
+{
+ struct ksmbd_inode *ci;
+ struct oplock_info *op;
+
+ if (!fp->is_durable || fp->conn || fp->tcon) {
+ pr_err("Invalid durable fd [%p:%p]\n", fp->conn, fp->tcon);
+ return -EBADF;
+ }
+
+ if (has_file_id(fp->volatile_id)) {
+ pr_err("Still in use durable fd: %llu\n", fp->volatile_id);
+ return -EBADF;
+ }
+
+ fp->conn = work->conn;
+ fp->tcon = work->tcon;
+
+ ci = fp->f_ci;
+ write_lock(&ci->m_lock);
+ list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) {
+ if (op->conn)
+ continue;
+ op->conn = fp->conn;
+ }
+ write_unlock(&ci->m_lock);
+
+ __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID);
+ if (!has_file_id(fp->volatile_id)) {
+ fp->conn = NULL;
+ fp->tcon = NULL;
+ return -EBADF;
+ }
+ return 0;
+}
+
int ksmbd_init_file_table(struct ksmbd_file_table *ft)
{
ft->idr = kzalloc(sizeof(struct idr), GFP_KERNEL);
diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h
index a528f0cc775a..ed44fb4e18e7 100644
--- a/fs/smb/server/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
@@ -14,6 +14,7 @@
#include <linux/workqueue.h>
#include "vfs.h"
+#include "mgmt/share_config.h"
/* Windows style file permissions for extended response */
#define FILE_GENERIC_ALL 0x1F01FF
@@ -106,6 +107,9 @@ struct ksmbd_file {
int dot_dotdot[2];
unsigned int f_state;
bool reserve_lease_break;
+ bool is_durable;
+ bool is_persistent;
+ bool is_resilient;
};
static inline void set_ctx_actor(struct dir_context *ctx,
@@ -141,7 +145,9 @@ struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id,
void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp);
struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d);
void ksmbd_inode_put(struct ksmbd_inode *ci);
+struct ksmbd_file *ksmbd_lookup_global_fd(unsigned long long id);
struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id);
+void ksmbd_put_durable_fd(struct ksmbd_file *fp);
struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid);
struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry);
unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp);
@@ -173,6 +179,9 @@ void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp);
void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp);
void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp,
int file_info);
+int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp);
+int ksmbd_validate_name_reconnect(struct ksmbd_share_config *share,
+ struct ksmbd_file *fp, char *name);
int ksmbd_init_file_cache(void);
void ksmbd_exit_file_cache(void);
#endif /* __VFS_CACHE_H__ */
diff --git a/fs/super.c b/fs/super.c
index ee05ab6b37e7..71d9779c42b1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1515,11 +1515,29 @@ static int fs_bdev_thaw(struct block_device *bdev)
return error;
}
+static void fs_bdev_super_get(void *data)
+{
+ struct super_block *sb = data;
+
+ spin_lock(&sb_lock);
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+}
+
+static void fs_bdev_super_put(void *data)
+{
+ struct super_block *sb = data;
+
+ put_super(sb);
+}
+
const struct blk_holder_ops fs_holder_ops = {
.mark_dead = fs_bdev_mark_dead,
.sync = fs_bdev_sync,
.freeze = fs_bdev_freeze,
.thaw = fs_bdev_thaw,
+ .get_holder = fs_bdev_super_get,
+ .put_holder = fs_bdev_super_put,
};
EXPORT_SYMBOL_GPL(fs_holder_ops);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 138676463336..d22ad67a0f32 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -31,6 +31,17 @@ static void remove_files(struct kernfs_node *parent,
kernfs_remove_by_name(parent, (*bin_attr)->attr.name);
}
+static umode_t __first_visible(const struct attribute_group *grp, struct kobject *kobj)
+{
+ if (grp->attrs && grp->attrs[0] && grp->is_visible)
+ return grp->is_visible(kobj, grp->attrs[0], 0);
+
+ if (grp->bin_attrs && grp->bin_attrs[0] && grp->is_bin_visible)
+ return grp->is_bin_visible(kobj, grp->bin_attrs[0], 0);
+
+ return 0;
+}
+
static int create_files(struct kernfs_node *parent, struct kobject *kobj,
kuid_t uid, kgid_t gid,
const struct attribute_group *grp, int update)
@@ -52,6 +63,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
kernfs_remove_by_name(parent, (*attr)->name);
if (grp->is_visible) {
mode = grp->is_visible(kobj, *attr, i);
+ mode &= ~SYSFS_GROUP_INVISIBLE;
if (!mode)
continue;
}
@@ -81,6 +93,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
(*bin_attr)->attr.name);
if (grp->is_bin_visible) {
mode = grp->is_bin_visible(kobj, *bin_attr, i);
+ mode &= ~SYSFS_GROUP_INVISIBLE;
if (!mode)
continue;
}
@@ -127,16 +140,31 @@ static int internal_create_group(struct kobject *kobj, int update,
kobject_get_ownership(kobj, &uid, &gid);
if (grp->name) {
+ umode_t mode = __first_visible(grp, kobj);
+
+ if (mode & SYSFS_GROUP_INVISIBLE)
+ mode = 0;
+ else
+ mode = S_IRWXU | S_IRUGO | S_IXUGO;
+
if (update) {
kn = kernfs_find_and_get(kobj->sd, grp->name);
if (!kn) {
- pr_warn("Can't update unknown attr grp name: %s/%s\n",
- kobj->name, grp->name);
- return -EINVAL;
+ pr_debug("attr grp %s/%s not created yet\n",
+ kobj->name, grp->name);
+ /* may have been invisible prior to this update */
+ update = 0;
+ } else if (!mode) {
+ sysfs_remove_group(kobj, grp);
+ kernfs_put(kn);
+ return 0;
}
- } else {
- kn = kernfs_create_dir_ns(kobj->sd, grp->name,
- S_IRWXU | S_IRUGO | S_IXUGO,
+ }
+
+ if (!update) {
+ if (!mode)
+ return 0;
+ kn = kernfs_create_dir_ns(kobj->sd, grp->name, mode,
uid, gid, kobj, NULL);
if (IS_ERR(kn)) {
if (PTR_ERR(kn) == -EEXIST)
@@ -279,9 +307,8 @@ void sysfs_remove_group(struct kobject *kobj,
if (grp->name) {
kn = kernfs_find_and_get(parent, grp->name);
if (!kn) {
- WARN(!kn, KERN_WARNING
- "sysfs group '%s' not found for kobject '%s'\n",
- grp->name, kobject_name(kobj));
+ pr_debug("sysfs group '%s' not found for kobject '%s'\n",
+ grp->name, kobject_name(kobj));
return;
}
} else {
@@ -318,13 +345,13 @@ void sysfs_remove_groups(struct kobject *kobj,
EXPORT_SYMBOL_GPL(sysfs_remove_groups);
/**
- * sysfs_merge_group - merge files into a pre-existing attribute group.
+ * sysfs_merge_group - merge files into a pre-existing named attribute group.
* @kobj: The kobject containing the group.
* @grp: The files to create and the attribute group they belong to.
*
- * This function returns an error if the group doesn't exist or any of the
- * files already exist in that group, in which case none of the new files
- * are created.
+ * This function returns an error if the group doesn't exist, the .name field is
+ * NULL or any of the files already exist in that group, in which case none of
+ * the new files are created.
*/
int sysfs_merge_group(struct kobject *kobj,
const struct attribute_group *grp)
@@ -356,7 +383,7 @@ int sysfs_merge_group(struct kobject *kobj,
EXPORT_SYMBOL_GPL(sysfs_merge_group);
/**
- * sysfs_unmerge_group - remove files from a pre-existing attribute group.
+ * sysfs_unmerge_group - remove files from a pre-existing named attribute group.
* @kobj: The kobject containing the group.
* @grp: The files to remove and the attribute group they belong to.
*/
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 110e8a272189..dc067eeb6387 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -35,6 +35,17 @@ static DEFINE_MUTEX(eventfs_mutex);
/* Choose something "unique" ;-) */
#define EVENTFS_FILE_INODE_INO 0x12c4e37
+struct eventfs_root_inode {
+ struct eventfs_inode ei;
+ struct dentry *events_dir;
+};
+
+static struct eventfs_root_inode *get_root_inode(struct eventfs_inode *ei)
+{
+ WARN_ON_ONCE(!ei->is_events);
+ return container_of(ei, struct eventfs_root_inode, ei);
+}
+
/* Just try to make something consistent and unique */
static int eventfs_dir_ino(struct eventfs_inode *ei)
{
@@ -73,12 +84,18 @@ enum {
static void release_ei(struct kref *ref)
{
struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
+ struct eventfs_root_inode *rei;
WARN_ON_ONCE(!ei->is_freed);
kfree(ei->entry_attrs);
kfree_const(ei->name);
- kfree_rcu(ei, rcu);
+ if (ei->is_events) {
+ rei = get_root_inode(ei);
+ kfree_rcu(rei, ei.rcu);
+ } else {
+ kfree_rcu(ei, rcu);
+ }
}
static inline void put_ei(struct eventfs_inode *ei)
@@ -408,19 +425,43 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry,
return NULL;
}
+static inline struct eventfs_inode *init_ei(struct eventfs_inode *ei, const char *name)
+{
+ ei->name = kstrdup_const(name, GFP_KERNEL);
+ if (!ei->name)
+ return NULL;
+ kref_init(&ei->kref);
+ return ei;
+}
+
static inline struct eventfs_inode *alloc_ei(const char *name)
{
struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+ struct eventfs_inode *result;
if (!ei)
return NULL;
- ei->name = kstrdup_const(name, GFP_KERNEL);
- if (!ei->name) {
+ result = init_ei(ei, name);
+ if (!result)
kfree(ei);
+
+ return result;
+}
+
+static inline struct eventfs_inode *alloc_root_ei(const char *name)
+{
+ struct eventfs_root_inode *rei = kzalloc(sizeof(*rei), GFP_KERNEL);
+ struct eventfs_inode *ei;
+
+ if (!rei)
return NULL;
- }
- kref_init(&ei->kref);
+
+ rei->ei.is_events = 1;
+ ei = init_ei(&rei->ei, name);
+ if (!ei)
+ kfree(rei);
+
return ei;
}
@@ -483,7 +524,7 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
struct dentry *result = NULL;
ti = get_tracefs(dir);
- if (!(ti->flags & TRACEFS_EVENT_INODE))
+ if (WARN_ON_ONCE(!(ti->flags & TRACEFS_EVENT_INODE)))
return ERR_PTR(-EIO);
mutex_lock(&eventfs_mutex);
@@ -495,7 +536,8 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
list_for_each_entry(ei_child, &ei->children, list) {
if (strcmp(ei_child->name, name) != 0)
continue;
- if (ei_child->is_freed)
+ /* A child is freed and removed from the list at the same time */
+ if (WARN_ON_ONCE(ei_child->is_freed))
goto out;
result = lookup_dir_entry(dentry, ei, ei_child);
goto out;
@@ -709,6 +751,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
int size, void *data)
{
struct dentry *dentry = tracefs_start_creating(name, parent);
+ struct eventfs_root_inode *rei;
struct eventfs_inode *ei;
struct tracefs_inode *ti;
struct inode *inode;
@@ -721,7 +764,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
if (IS_ERR(dentry))
return ERR_CAST(dentry);
- ei = alloc_ei(name);
+ ei = alloc_root_ei(name);
if (!ei)
goto fail;
@@ -730,10 +773,11 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
goto fail;
// Note: we have a ref to the dentry from tracefs_start_creating()
- ei->events_dir = dentry;
+ rei = get_root_inode(ei);
+ rei->events_dir = dentry;
+
ei->entries = entries;
ei->nr_entries = size;
- ei->is_events = 1;
ei->data = data;
/* Save the ownership of this directory */
@@ -844,13 +888,15 @@ void eventfs_remove_dir(struct eventfs_inode *ei)
*/
void eventfs_remove_events_dir(struct eventfs_inode *ei)
{
+ struct eventfs_root_inode *rei;
struct dentry *dentry;
- dentry = ei->events_dir;
+ rei = get_root_inode(ei);
+ dentry = rei->events_dir;
if (!dentry)
return;
- ei->events_dir = NULL;
+ rei->events_dir = NULL;
eventfs_remove_dir(ei);
/*
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index beb3dcd0e434..15c26f9aaad4 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -36,7 +36,6 @@ struct eventfs_attr {
* @children: link list into the child eventfs_inode
* @entries: the array of entries representing the files in the directory
* @name: the name of the directory to create
- * @events_dir: the dentry of the events directory
* @entry_attrs: Saved mode and ownership of the @d_children
* @data: The private data to pass to the callbacks
* @attr: Saved mode and ownership of eventfs_inode itself
@@ -54,7 +53,6 @@ struct eventfs_inode {
struct list_head children;
const struct eventfs_entry *entries;
const char *name;
- struct dentry *events_dir;
struct eventfs_attr *entry_attrs;
void *data;
struct eventfs_attr attr;
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index d013c5b3f1ed..ac77ac1fd73e 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -1742,17 +1742,22 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
err = dbg_walk_index(c, NULL, add_size, &calc);
if (err) {
ubifs_err(c, "error %d while walking the index", err);
- return err;
+ goto out_err;
}
if (calc != idx_size) {
ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld",
calc, idx_size);
dump_stack();
- return -EINVAL;
+ err = -EINVAL;
+ goto out_err;
}
return 0;
+
+out_err:
+ ubifs_destroy_tnc_tree(c);
+ return err;
}
/**
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 551148de66cd..eac0fef801f1 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1133,6 +1133,8 @@ out_cancel:
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
out_inode:
+ /* Free inode->i_link before inode is marked as bad. */
+ fscrypt_free_inode(inode);
make_bad_inode(inode);
iput(inode);
out_fname:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5029eb3390a5..a1f46919934c 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -96,36 +96,36 @@ dump:
return -EINVAL;
}
-static int do_readpage(struct page *page)
+static int do_readpage(struct folio *folio)
{
void *addr;
int err = 0, i;
unsigned int block, beyond;
- struct ubifs_data_node *dn;
- struct inode *inode = page->mapping->host;
+ struct ubifs_data_node *dn = NULL;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
loff_t i_size = i_size_read(inode);
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
- inode->i_ino, page->index, i_size, page->flags);
- ubifs_assert(c, !PageChecked(page));
- ubifs_assert(c, !PagePrivate(page));
+ inode->i_ino, folio->index, i_size, folio->flags);
+ ubifs_assert(c, !folio_test_checked(folio));
+ ubifs_assert(c, !folio->private);
- addr = kmap(page);
+ addr = kmap_local_folio(folio, 0);
- block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
if (block >= beyond) {
/* Reading beyond inode */
- SetPageChecked(page);
- memset(addr, 0, PAGE_SIZE);
+ folio_set_checked(folio);
+ addr = folio_zero_tail(folio, 0, addr);
goto out;
}
dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS);
if (!dn) {
err = -ENOMEM;
- goto error;
+ goto out;
}
i = 0;
@@ -150,39 +150,35 @@ static int do_readpage(struct page *page)
memset(addr + ilen, 0, dlen - ilen);
}
}
- if (++i >= UBIFS_BLOCKS_PER_PAGE)
+ if (++i >= (UBIFS_BLOCKS_PER_PAGE << folio_order(folio)))
break;
block += 1;
addr += UBIFS_BLOCK_SIZE;
+ if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) {
+ kunmap_local(addr - UBIFS_BLOCK_SIZE);
+ addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE);
+ }
}
+
if (err) {
struct ubifs_info *c = inode->i_sb->s_fs_info;
if (err == -ENOENT) {
/* Not found, so it must be a hole */
- SetPageChecked(page);
+ folio_set_checked(folio);
dbg_gen("hole");
- goto out_free;
+ err = 0;
+ } else {
+ ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
+ folio->index, inode->i_ino, err);
}
- ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
- page->index, inode->i_ino, err);
- goto error;
}
-out_free:
- kfree(dn);
out:
- SetPageUptodate(page);
- ClearPageError(page);
- flush_dcache_page(page);
- kunmap(page);
- return 0;
-
-error:
kfree(dn);
- ClearPageUptodate(page);
- SetPageError(page);
- flush_dcache_page(page);
- kunmap(page);
+ if (!err)
+ folio_mark_uptodate(folio);
+ flush_dcache_folio(folio);
+ kunmap_local(addr);
return err;
}
@@ -222,16 +218,16 @@ static int write_begin_slow(struct address_space *mapping,
pgoff_t index = pos >> PAGE_SHIFT;
struct ubifs_budget_req req = { .new_page = 1 };
int err, appending = !!(pos + len > inode->i_size);
- struct page *page;
+ struct folio *folio;
dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
inode->i_ino, pos, len, inode->i_size);
/*
- * At the slow path we have to budget before locking the page, because
- * budgeting may force write-back, which would wait on locked pages and
- * deadlock if we had the page locked. At this point we do not know
- * anything about the page, so assume that this is a new page which is
+ * At the slow path we have to budget before locking the folio, because
+ * budgeting may force write-back, which would wait on locked folios and
+ * deadlock if we had the folio locked. At this point we do not know
+ * anything about the folio, so assume that this is a new folio which is
* written to a hole. This corresponds to largest budget. Later the
* budget will be amended if this is not true.
*/
@@ -243,45 +239,43 @@ static int write_begin_slow(struct address_space *mapping,
if (unlikely(err))
return err;
- page = grab_cache_page_write_begin(mapping, index);
- if (unlikely(!page)) {
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio)) {
ubifs_release_budget(c, &req);
- return -ENOMEM;
+ return PTR_ERR(folio);
}
- if (!PageUptodate(page)) {
- if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE)
- SetPageChecked(page);
+ if (!folio_test_uptodate(folio)) {
+ if (pos == folio_pos(folio) && len >= folio_size(folio))
+ folio_set_checked(folio);
else {
- err = do_readpage(page);
+ err = do_readpage(folio);
if (err) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
ubifs_release_budget(c, &req);
return err;
}
}
-
- SetPageUptodate(page);
- ClearPageError(page);
}
- if (PagePrivate(page))
+ if (folio->private)
/*
- * The page is dirty, which means it was budgeted twice:
+ * The folio is dirty, which means it was budgeted twice:
* o first time the budget was allocated by the task which
- * made the page dirty and set the PG_private flag;
+ * made the folio dirty and set the private field;
* o and then we budgeted for it for the second time at the
* very beginning of this function.
*
- * So what we have to do is to release the page budget we
+ * So what we have to do is to release the folio budget we
* allocated.
*/
release_new_page_budget(c);
- else if (!PageChecked(page))
+ else if (!folio_test_checked(folio))
/*
- * We are changing a page which already exists on the media.
- * This means that changing the page does not make the amount
+ * We are changing a folio which already exists on the media.
+ * This means that changing the folio does not make the amount
* of indexing information larger, and this part of the budget
* which we have already acquired may be released.
*/
@@ -304,14 +298,14 @@ static int write_begin_slow(struct address_space *mapping,
ubifs_release_dirty_inode_budget(c, ui);
}
- *pagep = page;
+ *pagep = &folio->page;
return 0;
}
/**
* allocate_budget - allocate budget for 'ubifs_write_begin()'.
* @c: UBIFS file-system description object
- * @page: page to allocate budget for
+ * @folio: folio to allocate budget for
* @ui: UBIFS inode object the page belongs to
* @appending: non-zero if the page is appended
*
@@ -322,15 +316,15 @@ static int write_begin_slow(struct address_space *mapping,
*
* Returns: %0 in case of success and %-ENOSPC in case of failure.
*/
-static int allocate_budget(struct ubifs_info *c, struct page *page,
+static int allocate_budget(struct ubifs_info *c, struct folio *folio,
struct ubifs_inode *ui, int appending)
{
struct ubifs_budget_req req = { .fast = 1 };
- if (PagePrivate(page)) {
+ if (folio->private) {
if (!appending)
/*
- * The page is dirty and we are not appending, which
+ * The folio is dirty and we are not appending, which
* means no budget is needed at all.
*/
return 0;
@@ -354,11 +348,11 @@ static int allocate_budget(struct ubifs_info *c, struct page *page,
*/
req.dirtied_ino = 1;
} else {
- if (PageChecked(page))
+ if (folio_test_checked(folio))
/*
* The page corresponds to a hole and does not
* exist on the media. So changing it makes
- * make the amount of indexing information
+ * the amount of indexing information
* larger, and we have to budget for a new
* page.
*/
@@ -428,7 +422,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = pos >> PAGE_SHIFT;
int err, appending = !!(pos + len > inode->i_size);
int skipped_read = 0;
- struct page *page;
+ struct folio *folio;
ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
ubifs_assert(c, !c->ro_media && !c->ro_mount);
@@ -437,13 +431,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
return -EROFS;
/* Try out the fast-path part first */
- page = grab_cache_page_write_begin(mapping, index);
- if (unlikely(!page))
- return -ENOMEM;
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
/* The page is not loaded from the flash */
- if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
+ if (pos == folio_pos(folio) && len >= folio_size(folio)) {
/*
* We change whole page so no need to load it. But we
* do not know whether this page exists on the media or
@@ -453,32 +448,27 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* media. Thus, we are setting the @PG_checked flag
* here.
*/
- SetPageChecked(page);
+ folio_set_checked(folio);
skipped_read = 1;
} else {
- err = do_readpage(page);
+ err = do_readpage(folio);
if (err) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return err;
}
}
-
- SetPageUptodate(page);
- ClearPageError(page);
}
- err = allocate_budget(c, page, ui, appending);
+ err = allocate_budget(c, folio, ui, appending);
if (unlikely(err)) {
ubifs_assert(c, err == -ENOSPC);
/*
* If we skipped reading the page because we were going to
* write all of it, then it is not up to date.
*/
- if (skipped_read) {
- ClearPageChecked(page);
- ClearPageUptodate(page);
- }
+ if (skipped_read)
+ folio_clear_checked(folio);
/*
* Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the
@@ -490,8 +480,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
mutex_unlock(&ui->ui_mutex);
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return write_begin_slow(mapping, pos, len, pagep);
}
@@ -502,22 +492,21 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* with @ui->ui_mutex locked if we are appending pages, and unlocked
* otherwise. This is an optimization (slightly hacky though).
*/
- *pagep = page;
+ *pagep = &folio->page;
return 0;
-
}
/**
* cancel_budget - cancel budget.
* @c: UBIFS file-system description object
- * @page: page to cancel budget for
+ * @folio: folio to cancel budget for
* @ui: UBIFS inode object the page belongs to
* @appending: non-zero if the page is appended
*
* This is a helper function for a page write operation. It unlocks the
* @ui->ui_mutex in case of appending.
*/
-static void cancel_budget(struct ubifs_info *c, struct page *page,
+static void cancel_budget(struct ubifs_info *c, struct folio *folio,
struct ubifs_inode *ui, int appending)
{
if (appending) {
@@ -525,8 +514,8 @@ static void cancel_budget(struct ubifs_info *c, struct page *page,
ubifs_release_dirty_inode_budget(c, ui);
mutex_unlock(&ui->ui_mutex);
}
- if (!PagePrivate(page)) {
- if (PageChecked(page))
+ if (!folio->private) {
+ if (folio_test_checked(folio))
release_new_page_budget(c);
else
release_existing_page_budget(c);
@@ -537,6 +526,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -544,44 +534,47 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
int appending = !!(end_pos > inode->i_size);
dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
- inode->i_ino, pos, page->index, len, copied, inode->i_size);
+ inode->i_ino, pos, folio->index, len, copied, inode->i_size);
- if (unlikely(copied < len && len == PAGE_SIZE)) {
+ if (unlikely(copied < len && !folio_test_uptodate(folio))) {
/*
- * VFS copied less data to the page that it intended and
+ * VFS copied less data to the folio than it intended and
* declared in its '->write_begin()' call via the @len
- * argument. If the page was not up-to-date, and @len was
- * @PAGE_SIZE, the 'ubifs_write_begin()' function did
+ * argument. If the folio was not up-to-date,
+ * the 'ubifs_write_begin()' function did
* not load it from the media (for optimization reasons). This
- * means that part of the page contains garbage. So read the
- * page now.
+ * means that part of the folio contains garbage. So read the
+ * folio now.
*/
dbg_gen("copied %d instead of %d, read page and repeat",
copied, len);
- cancel_budget(c, page, ui, appending);
- ClearPageChecked(page);
+ cancel_budget(c, folio, ui, appending);
+ folio_clear_checked(folio);
/*
* Return 0 to force VFS to repeat the whole operation, or the
* error code if 'do_readpage()' fails.
*/
- copied = do_readpage(page);
+ copied = do_readpage(folio);
goto out;
}
- if (!PagePrivate(page)) {
- attach_page_private(page, (void *)1);
+ if (len == folio_size(folio))
+ folio_mark_uptodate(folio);
+
+ if (!folio->private) {
+ folio_attach_private(folio, (void *)1);
atomic_long_inc(&c->dirty_pg_cnt);
- __set_page_dirty_nobuffers(page);
+ filemap_dirty_folio(mapping, folio);
}
if (appending) {
i_size_write(inode, end_pos);
ui->ui_size = end_pos;
/*
- * Note, we do not set @I_DIRTY_PAGES (which means that the
- * inode has dirty pages), this has been done in
- * '__set_page_dirty_nobuffers()'.
+ * We do not set @I_DIRTY_PAGES (which means that
+ * the inode has dirty pages), this was done in
+ * filemap_dirty_folio().
*/
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
@@ -589,43 +582,43 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
}
out:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return copied;
}
/**
* populate_page - copy data nodes into a page for bulk-read.
* @c: UBIFS file-system description object
- * @page: page
+ * @folio: folio
* @bu: bulk-read information
* @n: next zbranch slot
*
* Returns: %0 on success and a negative error code on failure.
*/
-static int populate_page(struct ubifs_info *c, struct page *page,
+static int populate_page(struct ubifs_info *c, struct folio *folio,
struct bu_info *bu, int *n)
{
int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
loff_t i_size = i_size_read(inode);
unsigned int page_block;
void *addr, *zaddr;
pgoff_t end_index;
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
- inode->i_ino, page->index, i_size, page->flags);
+ inode->i_ino, folio->index, i_size, folio->flags);
- addr = zaddr = kmap(page);
+ addr = zaddr = kmap_local_folio(folio, 0);
end_index = (i_size - 1) >> PAGE_SHIFT;
- if (!i_size || page->index > end_index) {
+ if (!i_size || folio->index > end_index) {
hole = 1;
- memset(addr, 0, PAGE_SIZE);
+ addr = folio_zero_tail(folio, 0, addr);
goto out_hole;
}
- page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ page_block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
while (1) {
int err, len, out_len, dlen;
@@ -674,9 +667,13 @@ static int populate_page(struct ubifs_info *c, struct page *page,
break;
addr += UBIFS_BLOCK_SIZE;
page_block += 1;
+ if (folio_test_highmem(folio) && (offset_in_page(addr) == 0)) {
+ kunmap_local(addr - UBIFS_BLOCK_SIZE);
+ addr = kmap_local_folio(folio, i * UBIFS_BLOCK_SIZE);
+ }
}
- if (end_index == page->index) {
+ if (end_index == folio->index) {
int len = i_size & (PAGE_SIZE - 1);
if (len && len < read)
@@ -685,22 +682,19 @@ static int populate_page(struct ubifs_info *c, struct page *page,
out_hole:
if (hole) {
- SetPageChecked(page);
+ folio_set_checked(folio);
dbg_gen("hole");
}
- SetPageUptodate(page);
- ClearPageError(page);
- flush_dcache_page(page);
- kunmap(page);
+ folio_mark_uptodate(folio);
+ flush_dcache_folio(folio);
+ kunmap_local(addr);
*n = nn;
return 0;
out_err:
- ClearPageUptodate(page);
- SetPageError(page);
- flush_dcache_page(page);
- kunmap(page);
+ flush_dcache_folio(folio);
+ kunmap_local(addr);
ubifs_err(c, "bad data node (block %u, inode %lu)",
page_block, inode->i_ino);
return -EINVAL;
@@ -710,15 +704,15 @@ out_err:
* ubifs_do_bulk_read - do bulk-read.
* @c: UBIFS file-system description object
* @bu: bulk-read information
- * @page1: first page to read
+ * @folio1: first folio to read
*
* Returns: %1 if the bulk-read is done, otherwise %0 is returned.
*/
static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
- struct page *page1)
+ struct folio *folio1)
{
- pgoff_t offset = page1->index, end_index;
- struct address_space *mapping = page1->mapping;
+ pgoff_t offset = folio1->index, end_index;
+ struct address_space *mapping = folio1->mapping;
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
int err, page_idx, page_cnt, ret = 0, n = 0;
@@ -768,11 +762,11 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
goto out_warn;
}
- err = populate_page(c, page1, bu, &n);
+ err = populate_page(c, folio1, bu, &n);
if (err)
goto out_warn;
- unlock_page(page1);
+ folio_unlock(folio1);
ret = 1;
isize = i_size_read(inode);
@@ -782,19 +776,19 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
for (page_idx = 1; page_idx < page_cnt; page_idx++) {
pgoff_t page_offset = offset + page_idx;
- struct page *page;
+ struct folio *folio;
if (page_offset > end_index)
break;
- page = pagecache_get_page(mapping, page_offset,
+ folio = __filemap_get_folio(mapping, page_offset,
FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
ra_gfp_mask);
- if (!page)
+ if (IS_ERR(folio))
break;
- if (!PageUptodate(page))
- err = populate_page(c, page, bu, &n);
- unlock_page(page);
- put_page(page);
+ if (!folio_test_uptodate(folio))
+ err = populate_page(c, folio, bu, &n);
+ folio_unlock(folio);
+ folio_put(folio);
if (err)
break;
}
@@ -817,7 +811,7 @@ out_bu_off:
/**
* ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
- * @page: page from which to start bulk-read.
+ * @folio: folio from which to start bulk-read.
*
* Some flash media are capable of reading sequentially at faster rates. UBIFS
* bulk-read facility is designed to take advantage of that, by reading in one
@@ -826,12 +820,12 @@ out_bu_off:
*
* Returns: %1 if a bulk-read is done and %0 otherwise.
*/
-static int ubifs_bulk_read(struct page *page)
+static int ubifs_bulk_read(struct folio *folio)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
- pgoff_t index = page->index, last_page_read = ui->last_page_read;
+ pgoff_t index = folio->index, last_page_read = ui->last_page_read;
struct bu_info *bu;
int err = 0, allocated = 0;
@@ -879,8 +873,8 @@ static int ubifs_bulk_read(struct page *page)
bu->buf_len = c->max_bu_buf_len;
data_key_init(c, &bu->key, inode->i_ino,
- page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
- err = ubifs_do_bulk_read(c, bu, page);
+ folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
+ err = ubifs_do_bulk_read(c, bu, folio);
if (!allocated)
mutex_unlock(&c->bu_mutex);
@@ -894,69 +888,71 @@ out_unlock:
static int ubifs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
-
- if (ubifs_bulk_read(page))
+ if (ubifs_bulk_read(folio))
return 0;
- do_readpage(page);
+ do_readpage(folio);
folio_unlock(folio);
return 0;
}
-static int do_writepage(struct page *page, int len)
+static int do_writepage(struct folio *folio, size_t len)
{
- int err = 0, i, blen;
+ int err = 0, blen;
unsigned int block;
void *addr;
+ size_t offset = 0;
union ubifs_key key;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
#ifdef UBIFS_DEBUG
struct ubifs_inode *ui = ubifs_inode(inode);
spin_lock(&ui->ui_lock);
- ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT);
+ ubifs_assert(c, folio->index <= ui->synced_i_size >> PAGE_SHIFT);
spin_unlock(&ui->ui_lock);
#endif
- /* Update radix tree tags */
- set_page_writeback(page);
+ folio_start_writeback(folio);
- addr = kmap(page);
- block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
- i = 0;
- while (len) {
- blen = min_t(int, len, UBIFS_BLOCK_SIZE);
+ addr = kmap_local_folio(folio, offset);
+ block = folio->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ for (;;) {
+ blen = min_t(size_t, len, UBIFS_BLOCK_SIZE);
data_key_init(c, &key, inode->i_ino, block);
err = ubifs_jnl_write_data(c, inode, &key, addr, blen);
if (err)
break;
- if (++i >= UBIFS_BLOCKS_PER_PAGE)
+ len -= blen;
+ if (!len)
break;
block += 1;
addr += blen;
- len -= blen;
+ if (folio_test_highmem(folio) && !offset_in_page(addr)) {
+ kunmap_local(addr - blen);
+ offset += PAGE_SIZE;
+ addr = kmap_local_folio(folio, offset);
+ }
}
+ kunmap_local(addr);
if (err) {
- SetPageError(page);
- ubifs_err(c, "cannot write page %lu of inode %lu, error %d",
- page->index, inode->i_ino, err);
+ mapping_set_error(folio->mapping, err);
+ ubifs_err(c, "cannot write folio %lu of inode %lu, error %d",
+ folio->index, inode->i_ino, err);
ubifs_ro_mode(c, err);
}
- ubifs_assert(c, PagePrivate(page));
- if (PageChecked(page))
+ ubifs_assert(c, folio->private != NULL);
+ if (folio_test_checked(folio))
release_new_page_budget(c);
else
release_existing_page_budget(c);
atomic_long_dec(&c->dirty_pg_cnt);
- detach_page_private(page);
- ClearPageChecked(page);
+ folio_detach_private(folio);
+ folio_clear_checked(folio);
- kunmap(page);
- unlock_page(page);
- end_page_writeback(page);
+ folio_unlock(folio);
+ folio_end_writeback(folio);
return err;
}
@@ -1006,22 +1002,21 @@ static int do_writepage(struct page *page, int len)
* on the page lock and it would not write the truncated inode node to the
* journal before we have finished.
*/
-static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
+static int ubifs_writepage(struct folio *folio, struct writeback_control *wbc,
+ void *data)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
loff_t i_size = i_size_read(inode), synced_i_size;
- pgoff_t end_index = i_size >> PAGE_SHIFT;
- int err, len = i_size & (PAGE_SIZE - 1);
- void *kaddr;
+ int err, len = folio_size(folio);
dbg_gen("ino %lu, pg %lu, pg flags %#lx",
- inode->i_ino, page->index, page->flags);
- ubifs_assert(c, PagePrivate(page));
+ inode->i_ino, folio->index, folio->flags);
+ ubifs_assert(c, folio->private != NULL);
- /* Is the page fully outside @i_size? (truncate in progress) */
- if (page->index > end_index || (page->index == end_index && !len)) {
+ /* Is the folio fully outside @i_size? (truncate in progress) */
+ if (folio_pos(folio) >= i_size) {
err = 0;
goto out_unlock;
}
@@ -1030,9 +1025,9 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
synced_i_size = ui->synced_i_size;
spin_unlock(&ui->ui_lock);
- /* Is the page fully inside @i_size? */
- if (page->index < end_index) {
- if (page->index >= synced_i_size >> PAGE_SHIFT) {
+ /* Is the folio fully inside i_size? */
+ if (folio_pos(folio) + len <= i_size) {
+ if (folio_pos(folio) >= synced_i_size) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
goto out_redirty;
@@ -1045,20 +1040,18 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
* with this.
*/
}
- return do_writepage(page, PAGE_SIZE);
+ return do_writepage(folio, len);
}
/*
- * The page straddles @i_size. It must be zeroed out on each and every
+ * The folio straddles @i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- kaddr = kmap_atomic(page);
- memset(kaddr + len, 0, PAGE_SIZE - len);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
+ len = i_size - folio_pos(folio);
+ folio_zero_segment(folio, len, folio_size(folio));
if (i_size > synced_i_size) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
@@ -1066,19 +1059,25 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
goto out_redirty;
}
- return do_writepage(page, len);
+ return do_writepage(folio, len);
out_redirty:
/*
- * redirty_page_for_writepage() won't call ubifs_dirty_inode() because
+ * folio_redirty_for_writepage() won't call ubifs_dirty_inode() because
* it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so
* there is no need to do space budget for dirty inode.
*/
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
+static int ubifs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ return write_cache_pages(mapping, wbc, ubifs_writepage, NULL);
+}
+
/**
* do_attr_changes - change inode attributes.
* @inode: inode to change attributes for
@@ -1155,11 +1154,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
if (offset) {
pgoff_t index = new_size >> PAGE_SHIFT;
- struct page *page;
+ struct folio *folio;
- page = find_lock_page(inode->i_mapping, index);
- if (page) {
- if (PageDirty(page)) {
+ folio = filemap_lock_folio(inode->i_mapping, index);
+ if (!IS_ERR(folio)) {
+ if (folio_test_dirty(folio)) {
/*
* 'ubifs_jnl_truncate()' will try to truncate
* the last data node, but it contains
@@ -1168,14 +1167,14 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
* 'ubifs_jnl_truncate()' will see an already
* truncated (and up to date) data node.
*/
- ubifs_assert(c, PagePrivate(page));
+ ubifs_assert(c, folio->private != NULL);
- clear_page_dirty_for_io(page);
+ folio_clear_dirty_for_io(folio);
if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
- offset = new_size &
- (PAGE_SIZE - 1);
- err = do_writepage(page, offset);
- put_page(page);
+ offset = offset_in_folio(folio,
+ new_size);
+ err = do_writepage(folio, offset);
+ folio_put(folio);
if (err)
goto out_budg;
/*
@@ -1188,8 +1187,8 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
* to 'ubifs_jnl_truncate()' to save it from
* having to read it.
*/
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
}
}
@@ -1512,14 +1511,14 @@ static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags)
*/
static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct timespec64 now = current_time(inode);
struct ubifs_budget_req req = { .new_page = 1 };
int err, update_time;
- dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
+ dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, folio->index,
i_size_read(inode));
ubifs_assert(c, !c->ro_media && !c->ro_mount);
@@ -1527,17 +1526,17 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_SIGBUS; /* -EROFS */
/*
- * We have not locked @page so far so we may budget for changing the
- * page. Note, we cannot do this after we locked the page, because
+ * We have not locked @folio so far so we may budget for changing the
+ * folio. Note, we cannot do this after we locked the folio, because
* budgeting may cause write-back which would cause deadlock.
*
- * At the moment we do not know whether the page is dirty or not, so we
- * assume that it is not and budget for a new page. We could look at
+ * At the moment we do not know whether the folio is dirty or not, so we
+ * assume that it is not and budget for a new folio. We could look at
* the @PG_private flag and figure this out, but we may race with write
- * back and the page state may change by the time we lock it, so this
+ * back and the folio state may change by the time we lock it, so this
* would need additional care. We do not bother with this at the
* moment, although it might be good idea to do. Instead, we allocate
- * budget for a new page and amend it later on if the page was in fact
+ * budget for a new folio and amend it later on if the folio was in fact
* dirty.
*
* The budgeting-related logic of this function is similar to what we
@@ -1560,21 +1559,21 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
- lock_page(page);
- if (unlikely(page->mapping != inode->i_mapping ||
- page_offset(page) > i_size_read(inode))) {
- /* Page got truncated out from underneath us */
+ folio_lock(folio);
+ if (unlikely(folio->mapping != inode->i_mapping ||
+ folio_pos(folio) >= i_size_read(inode))) {
+ /* Folio got truncated out from underneath us */
goto sigbus;
}
- if (PagePrivate(page))
+ if (folio->private)
release_new_page_budget(c);
else {
- if (!PageChecked(page))
+ if (!folio_test_checked(folio))
ubifs_convert_page_budget(c);
- attach_page_private(page, (void *)1);
+ folio_attach_private(folio, (void *)1);
atomic_long_inc(&c->dirty_pg_cnt);
- __set_page_dirty_nobuffers(page);
+ filemap_dirty_folio(folio->mapping, folio);
}
if (update_time) {
@@ -1590,11 +1589,11 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
ubifs_release_dirty_inode_budget(c, ui);
}
- wait_for_stable_page(page);
+ folio_wait_stable(folio);
return VM_FAULT_LOCKED;
sigbus:
- unlock_page(page);
+ folio_unlock(folio);
ubifs_release_budget(c, &req);
return VM_FAULT_SIGBUS;
}
@@ -1648,7 +1647,7 @@ static int ubifs_symlink_getattr(struct mnt_idmap *idmap,
const struct address_space_operations ubifs_file_address_operations = {
.read_folio = ubifs_read_folio,
- .writepage = ubifs_writepage,
+ .writepages = ubifs_writepages,
.write_begin = ubifs_write_begin,
.write_end = ubifs_write_end,
.invalidate_folio = ubifs_invalidate_folio,
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 873e6e1c92b5..6ebf3c04ac5f 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -82,8 +82,9 @@ static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops)
*/
static int scan_for_dirty_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
- struct scan_data *data)
+ void *arg)
{
+ struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
@@ -166,8 +167,7 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
data.pick_free = pick_free;
data.lnum = -1;
data.exclude_index = exclude_index;
- err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
- (ubifs_lpt_scan_callback)scan_for_dirty_cb,
+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_dirty_cb,
&data);
if (err)
return ERR_PTR(err);
@@ -349,8 +349,9 @@ out:
*/
static int scan_for_free_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
- struct scan_data *data)
+ void *arg)
{
+ struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
@@ -446,7 +447,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
data.pick_free = pick_free;
data.lnum = -1;
err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
- (ubifs_lpt_scan_callback)scan_for_free_cb,
+ scan_for_free_cb,
&data);
if (err)
return ERR_PTR(err);
@@ -589,8 +590,9 @@ out:
*/
static int scan_for_idx_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
- struct scan_data *data)
+ void *arg)
{
+ struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
@@ -625,8 +627,7 @@ static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
int err;
data.lnum = -1;
- err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
- (ubifs_lpt_scan_callback)scan_for_idx_cb,
+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_for_idx_cb,
&data);
if (err)
return ERR_PTR(err);
@@ -726,11 +727,10 @@ out:
return err;
}
-static int cmp_dirty_idx(const struct ubifs_lprops **a,
- const struct ubifs_lprops **b)
+static int cmp_dirty_idx(const void *a, const void *b)
{
- const struct ubifs_lprops *lpa = *a;
- const struct ubifs_lprops *lpb = *b;
+ const struct ubifs_lprops *lpa = *(const struct ubifs_lprops **)a;
+ const struct ubifs_lprops *lpb = *(const struct ubifs_lprops **)b;
return lpa->dirty + lpa->free - lpb->dirty - lpb->free;
}
@@ -754,7 +754,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
sizeof(void *) * c->dirty_idx.cnt);
/* Sort it so that the dirtiest is now at the end */
sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
- (int (*)(const void *, const void *))cmp_dirty_idx, NULL);
+ cmp_dirty_idx, NULL);
dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
if (c->dirty_idx.cnt)
dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
@@ -782,8 +782,9 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
*/
static int scan_dirty_idx_cb(struct ubifs_info *c,
const struct ubifs_lprops *lprops, int in_tree,
- struct scan_data *data)
+ void *arg)
{
+ struct scan_data *data = arg;
int ret = LPT_SCAN_CONTINUE;
/* Exclude LEBs that are currently in use */
@@ -842,8 +843,7 @@ static int find_dirty_idx_leb(struct ubifs_info *c)
if (c->pnodes_have >= c->pnode_cnt)
/* All pnodes are in memory, so skip scan */
return -ENOSPC;
- err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
- (ubifs_lpt_scan_callback)scan_dirty_idx_cb,
+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, scan_dirty_idx_cb,
&data);
if (err)
return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f0a5538c84b0..74aee92433d7 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -293,6 +293,96 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
}
/**
+ * __queue_and_wait - queue a task and wait until the task is waked up.
+ * @c: UBIFS file-system description object
+ *
+ * This function adds current task in queue and waits until the task is waked
+ * up. This function should be called with @c->reserve_space_wq locked.
+ */
+static void __queue_and_wait(struct ubifs_info *c)
+{
+ DEFINE_WAIT(wait);
+
+ __add_wait_queue_entry_tail_exclusive(&c->reserve_space_wq, &wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&c->reserve_space_wq.lock);
+
+ schedule();
+ finish_wait(&c->reserve_space_wq, &wait);
+}
+
+/**
+ * wait_for_reservation - try queuing current task to wait until waked up.
+ * @c: UBIFS file-system description object
+ *
+ * This function queues current task to wait until waked up, if queuing is
+ * started(@c->need_wait_space is not %0). Returns %true if current task is
+ * added in queue, otherwise %false is returned.
+ */
+static bool wait_for_reservation(struct ubifs_info *c)
+{
+ if (likely(atomic_read(&c->need_wait_space) == 0))
+ /* Quick path to check whether queuing is started. */
+ return false;
+
+ spin_lock(&c->reserve_space_wq.lock);
+ if (atomic_read(&c->need_wait_space) == 0) {
+ /* Queuing is not started, don't queue current task. */
+ spin_unlock(&c->reserve_space_wq.lock);
+ return false;
+ }
+
+ __queue_and_wait(c);
+ return true;
+}
+
+/**
+ * wake_up_reservation - wake up first task in queue or stop queuing.
+ * @c: UBIFS file-system description object
+ *
+ * This function wakes up the first task in queue if it exists, or stops
+ * queuing if no tasks in queue.
+ */
+static void wake_up_reservation(struct ubifs_info *c)
+{
+ spin_lock(&c->reserve_space_wq.lock);
+ if (waitqueue_active(&c->reserve_space_wq))
+ wake_up_locked(&c->reserve_space_wq);
+ else
+ /*
+ * Compared with wait_for_reservation(), set @c->need_wait_space
+ * under the protection of wait queue lock, which can avoid that
+ * @c->need_wait_space is set to 0 after new task queued.
+ */
+ atomic_set(&c->need_wait_space, 0);
+ spin_unlock(&c->reserve_space_wq.lock);
+}
+
+/**
+ * wake_up_reservation - add current task in queue or start queuing.
+ * @c: UBIFS file-system description object
+ *
+ * This function starts queuing if queuing is not started, otherwise adds
+ * current task in queue.
+ */
+static void add_or_start_queue(struct ubifs_info *c)
+{
+ spin_lock(&c->reserve_space_wq.lock);
+ if (atomic_cmpxchg(&c->need_wait_space, 0, 1) == 0) {
+ /* Starts queuing, task can go on directly. */
+ spin_unlock(&c->reserve_space_wq.lock);
+ return;
+ }
+
+ /*
+ * There are at least two tasks have retried more than 32 times
+ * at certain point, first task has started queuing, just queue
+ * the left tasks.
+ */
+ __queue_and_wait(c);
+}
+
+/**
* make_reservation - reserve journal space.
* @c: UBIFS file-system description object
* @jhead: journal head
@@ -311,33 +401,27 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
static int make_reservation(struct ubifs_info *c, int jhead, int len)
{
int err, cmt_retries = 0, nospc_retries = 0;
+ bool blocked = wait_for_reservation(c);
again:
down_read(&c->commit_sem);
err = reserve_space(c, jhead, len);
- if (!err)
+ if (!err) {
/* c->commit_sem will get released via finish_reservation(). */
- return 0;
+ goto out_wake_up;
+ }
up_read(&c->commit_sem);
if (err == -ENOSPC) {
/*
* GC could not make any progress. We should try to commit
- * once because it could make some dirty space and GC would
- * make progress, so make the error -EAGAIN so that the below
+ * because it could make some dirty space and GC would make
+ * progress, so make the error -EAGAIN so that the below
* will commit and re-try.
*/
- if (nospc_retries++ < 2) {
- dbg_jnl("no space, retry");
- err = -EAGAIN;
- }
-
- /*
- * This means that the budgeting is incorrect. We always have
- * to be able to write to the media, because all operations are
- * budgeted. Deletions are not budgeted, though, but we reserve
- * an extra LEB for them.
- */
+ nospc_retries++;
+ dbg_jnl("no space, retry");
+ err = -EAGAIN;
}
if (err != -EAGAIN)
@@ -349,15 +433,37 @@ again:
*/
if (cmt_retries > 128) {
/*
- * This should not happen unless the journal size limitations
- * are too tough.
+ * This should not happen unless:
+ * 1. The journal size limitations are too tough.
+ * 2. The budgeting is incorrect. We always have to be able to
+ * write to the media, because all operations are budgeted.
+ * Deletions are not budgeted, though, but we reserve an
+ * extra LEB for them.
*/
- ubifs_err(c, "stuck in space allocation");
+ ubifs_err(c, "stuck in space allocation, nospc_retries %d",
+ nospc_retries);
err = -ENOSPC;
goto out;
- } else if (cmt_retries > 32)
- ubifs_warn(c, "too many space allocation re-tries (%d)",
- cmt_retries);
+ } else if (cmt_retries > 32) {
+ /*
+ * It's almost impossible to happen, unless there are many tasks
+ * making reservation concurrently and someone task has retried
+ * gc + commit for many times, generated available space during
+ * this period are grabbed by other tasks.
+ * But if it happens, start queuing up all tasks that will make
+ * space reservation, then there is only one task making space
+ * reservation at any time, and it can always make success under
+ * the premise of correct budgeting.
+ */
+ ubifs_warn(c, "too many space allocation cmt_retries (%d) "
+ "nospc_retries (%d), start queuing tasks",
+ cmt_retries, nospc_retries);
+
+ if (!blocked) {
+ blocked = true;
+ add_or_start_queue(c);
+ }
+ }
dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
cmt_retries);
@@ -365,7 +471,7 @@ again:
err = ubifs_run_commit(c);
if (err)
- return err;
+ goto out_wake_up;
goto again;
out:
@@ -380,6 +486,27 @@ out:
cmt_retries = dbg_check_lprops(c);
up_write(&c->commit_sem);
}
+out_wake_up:
+ if (blocked) {
+ /*
+ * Only tasks that have ever started queuing or ever been queued
+ * can wake up other queued tasks, which can make sure that
+ * there is only one task waked up to make space reservation.
+ * For example:
+ * task A task B task C
+ * make_reservation make_reservation
+ * reserve_space // 0
+ * wake_up_reservation
+ * atomic_cmpxchg // 0, start queuing
+ * reserve_space
+ * wait_for_reservation
+ * __queue_and_wait
+ * add_wait_queue
+ * if (blocked) // false
+ * // So that task C won't be waked up to race with task B
+ */
+ wake_up_reservation(c);
+ }
return err;
}
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 6d6cd85c2b4c..a11c3dab7e16 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1014,8 +1014,9 @@ out:
*/
static int scan_check_cb(struct ubifs_info *c,
const struct ubifs_lprops *lp, int in_tree,
- struct ubifs_lp_stats *lst)
+ void *arg)
{
+ struct ubifs_lp_stats *lst = arg;
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
@@ -1269,8 +1270,7 @@ int dbg_check_lprops(struct ubifs_info *c)
memset(&lst, 0, sizeof(struct ubifs_lp_stats));
err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
- (ubifs_lpt_scan_callback)scan_check_cb,
- &lst);
+ scan_check_cb, &lst);
if (err && err != -ENOSPC)
goto out;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index c4d079328b92..07351fdce722 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1646,7 +1646,6 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
len -= node_len;
}
- err = 0;
out:
vfree(buf);
return err;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 7f4031a15f4d..291583005dd1 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2151,6 +2151,8 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
mutex_init(&c->bu_mutex);
mutex_init(&c->write_reserve_mutex);
init_waitqueue_head(&c->cmt_wq);
+ init_waitqueue_head(&c->reserve_space_wq);
+ atomic_set(&c->need_wait_space, 0);
c->buds = RB_ROOT;
c->old_idx = RB_ROOT;
c->size_tree = RB_ROOT;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f4728e65d1bd..45cacdcd4746 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -3116,14 +3116,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
void ubifs_tnc_close(struct ubifs_info *c)
{
tnc_destroy_cnext(c);
- if (c->zroot.znode) {
- long n, freed;
-
- n = atomic_long_read(&c->clean_zn_cnt);
- freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);
- ubifs_assert(c, freed == n);
- atomic_long_sub(n, &ubifs_clean_zn_cnt);
- }
+ ubifs_destroy_tnc_tree(c);
kfree(c->gap_lebs);
kfree(c->ilebs);
destroy_old_idx(c);
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index 4d686e34e64d..d3f8a6aa1f49 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -251,6 +251,28 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
}
/**
+ * ubifs_destroy_tnc_tree - destroy all znodes connected to the TNC tree.
+ * @c: UBIFS file-system description object
+ *
+ * This function destroys the whole TNC tree and updates clean global znode
+ * count.
+ */
+void ubifs_destroy_tnc_tree(struct ubifs_info *c)
+{
+ long n, freed;
+
+ if (!c->zroot.znode)
+ return;
+
+ n = atomic_long_read(&c->clean_zn_cnt);
+ freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);
+ ubifs_assert(c, freed == n);
+ atomic_long_sub(n, &ubifs_clean_zn_cnt);
+
+ c->zroot.znode = NULL;
+}
+
+/**
* read_znode - read an indexing node from flash and fill znode.
* @c: UBIFS file-system description object
* @zzbr: the zbranch describing the node to read
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 3916dc4f30ca..1f3ea879d93a 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1047,6 +1047,8 @@ struct ubifs_debug_info;
* @bg_bud_bytes: number of bud bytes when background commit is initiated
* @old_buds: buds to be released after commit ends
* @max_bud_cnt: maximum number of buds
+ * @need_wait_space: Non %0 means space reservation tasks need to wait in queue
+ * @reserve_space_wq: wait queue to sleep on if @need_wait_space is not %0
*
* @commit_sem: synchronizes committer with other processes
* @cmt_state: commit state
@@ -1305,6 +1307,8 @@ struct ubifs_info {
long long bg_bud_bytes;
struct list_head old_buds;
int max_bud_cnt;
+ atomic_t need_wait_space;
+ wait_queue_head_t reserve_space_wq;
struct rw_semaphore commit_sem;
int cmt_state;
@@ -1903,6 +1907,7 @@ struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c,
struct ubifs_znode *znode);
long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
struct ubifs_znode *zr);
+void ubifs_destroy_tnc_tree(struct ubifs_info *c);
struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
struct ubifs_zbranch *zbr,
struct ubifs_znode *parent, int iip);
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index 8ad38c64708e..9bb2d24de709 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -81,8 +81,6 @@ xmbuf_alloc(
/* ensure all writes are below EOF to avoid pagecache zeroing */
i_size_write(inode, inode->i_sb->s_maxbytes);
- trace_xmbuf_create(btp);
-
error = xfs_buf_cache_init(btp->bt_cache);
if (error)
goto out_file;
@@ -99,6 +97,8 @@ xmbuf_alloc(
if (error)
goto out_bcache;
+ trace_xmbuf_create(btp);
+
*btpp = btp;
return 0;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 30d36596a2e4..c98cb468c357 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -811,6 +811,12 @@ restart:
* caller should throw away the dquot and start over. Otherwise, the dquot
* is returned locked (and held by the cache) as if there had been a cache
* hit.
+ *
+ * The insert needs to be done under memalloc_nofs context because the radix
+ * tree can do memory allocation during insert. The qi->qi_tree_lock is taken in
+ * memory reclaim when freeing unused dquots, so we cannot have the radix tree
+ * node allocation recursing into filesystem reclaim whilst we hold the
+ * qi_tree_lock.
*/
static int
xfs_qm_dqget_cache_insert(
@@ -820,25 +826,27 @@ xfs_qm_dqget_cache_insert(
xfs_dqid_t id,
struct xfs_dquot *dqp)
{
+ unsigned int nofs_flags;
int error;
+ nofs_flags = memalloc_nofs_save();
mutex_lock(&qi->qi_tree_lock);
error = radix_tree_insert(tree, id, dqp);
if (unlikely(error)) {
/* Duplicate found! Caller must try again. */
- mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_dup(dqp);
- return error;
+ goto out_unlock;
}
/* Return a locked dquot to the caller, with a reference taken. */
xfs_dqlock(dqp);
dqp->q_nrefs = 1;
-
qi->qi_dquots++;
- mutex_unlock(&qi->qi_tree_lock);
- return 0;
+out_unlock:
+ mutex_unlock(&qi->qi_tree_lock);
+ memalloc_nofs_restore(nofs_flags);
+ return error;
}
/* Check our input parameters. */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 56b07d8ed431..aea97fc074f8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -4626,6 +4626,7 @@ TRACE_EVENT(xmbuf_create,
char *path;
struct file *file = btp->bt_file;
+ __entry->dev = btp->bt_mount->m_super->s_dev;
__entry->ino = file_inode(file)->i_ino;
memset(pathname, 0, sizeof(pathname));
path = file_path(file, pathname, sizeof(pathname) - 1);
@@ -4633,7 +4634,8 @@ TRACE_EVENT(xmbuf_create,
path = "(unknown)";
strncpy(__entry->pathname, path, sizeof(__entry->pathname));
),
- TP_printk("xmino 0x%lx path '%s'",
+ TP_printk("dev %d:%d xmino 0x%lx path '%s'",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->pathname)
);
@@ -4642,6 +4644,7 @@ TRACE_EVENT(xmbuf_free,
TP_PROTO(struct xfs_buftarg *btp),
TP_ARGS(btp),
TP_STRUCT__entry(
+ __field(dev_t, dev)
__field(unsigned long, ino)
__field(unsigned long long, bytes)
__field(loff_t, size)
@@ -4650,11 +4653,13 @@ TRACE_EVENT(xmbuf_free,
struct file *file = btp->bt_file;
struct inode *inode = file_inode(file);
+ __entry->dev = btp->bt_mount->m_super->s_dev;
__entry->size = i_size_read(inode);
__entry->bytes = (inode->i_blocks << SECTOR_SHIFT) + inode->i_bytes;
__entry->ino = inode->i_ino;
),
- TP_printk("xmino 0x%lx mem_bytes 0x%llx isize 0x%llx",
+ TP_printk("dev %d:%d xmino 0x%lx mem_bytes 0x%llx isize 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->bytes,
__entry->size)