summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/file.c9
-rw-r--r--fs/afs/cell.c11
-rw-r--r--fs/afs/dynroot.c15
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/proc.c4
-rw-r--r--fs/bcachefs/btree_io.c2
-rw-r--r--fs/bcachefs/btree_update.h8
-rw-r--r--fs/bcachefs/btree_write_buffer.c21
-rw-r--r--fs/bcachefs/extents.c2
-rw-r--r--fs/bcachefs/inode.c1
-rw-r--r--fs/bcachefs/io_read.c19
-rw-r--r--fs/bcachefs/journal.c59
-rw-r--r--fs/bcachefs/movinggc.c25
-rw-r--r--fs/bcachefs/super-io.c24
-rw-r--r--fs/bcachefs/super-io.h11
-rw-r--r--fs/bcachefs/super.c11
-rw-r--r--fs/bcachefs/util.c24
-rw-r--r--fs/bcachefs/util.h2
-rw-r--r--fs/btrfs/inode.c9
-rw-r--r--fs/btrfs/sysfs.c4
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/coredump.c15
-rw-r--r--fs/exfat/balloc.c10
-rw-r--r--fs/exfat/exfat_fs.h2
-rw-r--r--fs/exfat/fatent.c11
-rw-r--r--fs/exfat/file.c2
-rw-r--r--fs/exfat/namei.c7
-rw-r--r--fs/ext4/file.c3
-rw-r--r--fs/fuse/dev.c15
-rw-r--r--fs/nfs/file.c3
-rw-r--r--fs/pipe.c32
-rw-r--r--fs/smb/client/cifsacl.c34
-rw-r--r--fs/smb/client/connect.c16
-rw-r--r--fs/smb/client/fs_context.c18
-rw-r--r--fs/smb/common/smbacl.h3
-rw-r--r--fs/smb/server/connection.c20
-rw-r--r--fs/smb/server/connection.h2
-rw-r--r--fs/smb/server/ksmbd_work.c3
-rw-r--r--fs/smb/server/ksmbd_work.h1
-rw-r--r--fs/smb/server/oplock.c43
-rw-r--r--fs/smb/server/oplock.h1
-rw-r--r--fs/smb/server/server.c14
-rw-r--r--fs/smb/server/smb2pdu.c8
-rw-r--r--fs/smb/server/smbacl.c52
-rw-r--r--fs/smb/server/smbacl.h2
-rw-r--r--fs/smb/server/transport_ipc.c1
-rw-r--r--fs/splice.c20
-rw-r--r--fs/vboxsf/super.c3
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c8
-rw-r--r--fs/xfs/xfs_buf.c182
-rw-r--r--fs/xfs/xfs_buf.h7
-rw-r--r--fs/xfs/xfs_buf_mem.c2
-rw-r--r--fs/xfs/xfs_file.c13
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_mount.c7
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_trace.h1
57 files changed, 441 insertions, 388 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index a5a861dd5223..7a71018e3f67 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -596,7 +596,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
BUG_ON(tmp > bsize);
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
- AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
+ AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1);
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp);
affs_fix_checksum(sb, bh);
bh->b_state &= ~(1UL << BH_New);
@@ -724,7 +724,8 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
tmp = min(bsize - boff, to - from);
BUG_ON(boff + tmp > bsize || tmp > bsize);
memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
- be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
+ AFFS_DATA_HEAD(bh)->size = cpu_to_be32(
+ max(boff + tmp, be32_to_cpu(AFFS_DATA_HEAD(bh)->size)));
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
written += tmp;
@@ -746,7 +747,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
if (buffer_new(bh)) {
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
- AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
+ AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1);
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(bsize);
AFFS_DATA_HEAD(bh)->next = 0;
bh->b_state &= ~(1UL << BH_New);
@@ -780,7 +781,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
if (buffer_new(bh)) {
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
- AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
+ AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx + 1);
AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp);
AFFS_DATA_HEAD(bh)->next = 0;
bh->b_state &= ~(1UL << BH_New);
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index cee42646736c..96a6781f3653 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -64,7 +64,8 @@ static struct afs_cell *afs_find_cell_locked(struct afs_net *net,
return ERR_PTR(-ENAMETOOLONG);
if (!name) {
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell,
+ lockdep_is_held(&net->cells_lock));
if (!cell)
return ERR_PTR(-EDESTADDRREQ);
goto found;
@@ -388,8 +389,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
/* install the new cell */
down_write(&net->cells_lock);
afs_see_cell(new_root, afs_cell_trace_see_ws);
- old_root = net->ws_cell;
- net->ws_cell = new_root;
+ old_root = rcu_replace_pointer(net->ws_cell, new_root,
+ lockdep_is_held(&net->cells_lock));
up_write(&net->cells_lock);
afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws);
@@ -945,8 +946,8 @@ void afs_cell_purge(struct afs_net *net)
_enter("");
down_write(&net->cells_lock);
- ws = net->ws_cell;
- net->ws_cell = NULL;
+ ws = rcu_replace_pointer(net->ws_cell, NULL,
+ lockdep_is_held(&net->cells_lock));
up_write(&net->cells_lock);
afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index d8bf52f77d93..008698d706ca 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -314,12 +314,23 @@ static const char *afs_atcell_get_link(struct dentry *dentry, struct inode *inod
const char *name;
bool dotted = vnode->fid.vnode == 3;
- if (!net->ws_cell)
+ if (!dentry) {
+ /* We're in RCU-pathwalk. */
+ cell = rcu_dereference(net->ws_cell);
+ if (dotted)
+ name = cell->name - 1;
+ else
+ name = cell->name;
+ /* Shouldn't need to set a delayed call. */
+ return name;
+ }
+
+ if (!rcu_access_pointer(net->ws_cell))
return ERR_PTR(-ENOENT);
down_read(&net->cells_lock);
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
if (dotted)
name = cell->name - 1;
else
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 90f407774a9a..df30bd62da79 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -287,7 +287,7 @@ struct afs_net {
/* Cell database */
struct rb_root cells;
- struct afs_cell *ws_cell;
+ struct afs_cell __rcu *ws_cell;
struct work_struct cells_manager;
struct timer_list cells_timer;
atomic_t cells_outstanding;
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index e7614f4f30c2..12c88d8be3fe 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -206,7 +206,7 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v)
net = afs_seq2net_single(m);
down_read(&net->cells_lock);
- cell = net->ws_cell;
+ cell = rcu_dereference_protected(net->ws_cell, lockdep_is_held(&net->cells_lock));
if (cell)
seq_printf(m, "%s\n", cell->name);
up_read(&net->cells_lock);
@@ -242,7 +242,7 @@ static int afs_proc_rootcell_write(struct file *file, char *buf, size_t size)
ret = -EEXIST;
inode_lock(file_inode(file));
- if (!net->ws_cell)
+ if (!rcu_access_pointer(net->ws_cell))
ret = afs_cell_init(net, buf);
else
printk("busy\n");
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index dece27d9db04..756736f9243d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1186,7 +1186,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
le64_to_cpu(i->journal_seq),
b->written, b->written + sectors, ptr_written);
- b->written += sectors;
+ b->written = min(b->written + sectors, btree_sectors(c));
if (blacklisted && !first)
continue;
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 8f22ef9a7651..47d8690f01bf 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -126,10 +126,18 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
+int bch2_btree_write_buffer_insert_err(struct btree_trans *,
+ enum btree_id, struct bkey_i *);
+
static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
enum btree_id btree,
struct bkey_i *k)
{
+ if (unlikely(!btree_type_uses_write_buffer(btree))) {
+ int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
+ dump_stack();
+ return ret;
+ }
/*
* Most updates skip the btree write buffer until journal replay is
* finished because synchronization with journal replay relies on having
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index b56c4987b8c9..2c09d19dd621 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -264,6 +264,22 @@ out:
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
}
+int bch2_btree_write_buffer_insert_err(struct btree_trans *trans,
+ enum btree_id btree, struct bkey_i *k)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
+ bch2_btree_id_to_text(&buf, btree);
+ prt_str(&buf, "\n");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return -EROFS;
+}
+
static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
@@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
darray_for_each(wb->sorted, i) {
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
- BUG_ON(!btree_type_uses_write_buffer(k->btree));
+ if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
+ ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k);
+ goto err;
+ }
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
prefetch(&wb->flushing.keys.data[n->idx]);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 05d5f71a7ca9..2d8042f853dc 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -99,7 +99,7 @@ static inline bool ptr_better(struct bch_fs *c,
/* Pick at random, biased in favor of the faster device: */
- return bch2_rand_range(l1 + l2) > l1;
+ return bch2_get_random_u64_below(l1 + l2) > l1;
}
if (bch2_force_reconstruct_read)
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 04ec05206f8c..339b80770f1d 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1198,6 +1198,7 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
opts->_name##_from_inode = true; \
} else { \
opts->_name = c->opts._name; \
+ opts->_name##_from_inode = false; \
}
BCH_INODE_OPTS()
#undef x
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 8c7b2d3d779d..aa91fcf51eec 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -59,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
}
rcu_read_unlock();
- return bch2_rand_range(nr * CONGESTED_MAX) < total;
+ return get_random_u32_below(nr * CONGESTED_MAX) < total;
}
#else
@@ -951,12 +951,6 @@ retry_pick:
goto retry_pick;
}
- /*
- * Unlock the iterator while the btree node's lock is still in
- * cache, before doing the IO:
- */
- bch2_trans_unlock(trans);
-
if (flags & BCH_READ_NODECODE) {
/*
* can happen if we retry, and the extent we were going to read
@@ -1113,6 +1107,15 @@ get_bio:
trace_and_count(c, read_split, &orig->bio);
}
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ if (!(flags & BCH_READ_IN_RETRY))
+ bch2_trans_unlock(trans);
+ else
+ bch2_trans_unlock_long(trans);
+
if (!rbio->pick.idx) {
if (unlikely(!rbio->have_ioref)) {
struct printbuf buf = PRINTBUF;
@@ -1160,6 +1163,8 @@ out:
if (likely(!(flags & BCH_READ_IN_RETRY))) {
return 0;
} else {
+ bch2_trans_unlock(trans);
+
int ret;
rbio->context = RBIO_CONTEXT_UNBOUND;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 5dabbf3c0965..05b1250619ec 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1021,8 +1021,8 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j,
/* allocate journal on a device: */
-static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
- bool new_fs, struct closure *cl)
+static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr,
+ bool new_fs, struct closure *cl)
{
struct bch_fs *c = ca->fs;
struct journal_device *ja = &ca->journal;
@@ -1150,26 +1150,20 @@ err_free:
return ret;
}
-/*
- * Allocate more journal space at runtime - not currently making use if it, but
- * the code works:
- */
-int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
- unsigned nr)
+static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca,
+ unsigned nr, bool new_fs)
{
struct journal_device *ja = &ca->journal;
- struct closure cl;
int ret = 0;
+ struct closure cl;
closure_init_stack(&cl);
- down_write(&c->state_lock);
-
/* don't handle reducing nr of buckets yet: */
if (nr < ja->nr)
- goto unlock;
+ return 0;
- while (ja->nr < nr) {
+ while (!ret && ja->nr < nr) {
struct disk_reservation disk_res = { 0, 0, 0 };
/*
@@ -1182,27 +1176,38 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
* filesystem-wide allocation will succeed, this is a device
* specific allocation - we can hang here:
*/
+ if (!new_fs) {
+ ret = bch2_disk_reservation_get(c, &disk_res,
+ bucket_to_sector(ca, nr - ja->nr), 1, 0);
+ if (ret)
+ break;
+ }
- ret = bch2_disk_reservation_get(c, &disk_res,
- bucket_to_sector(ca, nr - ja->nr), 1, 0);
- if (ret)
- break;
+ ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl);
- ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
+ if (ret == -BCH_ERR_bucket_alloc_blocked ||
+ ret == -BCH_ERR_open_buckets_empty)
+ ret = 0; /* wait and retry */
bch2_disk_reservation_put(c, &disk_res);
-
closure_sync(&cl);
-
- if (ret &&
- ret != -BCH_ERR_bucket_alloc_blocked &&
- ret != -BCH_ERR_open_buckets_empty)
- break;
}
- bch_err_fn(c, ret);
-unlock:
+ return ret;
+}
+
+/*
+ * Allocate more journal space at runtime - not currently making use if it, but
+ * the code works:
+ */
+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
+ unsigned nr)
+{
+ down_write(&c->state_lock);
+ int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false);
up_write(&c->state_lock);
+
+ bch_err_fn(c, ret);
return ret;
}
@@ -1228,7 +1233,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
- ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL);
+ ret = bch2_set_nr_journal_buckets_loop(ca->fs, ca, nr, new_fs);
err:
bch_err_fn(ca, ret);
return ret;
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 21805509ab9e..6718dc37c5a3 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -74,20 +74,14 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
struct move_bucket *b, u64 time)
{
struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bch_alloc_v4 _a;
- const struct bch_alloc_v4 *a;
- int ret;
- if (bch2_bucket_is_open(trans->c,
- b->k.bucket.inode,
- b->k.bucket.offset))
+ if (bch2_bucket_is_open(c, b->k.bucket.inode, b->k.bucket.offset))
return 0;
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
- b->k.bucket, BTREE_ITER_cached);
- ret = bkey_err(k);
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
+ b->k.bucket, BTREE_ITER_cached);
+ int ret = bkey_err(k);
if (ret)
return ret;
@@ -95,13 +89,18 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (!ca)
goto out;
- a = bch2_alloc_to_v4(k, &_a);
+ if (ca->mi.state != BCH_MEMBER_STATE_rw ||
+ !bch2_dev_is_online(ca))
+ goto out_put;
+
+ struct bch_alloc_v4 _a;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
b->k.gen = a->gen;
b->sectors = bch2_bucket_sectors_dirty(*a);
u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
ret = lru_idx && lru_idx <= time;
-
+out_put:
bch2_dev_put(ca);
out:
bch2_trans_iter_exit(trans, &iter);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 8037ccbacf6a..a81a7b6c0989 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -69,14 +69,20 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta
return v;
}
-void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
+bool bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
{
- mutex_lock(&c->sb_lock);
- SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
- max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
- c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ bool ret = (c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) &&
+ version <= c->sb.version_incompat_allowed;
+
+ if (ret) {
+ mutex_lock(&c->sb_lock);
+ SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
+ max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
+ return ret;
}
const char * const bch2_sb_fields[] = {
@@ -1219,9 +1225,11 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat)
c->disk_sb.sb->version = cpu_to_le16(new_version);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
- if (incompat)
+ if (incompat) {
SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version));
+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field);
+ }
}
static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
index f1ab4f943720..b4cff9ebdebb 100644
--- a/fs/bcachefs/super-io.h
+++ b/fs/bcachefs/super-io.h
@@ -21,17 +21,14 @@ static inline bool bch2_version_compatible(u16 version)
void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version);
enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version);
-void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version);
+bool bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version);
static inline bool bch2_request_incompat_feature(struct bch_fs *c,
enum bcachefs_metadata_version version)
{
- if (unlikely(version > c->sb.version_incompat)) {
- if (version > c->sb.version_incompat_allowed)
- return false;
- bch2_set_version_incompat(c, version);
- }
- return true;
+ return likely(version <= c->sb.version_incompat)
+ ? true
+ : bch2_set_version_incompat(c, version);
}
static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f)
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 6d97d412fed9..0459c875e189 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1811,7 +1811,11 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err_late;
up_write(&c->state_lock);
- return 0;
+out:
+ printbuf_exit(&label);
+ printbuf_exit(&errbuf);
+ bch_err_fn(c, ret);
+ return ret;
err_unlock:
mutex_unlock(&c->sb_lock);
@@ -1820,10 +1824,7 @@ err:
if (ca)
bch2_dev_free(ca);
bch2_free_super(&sb);
- printbuf_exit(&label);
- printbuf_exit(&errbuf);
- bch_err_fn(c, ret);
- return ret;
+ goto out;
err_late:
up_write(&c->state_lock);
ca = NULL;
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index e0a876cbaa6b..da2cd11b3025 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -653,19 +653,25 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
return 0;
}
-size_t bch2_rand_range(size_t max)
+u64 bch2_get_random_u64_below(u64 ceil)
{
- size_t rand;
+ if (ceil <= U32_MAX)
+ return __get_random_u32_below(ceil);
- if (!max)
- return 0;
+ /* this is the same (clever) algorithm as in __get_random_u32_below() */
+ u64 rand = get_random_u64();
+ u64 mult = ceil * rand;
- do {
- rand = get_random_long();
- rand &= roundup_pow_of_two(max) - 1;
- } while (rand >= max);
+ if (unlikely(mult < ceil)) {
+ u64 bound;
+ div64_u64_rem(-ceil, ceil, &bound);
+ while (unlikely(mult < bound)) {
+ rand = get_random_u64();
+ mult = ceil * rand;
+ }
+ }
- return rand;
+ return mul_u64_u64_shr(ceil, rand, 64);
}
void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index e7c3541b38f3..f4a4783219d9 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -401,7 +401,7 @@ do { \
_ret; \
})
-size_t bch2_rand_range(size_t);
+u64 bch2_get_random_u64_below(u64);
void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
void memcpy_from_bio(void *, struct bio *, struct bvec_iter);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a9322601ab5c..38756f8cef46 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1382,8 +1382,13 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
continue;
}
if (done_offset) {
- *done_offset = start - 1;
- return 0;
+ /*
+ * Move @end to the end of the processed range,
+ * and exit the loop to unlock the processed extents.
+ */
+ end = start - 1;
+ ret = 0;
+ break;
}
ret = -ENOSPC;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 53b846d99ece..14f53f757555 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1330,13 +1330,13 @@ MODULE_PARM_DESC(read_policy,
int btrfs_read_policy_to_enum(const char *str, s64 *value_ret)
{
- char param[32] = { 0 };
+ char param[32];
char __maybe_unused *value_str;
if (!str || strlen(str) == 0)
return 0;
- strncpy(param, str, sizeof(param) - 1);
+ strscpy(param, str);
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/* Separate value from input in policy:value format. */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fb22d4425cb0..3f8afbd1ebb5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7155,6 +7155,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
btrfs_err(fs_info,
"failed to add chunk map, start=%llu len=%llu: %d",
map->start, map->chunk_len, ret);
+ btrfs_free_chunk_map(map);
}
return ret;
diff --git a/fs/coredump.c b/fs/coredump.c
index 591700e1b2ce..4375c70144d0 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -63,6 +63,7 @@ static void free_vma_snapshot(struct coredump_params *cprm);
static int core_uses_pid;
static unsigned int core_pipe_limit;
+static unsigned int core_sort_vma;
static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
@@ -1026,6 +1027,15 @@ static const struct ctl_table coredump_sysctls[] = {
.extra1 = (unsigned int *)&core_file_note_size_min,
.extra2 = (unsigned int *)&core_file_note_size_max,
},
+ {
+ .procname = "core_sort_vma",
+ .data = &core_sort_vma,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
};
static int __init init_fs_coredump_sysctls(void)
@@ -1256,8 +1266,9 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
cprm->vma_data_size += m->dump_size;
}
- sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta),
- cmp_vma_size, NULL);
+ if (core_sort_vma)
+ sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta),
+ cmp_vma_size, NULL);
return true;
}
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index ce9be95c9172..9ff825f1502d 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -141,7 +141,7 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
return 0;
}
-void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
+int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
{
int i, b;
unsigned int ent_idx;
@@ -150,13 +150,17 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
struct exfat_mount_options *opts = &sbi->options;
if (!is_valid_cluster(sbi, clu))
- return;
+ return -EIO;
ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx);
+ if (!test_bit_le(b, sbi->vol_amap[i]->b_data))
+ return -EIO;
+
clear_bit_le(b, sbi->vol_amap[i]->b_data);
+
exfat_update_bh(sbi->vol_amap[i], sync);
if (opts->discard) {
@@ -171,6 +175,8 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
opts->discard = 0;
}
}
+
+ return 0;
}
/*
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 78be6964a8a0..d30ce18a88b7 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -456,7 +456,7 @@ int exfat_count_num_clusters(struct super_block *sb,
int exfat_load_bitmap(struct super_block *sb);
void exfat_free_bitmap(struct exfat_sb_info *sbi);
int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync);
-void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync);
+int exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync);
unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu);
int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count);
int exfat_trim_fs(struct inode *inode, struct fstrim_range *range);
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index 9e5492ac409b..6f3651c6ca91 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -175,6 +175,7 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain
BITMAP_OFFSET_SECTOR_INDEX(sb, CLUSTER_TO_BITMAP_ENT(clu));
if (p_chain->flags == ALLOC_NO_FAT_CHAIN) {
+ int err;
unsigned int last_cluster = p_chain->dir + p_chain->size - 1;
do {
bool sync = false;
@@ -189,7 +190,9 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain
cur_cmap_i = next_cmap_i;
}
- exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)));
+ err = exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)));
+ if (err)
+ break;
clu++;
num_clusters++;
} while (num_clusters < p_chain->size);
@@ -210,12 +213,13 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain
cur_cmap_i = next_cmap_i;
}
- exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode)));
+ if (exfat_clear_bitmap(inode, clu, (sync && IS_DIRSYNC(inode))))
+ break;
clu = n_clu;
num_clusters++;
if (err)
- goto dec_used_clus;
+ break;
if (num_clusters >= sbi->num_clusters - EXFAT_FIRST_CLUSTER) {
/*
@@ -229,7 +233,6 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain
} while (clu != EXFAT_EOF_CLUSTER);
}
-dec_used_clus:
sbi->used_clusters -= num_clusters;
return 0;
}
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 05b51e721783..807349d8ea05 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -587,7 +587,7 @@ static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
valid_size = ei->valid_size;
ret = generic_write_checks(iocb, iter);
- if (ret < 0)
+ if (ret <= 0)
goto unlock;
if (iocb->ki_flags & IOCB_DIRECT) {
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 691dd77b6ab5..8b30027d8251 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -232,7 +232,7 @@ static int exfat_search_empty_slot(struct super_block *sb,
dentry = 0;
}
- while (dentry + num_entries < total_entries &&
+ while (dentry + num_entries <= total_entries &&
clu.dir != EXFAT_EOF_CLUSTER) {
i = dentry & (dentries_per_clu - 1);
@@ -646,6 +646,11 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size);
info->size = le64_to_cpu(ep2->dentry.stream.size);
+ if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) {
+ exfat_fs_error(sb, "data size is invalid(%lld)", info->size);
+ return -EIO;
+ }
+
info->start_clu = le32_to_cpu(ep2->dentry.stream.start_clu);
if (!is_valid_cluster(sbi, info->start_clu) && info->size) {
exfat_warn(sb, "start_clu is invalid cluster(0x%x)",
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a5205149adba..3bd96c3d4cd0 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -756,9 +756,6 @@ retry:
return VM_FAULT_SIGBUS;
}
} else {
- result = filemap_fsnotify_fault(vmf);
- if (unlikely(result))
- return result;
filemap_invalidate_lock_shared(mapping);
}
result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 2b2d1b755544..2c3a4d09e500 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1457,7 +1457,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (ret < 0)
goto out;
- if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
+ if (pipe_buf_usage(pipe) + cs.nr_segs > pipe->max_usage) {
ret = -EIO;
goto out;
}
@@ -2107,7 +2107,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{
- unsigned int head, tail, mask, count;
+ unsigned int head, tail, count;
unsigned nbuf;
unsigned idx;
struct pipe_buffer *bufs;
@@ -2124,8 +2124,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
head = pipe->head;
tail = pipe->tail;
- mask = pipe->ring_size - 1;
- count = head - tail;
+ count = pipe_occupancy(head, tail);
bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
if (!bufs) {
@@ -2135,8 +2134,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
nbuf = 0;
rem = 0;
- for (idx = tail; idx != head && rem < len; idx++)
- rem += pipe->bufs[idx & mask].len;
+ for (idx = tail; !pipe_empty(head, idx) && rem < len; idx++)
+ rem += pipe_buf(pipe, idx)->len;
ret = -EINVAL;
if (rem < len)
@@ -2147,10 +2146,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
struct pipe_buffer *ibuf;
struct pipe_buffer *obuf;
- if (WARN_ON(nbuf >= count || tail == head))
+ if (WARN_ON(nbuf >= count || pipe_empty(head, tail)))
goto out_free;
- ibuf = &pipe->bufs[tail & mask];
+ ibuf = pipe_buf(pipe, tail);
obuf = &bufs[nbuf];
if (rem >= ibuf->len) {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 1bb646752e46..033feeab8c34 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -29,6 +29,7 @@
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/swap.h>
+#include <linux/compaction.h>
#include <linux/uaccess.h>
#include <linux/filelock.h>
@@ -457,7 +458,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
/* If the private flag is set, then the folio is not freeable */
if (folio_test_private(folio)) {
if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
- current_is_kswapd())
+ current_is_kswapd() || current_is_kcompactd())
return false;
if (nfs_wb_folio(folio->mapping->host, folio) < 0)
return false;
diff --git a/fs/pipe.c b/fs/pipe.c
index ce1af7592780..4d0799e4e719 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -210,11 +210,10 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
- unsigned int head = READ_ONCE(pipe->head);
- unsigned int tail = READ_ONCE(pipe->tail);
+ union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) };
unsigned int writers = READ_ONCE(pipe->writers);
- return !pipe_empty(head, tail) || !writers;
+ return !pipe_empty(idx.head, idx.tail) || !writers;
}
static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
@@ -395,7 +394,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
wake_next_reader = true;
mutex_lock(&pipe->mutex);
}
- if (pipe_empty(pipe->head, pipe->tail))
+ if (pipe_is_empty(pipe))
wake_next_reader = false;
mutex_unlock(&pipe->mutex);
@@ -417,11 +416,10 @@ static inline int is_packetized(struct file *file)
/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
{
- unsigned int head = READ_ONCE(pipe->head);
- unsigned int tail = READ_ONCE(pipe->tail);
+ union pipe_index idx = { .head_tail = READ_ONCE(pipe->head_tail) };
unsigned int max_usage = READ_ONCE(pipe->max_usage);
- return !pipe_full(head, tail, max_usage) ||
+ return !pipe_full(idx.head, idx.tail, max_usage) ||
!READ_ONCE(pipe->readers);
}
@@ -579,11 +577,11 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
mutex_lock(&pipe->mutex);
- was_empty = pipe_empty(pipe->head, pipe->tail);
+ was_empty = pipe_is_empty(pipe);
wake_next_writer = true;
}
out:
- if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+ if (pipe_is_full(pipe))
wake_next_writer = false;
mutex_unlock(&pipe->mutex);
@@ -616,7 +614,7 @@ out:
static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct pipe_inode_info *pipe = filp->private_data;
- unsigned int count, head, tail, mask;
+ unsigned int count, head, tail;
switch (cmd) {
case FIONREAD:
@@ -624,10 +622,9 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
count = 0;
head = pipe->head;
tail = pipe->tail;
- mask = pipe->ring_size - 1;
- while (tail != head) {
- count += pipe->bufs[tail & mask].len;
+ while (!pipe_empty(head, tail)) {
+ count += pipe_buf(pipe, tail)->len;
tail++;
}
mutex_unlock(&pipe->mutex);
@@ -659,7 +656,7 @@ pipe_poll(struct file *filp, poll_table *wait)
{
__poll_t mask;
struct pipe_inode_info *pipe = filp->private_data;
- unsigned int head, tail;
+ union pipe_index idx;
/* Epoll has some historical nasty semantics, this enables them */
WRITE_ONCE(pipe->poll_usage, true);
@@ -680,19 +677,18 @@ pipe_poll(struct file *filp, poll_table *wait)
* if something changes and you got it wrong, the poll
* table entry will wake you up and fix it.
*/
- head = READ_ONCE(pipe->head);
- tail = READ_ONCE(pipe->tail);
+ idx.head_tail = READ_ONCE(pipe->head_tail);
mask = 0;
if (filp->f_mode & FMODE_READ) {
- if (!pipe_empty(head, tail))
+ if (!pipe_empty(idx.head, idx.tail))
mask |= EPOLLIN | EPOLLRDNORM;
if (!pipe->writers && filp->f_pipe != pipe->w_counter)
mask |= EPOLLHUP;
}
if (filp->f_mode & FMODE_WRITE) {
- if (!pipe_full(head, tail, pipe->max_usage))
+ if (!pipe_full(idx.head, idx.tail, pipe->max_usage))
mask |= EPOLLOUT | EPOLLWRNORM;
/*
* Most Unices do not set EPOLLERR for FIFOs but on Linux they
diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c
index 699a3f76d083..64bd68f750f8 100644
--- a/fs/smb/client/cifsacl.c
+++ b/fs/smb/client/cifsacl.c
@@ -763,7 +763,7 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl,
struct cifs_fattr *fattr, bool mode_from_special_sid)
{
int i;
- int num_aces = 0;
+ u16 num_aces = 0;
int acl_size;
char *acl_base;
struct smb_ace **ppace;
@@ -778,14 +778,15 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl,
}
/* validate that we do not go past end of acl */
- if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
+ if (end_of_acl < (char *)pdacl + sizeof(struct smb_acl) ||
+ end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
cifs_dbg(VFS, "ACL too small to parse DACL\n");
return;
}
cifs_dbg(NOISY, "DACL revision %d size %d num aces %d\n",
le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
- le32_to_cpu(pdacl->num_aces));
+ le16_to_cpu(pdacl->num_aces));
/* reset rwx permissions for user/group/other.
Also, if num_aces is 0 i.e. DACL has no ACEs,
@@ -795,12 +796,15 @@ static void parse_dacl(struct smb_acl *pdacl, char *end_of_acl,
acl_base = (char *)pdacl;
acl_size = sizeof(struct smb_acl);
- num_aces = le32_to_cpu(pdacl->num_aces);
+ num_aces = le16_to_cpu(pdacl->num_aces);
if (num_aces > 0) {
umode_t denied_mode = 0;
- if (num_aces > ULONG_MAX / sizeof(struct smb_ace *))
+ if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) /
+ (offsetof(struct smb_ace, sid) +
+ offsetof(struct smb_sid, sub_auth) + sizeof(__le16)))
return;
+
ppace = kmalloc_array(num_aces, sizeof(struct smb_ace *),
GFP_KERNEL);
if (!ppace)
@@ -937,12 +941,12 @@ unsigned int setup_special_user_owner_ACE(struct smb_ace *pntace)
static void populate_new_aces(char *nacl_base,
struct smb_sid *pownersid,
struct smb_sid *pgrpsid,
- __u64 *pnmode, u32 *pnum_aces, u16 *pnsize,
+ __u64 *pnmode, u16 *pnum_aces, u16 *pnsize,
bool modefromsid,
bool posix)
{
__u64 nmode;
- u32 num_aces = 0;
+ u16 num_aces = 0;
u16 nsize = 0;
__u64 user_mode;
__u64 group_mode;
@@ -1050,7 +1054,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p
u16 size = 0;
struct smb_ace *pntace = NULL;
char *acl_base = NULL;
- u32 src_num_aces = 0;
+ u16 src_num_aces = 0;
u16 nsize = 0;
struct smb_ace *pnntace = NULL;
char *nacl_base = NULL;
@@ -1058,7 +1062,7 @@ static __u16 replace_sids_and_copy_aces(struct smb_acl *pdacl, struct smb_acl *p
acl_base = (char *)pdacl;
size = sizeof(struct smb_acl);
- src_num_aces = le32_to_cpu(pdacl->num_aces);
+ src_num_aces = le16_to_cpu(pdacl->num_aces);
nacl_base = (char *)pndacl;
nsize = sizeof(struct smb_acl);
@@ -1090,11 +1094,11 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl,
u16 size = 0;
struct smb_ace *pntace = NULL;
char *acl_base = NULL;
- u32 src_num_aces = 0;
+ u16 src_num_aces = 0;
u16 nsize = 0;
struct smb_ace *pnntace = NULL;
char *nacl_base = NULL;
- u32 num_aces = 0;
+ u16 num_aces = 0;
bool new_aces_set = false;
/* Assuming that pndacl and pnmode are never NULL */
@@ -1112,7 +1116,7 @@ static int set_chmod_dacl(struct smb_acl *pdacl, struct smb_acl *pndacl,
acl_base = (char *)pdacl;
size = sizeof(struct smb_acl);
- src_num_aces = le32_to_cpu(pdacl->num_aces);
+ src_num_aces = le16_to_cpu(pdacl->num_aces);
/* Retain old ACEs which we can retain */
for (i = 0; i < src_num_aces; ++i) {
@@ -1158,7 +1162,7 @@ next_ace:
}
finalize_dacl:
- pndacl->num_aces = cpu_to_le32(num_aces);
+ pndacl->num_aces = cpu_to_le16(num_aces);
pndacl->size = cpu_to_le16(nsize);
return 0;
@@ -1293,7 +1297,7 @@ static int build_sec_desc(struct smb_ntsd *pntsd, struct smb_ntsd *pnntsd,
dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
ndacl_ptr->size = cpu_to_le16(0);
- ndacl_ptr->num_aces = cpu_to_le32(0);
+ ndacl_ptr->num_aces = cpu_to_le16(0);
rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr,
pnmode, mode_from_sid, posix);
@@ -1653,7 +1657,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
dacl_ptr = (struct smb_acl *)((char *)pntsd + dacloffset);
if (mode_from_sid)
nsecdesclen +=
- le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace);
+ le16_to_cpu(dacl_ptr->num_aces) * sizeof(struct smb_ace);
else /* cifsacl */
nsecdesclen += le16_to_cpu(dacl_ptr->size);
}
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index f917de020dd5..73f93a35eedd 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -1825,9 +1825,8 @@ static int match_session(struct cifs_ses *ses,
struct smb3_fs_context *ctx,
bool match_super)
{
- if (ctx->sectype != Unspecified &&
- ctx->sectype != ses->sectype)
- return 0;
+ struct TCP_Server_Info *server = ses->server;
+ enum securityEnum ctx_sec, ses_sec;
if (!match_super && ctx->dfs_root_ses != ses->dfs_root_ses)
return 0;
@@ -1839,11 +1838,20 @@ static int match_session(struct cifs_ses *ses,
if (ses->chan_max < ctx->max_channels)
return 0;
- switch (ses->sectype) {
+ ctx_sec = server->ops->select_sectype(server, ctx->sectype);
+ ses_sec = server->ops->select_sectype(server, ses->sectype);
+
+ if (ctx_sec != ses_sec)
+ return 0;
+
+ switch (ctx_sec) {
+ case IAKerb:
case Kerberos:
if (!uid_eq(ctx->cred_uid, ses->cred_uid))
return 0;
break;
+ case NTLMv2:
+ case RawNTLMSSP:
default:
/* NULL username means anonymous session */
if (ses->user_name == NULL) {
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index e9b286d9a7ba..8c73d4d60d1a 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -171,6 +171,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_string("username", Opt_user),
fsparam_string("pass", Opt_pass),
fsparam_string("password", Opt_pass),
+ fsparam_string("pass2", Opt_pass2),
fsparam_string("password2", Opt_pass2),
fsparam_string("ip", Opt_ip),
fsparam_string("addr", Opt_ip),
@@ -1131,6 +1132,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
} else if (!strcmp("user", param->key) || !strcmp("username", param->key)) {
skip_parsing = true;
opt = Opt_user;
+ } else if (!strcmp("pass2", param->key) || !strcmp("password2", param->key)) {
+ skip_parsing = true;
+ opt = Opt_pass2;
}
}
@@ -1340,21 +1344,21 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_acregmax:
- ctx->acregmax = HZ * result.uint_32;
- if (ctx->acregmax > CIFS_MAX_ACTIMEO) {
+ if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) {
cifs_errorf(fc, "acregmax too large\n");
goto cifs_parse_mount_err;
}
+ ctx->acregmax = HZ * result.uint_32;
break;
case Opt_acdirmax:
- ctx->acdirmax = HZ * result.uint_32;
- if (ctx->acdirmax > CIFS_MAX_ACTIMEO) {
+ if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) {
cifs_errorf(fc, "acdirmax too large\n");
goto cifs_parse_mount_err;
}
+ ctx->acdirmax = HZ * result.uint_32;
break;
case Opt_actimeo:
- if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) {
+ if (result.uint_32 > CIFS_MAX_ACTIMEO / HZ) {
cifs_errorf(fc, "timeout too large\n");
goto cifs_parse_mount_err;
}
@@ -1366,11 +1370,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->acdirmax = ctx->acregmax = HZ * result.uint_32;
break;
case Opt_closetimeo:
- ctx->closetimeo = HZ * result.uint_32;
- if (ctx->closetimeo > SMB3_MAX_DCLOSETIMEO) {
+ if (result.uint_32 > SMB3_MAX_DCLOSETIMEO / HZ) {
cifs_errorf(fc, "closetimeo too large\n");
goto cifs_parse_mount_err;
}
+ ctx->closetimeo = HZ * result.uint_32;
break;
case Opt_echo_interval:
ctx->echo_interval = result.uint_32;
diff --git a/fs/smb/common/smbacl.h b/fs/smb/common/smbacl.h
index 6a60698fc6f0..a624ec9e4a14 100644
--- a/fs/smb/common/smbacl.h
+++ b/fs/smb/common/smbacl.h
@@ -107,7 +107,8 @@ struct smb_sid {
struct smb_acl {
__le16 revision; /* revision level */
__le16 size;
- __le32 num_aces;
+ __le16 num_aces;
+ __le16 reserved;
} __attribute__((packed));
struct smb_ace {
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index f8a40f65db6a..c1f22c129111 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -433,6 +433,26 @@ void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops)
default_conn_ops.terminate_fn = ops->terminate_fn;
}
+void ksmbd_conn_r_count_inc(struct ksmbd_conn *conn)
+{
+ atomic_inc(&conn->r_count);
+}
+
+void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn)
+{
+ /*
+ * Checking waitqueue to dropping pending requests on
+ * disconnection. waitqueue_active is safe because it
+ * uses atomic operation for condition.
+ */
+ atomic_inc(&conn->refcnt);
+ if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
+ wake_up(&conn->r_count_q);
+
+ if (atomic_dec_and_test(&conn->refcnt))
+ kfree(conn);
+}
+
int ksmbd_conn_transport_init(void)
{
int ret;
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index b379ae4fdcdf..91c2318639e7 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -168,6 +168,8 @@ int ksmbd_conn_transport_init(void);
void ksmbd_conn_transport_destroy(void);
void ksmbd_conn_lock(struct ksmbd_conn *conn);
void ksmbd_conn_unlock(struct ksmbd_conn *conn);
+void ksmbd_conn_r_count_inc(struct ksmbd_conn *conn);
+void ksmbd_conn_r_count_dec(struct ksmbd_conn *conn);
/*
* WARNING
diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c
index 4af2e6007c29..72b00ca6e455 100644
--- a/fs/smb/server/ksmbd_work.c
+++ b/fs/smb/server/ksmbd_work.c
@@ -26,7 +26,6 @@ struct ksmbd_work *ksmbd_alloc_work_struct(void)
INIT_LIST_HEAD(&work->request_entry);
INIT_LIST_HEAD(&work->async_request_entry);
INIT_LIST_HEAD(&work->fp_entry);
- INIT_LIST_HEAD(&work->interim_entry);
INIT_LIST_HEAD(&work->aux_read_list);
work->iov_alloc_cnt = 4;
work->iov = kcalloc(work->iov_alloc_cnt, sizeof(struct kvec),
@@ -56,8 +55,6 @@ void ksmbd_free_work_struct(struct ksmbd_work *work)
kfree(work->tr_buf);
kvfree(work->request_buf);
kfree(work->iov);
- if (!list_empty(&work->interim_entry))
- list_del(&work->interim_entry);
if (work->async_id)
ksmbd_release_id(&work->conn->async_ida, work->async_id);
diff --git a/fs/smb/server/ksmbd_work.h b/fs/smb/server/ksmbd_work.h
index 8ca2c813246e..d36393ff8310 100644
--- a/fs/smb/server/ksmbd_work.h
+++ b/fs/smb/server/ksmbd_work.h
@@ -89,7 +89,6 @@ struct ksmbd_work {
/* List head at conn->async_requests */
struct list_head async_request_entry;
struct list_head fp_entry;
- struct list_head interim_entry;
};
/**
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 3a3fe4afbdf0..28886ff1ee57 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -46,7 +46,6 @@ static struct oplock_info *alloc_opinfo(struct ksmbd_work *work,
opinfo->fid = id;
opinfo->Tid = Tid;
INIT_LIST_HEAD(&opinfo->op_entry);
- INIT_LIST_HEAD(&opinfo->interim_list);
init_waitqueue_head(&opinfo->oplock_q);
init_waitqueue_head(&opinfo->oplock_brk);
atomic_set(&opinfo->refcount, 1);
@@ -635,6 +634,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
{
struct smb2_oplock_break *rsp = NULL;
struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+ struct ksmbd_conn *conn = work->conn;
struct oplock_break_info *br_info = work->request_buf;
struct smb2_hdr *rsp_hdr;
struct ksmbd_file *fp;
@@ -690,6 +690,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
out:
ksmbd_free_work_struct(work);
+ ksmbd_conn_r_count_dec(conn);
}
/**
@@ -724,6 +725,7 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo)
work->sess = opinfo->sess;
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
+ ksmbd_conn_r_count_inc(conn);
INIT_WORK(&work->work, __smb2_oplock_break_noti);
ksmbd_queue_work(work);
@@ -745,6 +747,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
{
struct smb2_lease_break *rsp = NULL;
struct ksmbd_work *work = container_of(wk, struct ksmbd_work, work);
+ struct ksmbd_conn *conn = work->conn;
struct lease_break_info *br_info = work->request_buf;
struct smb2_hdr *rsp_hdr;
@@ -791,6 +794,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
out:
ksmbd_free_work_struct(work);
+ ksmbd_conn_r_count_dec(conn);
}
/**
@@ -803,7 +807,6 @@ out:
static int smb2_lease_break_noti(struct oplock_info *opinfo)
{
struct ksmbd_conn *conn = opinfo->conn;
- struct list_head *tmp, *t;
struct ksmbd_work *work;
struct lease_break_info *br_info;
struct lease *lease = opinfo->o_lease;
@@ -831,16 +834,7 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
work->sess = opinfo->sess;
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
- list_for_each_safe(tmp, t, &opinfo->interim_list) {
- struct ksmbd_work *in_work;
-
- in_work = list_entry(tmp, struct ksmbd_work,
- interim_entry);
- setup_async_work(in_work, NULL, NULL);
- smb2_send_interim_resp(in_work, STATUS_PENDING);
- list_del_init(&in_work->interim_entry);
- release_async_work(in_work);
- }
+ ksmbd_conn_r_count_inc(conn);
INIT_WORK(&work->work, __smb2_lease_break_noti);
ksmbd_queue_work(work);
wait_for_break_ack(opinfo);
@@ -871,7 +865,8 @@ static void wait_lease_breaking(struct oplock_info *opinfo)
}
}
-static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level)
+static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level,
+ struct ksmbd_work *in_work)
{
int err = 0;
@@ -914,9 +909,15 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level)
}
if (lease->state & (SMB2_LEASE_WRITE_CACHING_LE |
- SMB2_LEASE_HANDLE_CACHING_LE))
+ SMB2_LEASE_HANDLE_CACHING_LE)) {
+ if (in_work) {
+ setup_async_work(in_work, NULL, NULL);
+ smb2_send_interim_resp(in_work, STATUS_PENDING);
+ release_async_work(in_work);
+ }
+
brk_opinfo->op_state = OPLOCK_ACK_WAIT;
- else
+ } else
atomic_dec(&brk_opinfo->breaking_cnt);
} else {
err = oplock_break_pending(brk_opinfo, req_op_level);
@@ -1116,7 +1117,7 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp,
if (ksmbd_conn_releasing(opinfo->conn))
continue;
- oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE);
+ oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL);
opinfo_put(opinfo);
}
}
@@ -1152,7 +1153,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp)
if (ksmbd_conn_releasing(opinfo->conn))
continue;
- oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE);
+ oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL);
opinfo_put(opinfo);
}
}
@@ -1252,8 +1253,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
goto op_break_not_needed;
}
- list_add(&work->interim_entry, &prev_opinfo->interim_list);
- err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II);
+ err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II, work);
opinfo_put(prev_opinfo);
if (err == -ENOENT)
goto set_lev;
@@ -1322,8 +1322,7 @@ static void smb_break_all_write_oplock(struct ksmbd_work *work,
}
brk_opinfo->open_trunc = is_trunc;
- list_add(&work->interim_entry, &brk_opinfo->interim_list);
- oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II);
+ oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II, work);
opinfo_put(brk_opinfo);
}
@@ -1386,7 +1385,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
SMB2_LEASE_KEY_SIZE))
goto next;
brk_op->open_trunc = is_trunc;
- oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE);
+ oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE, NULL);
next:
opinfo_put(brk_op);
rcu_read_lock();
diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h
index 72bc88a63a40..3f64f0787263 100644
--- a/fs/smb/server/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -67,7 +67,6 @@ struct oplock_info {
bool is_lease;
bool open_trunc; /* truncate on open */
struct lease *o_lease;
- struct list_head interim_list;
struct list_head op_entry;
struct list_head lease_entry;
wait_queue_head_t oplock_q; /* Other server threads */
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index 601e7fcbcf1e..ab533c602987 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -270,17 +270,7 @@ static void handle_ksmbd_work(struct work_struct *wk)
ksmbd_conn_try_dequeue_request(work);
ksmbd_free_work_struct(work);
- /*
- * Checking waitqueue to dropping pending requests on
- * disconnection. waitqueue_active is safe because it
- * uses atomic operation for condition.
- */
- atomic_inc(&conn->refcnt);
- if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
- wake_up(&conn->r_count_q);
-
- if (atomic_dec_and_test(&conn->refcnt))
- kfree(conn);
+ ksmbd_conn_r_count_dec(conn);
}
/**
@@ -310,7 +300,7 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
conn->request_buf = NULL;
ksmbd_conn_enqueue_request(work);
- atomic_inc(&conn->r_count);
+ ksmbd_conn_r_count_inc(conn);
/* update activity on connection */
conn->last_active = jiffies;
INIT_WORK(&work->work, handle_ksmbd_work);
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index f1efcd027475..c53121538990 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -7458,17 +7458,17 @@ out_check_cl:
}
no_check_cl:
+ flock = smb_lock->fl;
+ list_del(&smb_lock->llist);
+
if (smb_lock->zero_len) {
err = 0;
goto skip;
}
-
- flock = smb_lock->fl;
- list_del(&smb_lock->llist);
retry:
rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL);
skip:
- if (flags & SMB2_LOCKFLAG_UNLOCK) {
+ if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) {
if (!rc) {
ksmbd_debug(SMB, "File unlocked\n");
} else if (rc == -ENOENT) {
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c
index d39d3e553366..49b128698670 100644
--- a/fs/smb/server/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -333,7 +333,7 @@ void posix_state_to_acl(struct posix_acl_state *state,
pace->e_perm = state->other.allow;
}
-int init_acl_state(struct posix_acl_state *state, int cnt)
+int init_acl_state(struct posix_acl_state *state, u16 cnt)
{
int alloc;
@@ -368,7 +368,7 @@ static void parse_dacl(struct mnt_idmap *idmap,
struct smb_fattr *fattr)
{
int i, ret;
- int num_aces = 0;
+ u16 num_aces = 0;
unsigned int acl_size;
char *acl_base;
struct smb_ace **ppace;
@@ -389,16 +389,18 @@ static void parse_dacl(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "DACL revision %d size %d num aces %d\n",
le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
- le32_to_cpu(pdacl->num_aces));
+ le16_to_cpu(pdacl->num_aces));
acl_base = (char *)pdacl;
acl_size = sizeof(struct smb_acl);
- num_aces = le32_to_cpu(pdacl->num_aces);
+ num_aces = le16_to_cpu(pdacl->num_aces);
if (num_aces <= 0)
return;
- if (num_aces > ULONG_MAX / sizeof(struct smb_ace *))
+ if (num_aces > (le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) /
+ (offsetof(struct smb_ace, sid) +
+ offsetof(struct smb_sid, sub_auth) + sizeof(__le16)))
return;
ret = init_acl_state(&acl_state, num_aces);
@@ -432,6 +434,7 @@ static void parse_dacl(struct mnt_idmap *idmap,
offsetof(struct smb_sid, sub_auth);
if (end_of_acl - acl_base < acl_size ||
+ ppace[i]->sid.num_subauth == 0 ||
ppace[i]->sid.num_subauth > SID_MAX_SUB_AUTHORITIES ||
(end_of_acl - acl_base <
acl_size + sizeof(__le32) * ppace[i]->sid.num_subauth) ||
@@ -580,7 +583,7 @@ static void parse_dacl(struct mnt_idmap *idmap,
static void set_posix_acl_entries_dacl(struct mnt_idmap *idmap,
struct smb_ace *pndace,
- struct smb_fattr *fattr, u32 *num_aces,
+ struct smb_fattr *fattr, u16 *num_aces,
u16 *size, u32 nt_aces_num)
{
struct posix_acl_entry *pace;
@@ -701,7 +704,7 @@ static void set_ntacl_dacl(struct mnt_idmap *idmap,
struct smb_fattr *fattr)
{
struct smb_ace *ntace, *pndace;
- int nt_num_aces = le32_to_cpu(nt_dacl->num_aces), num_aces = 0;
+ u16 nt_num_aces = le16_to_cpu(nt_dacl->num_aces), num_aces = 0;
unsigned short size = 0;
int i;
@@ -728,7 +731,7 @@ static void set_ntacl_dacl(struct mnt_idmap *idmap,
set_posix_acl_entries_dacl(idmap, pndace, fattr,
&num_aces, &size, nt_num_aces);
- pndacl->num_aces = cpu_to_le32(num_aces);
+ pndacl->num_aces = cpu_to_le16(num_aces);
pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size);
}
@@ -736,7 +739,7 @@ static void set_mode_dacl(struct mnt_idmap *idmap,
struct smb_acl *pndacl, struct smb_fattr *fattr)
{
struct smb_ace *pace, *pndace;
- u32 num_aces = 0;
+ u16 num_aces = 0;
u16 size = 0, ace_size = 0;
uid_t uid;
const struct smb_sid *sid;
@@ -792,7 +795,7 @@ static void set_mode_dacl(struct mnt_idmap *idmap,
fattr->cf_mode, 0007);
out:
- pndacl->num_aces = cpu_to_le32(num_aces);
+ pndacl->num_aces = cpu_to_le16(num_aces);
pndacl->size = cpu_to_le16(le16_to_cpu(pndacl->size) + size);
}
@@ -807,6 +810,13 @@ static int parse_sid(struct smb_sid *psid, char *end_of_acl)
return -EINVAL;
}
+ if (!psid->num_subauth)
+ return 0;
+
+ if (psid->num_subauth > SID_MAX_SUB_AUTHORITIES ||
+ end_of_acl < (char *)psid + 8 + sizeof(__le32) * psid->num_subauth)
+ return -EINVAL;
+
return 0;
}
@@ -848,6 +858,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
pntsd->type = cpu_to_le16(DACL_PRESENT);
if (pntsd->osidoffset) {
+ if (le32_to_cpu(pntsd->osidoffset) < sizeof(struct smb_ntsd))
+ return -EINVAL;
+
rc = parse_sid(owner_sid_ptr, end_of_acl);
if (rc) {
pr_err("%s: Error %d parsing Owner SID\n", __func__, rc);
@@ -863,6 +876,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
}
if (pntsd->gsidoffset) {
+ if (le32_to_cpu(pntsd->gsidoffset) < sizeof(struct smb_ntsd))
+ return -EINVAL;
+
rc = parse_sid(group_sid_ptr, end_of_acl);
if (rc) {
pr_err("%s: Error %d mapping Owner SID to gid\n",
@@ -884,6 +900,9 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
pntsd->type |= cpu_to_le16(DACL_PROTECTED);
if (dacloffset) {
+ if (dacloffset < sizeof(struct smb_ntsd))
+ return -EINVAL;
+
parse_dacl(idmap, dacl_ptr, end_of_acl,
owner_sid_ptr, group_sid_ptr, fattr);
}
@@ -1006,8 +1025,9 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
struct smb_sid owner_sid, group_sid;
struct dentry *parent = path->dentry->d_parent;
struct mnt_idmap *idmap = mnt_idmap(path->mnt);
- int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size;
- int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size;
+ int inherited_flags = 0, flags = 0, i, nt_size = 0, pdacl_size;
+ int rc = 0, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size;
+ u16 num_aces, ace_cnt = 0;
char *aces_base;
bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode);
@@ -1023,7 +1043,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset);
acl_len = pntsd_size - dacloffset;
- num_aces = le32_to_cpu(parent_pdacl->num_aces);
+ num_aces = le16_to_cpu(parent_pdacl->num_aces);
pntsd_type = le16_to_cpu(parent_pntsd->type);
pdacl_size = le16_to_cpu(parent_pdacl->size);
@@ -1183,7 +1203,7 @@ pass:
pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset));
pdacl->revision = cpu_to_le16(2);
pdacl->size = cpu_to_le16(sizeof(struct smb_acl) + nt_size);
- pdacl->num_aces = cpu_to_le32(ace_cnt);
+ pdacl->num_aces = cpu_to_le16(ace_cnt);
pace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
memcpy(pace, aces_base, nt_size);
pntsd_size += sizeof(struct smb_acl) + nt_size;
@@ -1264,7 +1284,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
aces_size = acl_size - sizeof(struct smb_acl);
- for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
+ for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) {
if (offsetof(struct smb_ace, access_req) > aces_size)
break;
ace_size = le16_to_cpu(ace->size);
@@ -1285,7 +1305,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
aces_size = acl_size - sizeof(struct smb_acl);
- for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
+ for (i = 0; i < le16_to_cpu(pdacl->num_aces); i++) {
if (offsetof(struct smb_ace, access_req) > aces_size)
break;
ace_size = le16_to_cpu(ace->size);
diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h
index 24ce576fc292..355adaee39b8 100644
--- a/fs/smb/server/smbacl.h
+++ b/fs/smb/server/smbacl.h
@@ -86,7 +86,7 @@ int parse_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
int build_sec_desc(struct mnt_idmap *idmap, struct smb_ntsd *pntsd,
struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info,
__u32 *secdesclen, struct smb_fattr *fattr);
-int init_acl_state(struct posix_acl_state *state, int cnt);
+int init_acl_state(struct posix_acl_state *state, u16 cnt);
void free_acl_state(struct posix_acl_state *state);
void posix_state_to_acl(struct posix_acl_state *state,
struct posix_acl_entry *pace);
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 0460ebea6ff0..3f185ae60dc5 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -281,6 +281,7 @@ static int handle_response(int type, void *payload, size_t sz)
if (entry->type + 1 != type) {
pr_err("Waiting for IPC type %d, got %d. Ignore.\n",
entry->type + 1, type);
+ continue;
}
entry->response = kvzalloc(sz, KSMBD_DEFAULT_GFP);
diff --git a/fs/splice.c b/fs/splice.c
index 28cfa63aa236..23fa5561b944 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -331,7 +331,7 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos,
int i;
/* Work out how much data we can actually add into the pipe */
- used = pipe_occupancy(pipe->head, pipe->tail);
+ used = pipe_buf_usage(pipe);
npages = max_t(ssize_t, pipe->max_usage - used, 0);
len = min_t(size_t, len, npages * PAGE_SIZE);
npages = DIV_ROUND_UP(len, PAGE_SIZE);
@@ -527,7 +527,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
return -ERESTARTSYS;
repeat:
- while (pipe_empty(pipe->head, pipe->tail)) {
+ while (pipe_is_empty(pipe)) {
if (!pipe->writers)
return 0;
@@ -820,7 +820,7 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
if (signal_pending(current))
break;
- while (pipe_empty(pipe->head, pipe->tail)) {
+ while (pipe_is_empty(pipe)) {
ret = 0;
if (!pipe->writers)
goto out;
@@ -968,7 +968,7 @@ static ssize_t do_splice_read(struct file *in, loff_t *ppos,
return 0;
/* Don't try to read more the pipe has space for. */
- p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
+ p_space = pipe->max_usage - pipe_buf_usage(pipe);
len = min_t(size_t, len, p_space << PAGE_SHIFT);
if (unlikely(len > MAX_RW_COUNT))
@@ -1080,7 +1080,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
more = sd->flags & SPLICE_F_MORE;
sd->flags |= SPLICE_F_MORE;
- WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail));
+ WARN_ON_ONCE(!pipe_is_empty(pipe));
while (len) {
size_t read_len;
@@ -1268,7 +1268,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
send_sig(SIGPIPE, current, 0);
return -EPIPE;
}
- if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+ if (!pipe_is_full(pipe))
return 0;
if (flags & SPLICE_F_NONBLOCK)
return -EAGAIN;
@@ -1652,13 +1652,13 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
* Check the pipe occupancy without the inode lock first. This function
* is speculative anyways, so missing one is ok.
*/
- if (!pipe_empty(pipe->head, pipe->tail))
+ if (!pipe_is_empty(pipe))
return 0;
ret = 0;
pipe_lock(pipe);
- while (pipe_empty(pipe->head, pipe->tail)) {
+ while (pipe_is_empty(pipe)) {
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -1688,13 +1688,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
* Check pipe occupancy without the inode lock first. This function
* is speculative anyways, so missing one is ok.
*/
- if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+ if (!pipe_is_full(pipe))
return 0;
ret = 0;
pipe_lock(pipe);
- while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
+ while (pipe_is_full(pipe)) {
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
ret = -EPIPE;
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index 1d94bb784108..0bc96ab6580b 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -21,8 +21,7 @@
#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */
-static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376',
- '\375' };
+static const unsigned char VBSF_MOUNT_SIGNATURE[4] __nonstring = "\000\377\376\375";
static int follow_symlinks;
module_param(follow_symlinks, int, 0444);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3d33e17f2e5c..7839efe050bf 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -33,8 +33,6 @@ struct kmem_cache *xfs_extfree_item_cache;
struct workqueue_struct *xfs_alloc_wq;
-#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
-
#define XFSA_FIXUP_BNO_OK 1
#define XFSA_FIXUP_CNT_OK 2
@@ -410,8 +408,8 @@ xfs_alloc_compute_diff(
if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
if (newlen1 < newlen2 ||
(newlen1 == newlen2 &&
- XFS_ABSDIFF(newbno1, wantbno) >
- XFS_ABSDIFF(newbno2, wantbno)))
+ abs_diff(newbno1, wantbno) >
+ abs_diff(newbno2, wantbno)))
newbno1 = newbno2;
} else if (newbno2 != NULLAGBLOCK)
newbno1 = newbno2;
@@ -427,7 +425,7 @@ xfs_alloc_compute_diff(
} else
newbno1 = freeend - wantlen;
*newbnop = newbno1;
- return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
+ return newbno1 == NULLAGBLOCK ? 0 : abs_diff(newbno1, wantbno);
}
/*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 15bb790359f8..5d560e9073f4 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -29,11 +29,6 @@ struct kmem_cache *xfs_buf_cache;
/*
* Locking orders
*
- * xfs_buf_ioacct_inc:
- * xfs_buf_ioacct_dec:
- * b_sema (caller holds)
- * b_lock
- *
* xfs_buf_stale:
* b_sema (caller holds)
* b_lock
@@ -82,51 +77,6 @@ xfs_buf_vmap_len(
}
/*
- * Bump the I/O in flight count on the buftarg if we haven't yet done so for
- * this buffer. The count is incremented once per buffer (per hold cycle)
- * because the corresponding decrement is deferred to buffer release. Buffers
- * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
- * tracking adds unnecessary overhead. This is used for sychronization purposes
- * with unmount (see xfs_buftarg_drain()), so all we really need is a count of
- * in-flight buffers.
- *
- * Buffers that are never released (e.g., superblock, iclog buffers) must set
- * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count
- * never reaches zero and unmount hangs indefinitely.
- */
-static inline void
-xfs_buf_ioacct_inc(
- struct xfs_buf *bp)
-{
- if (bp->b_flags & XBF_NO_IOACCT)
- return;
-
- ASSERT(bp->b_flags & XBF_ASYNC);
- spin_lock(&bp->b_lock);
- if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
- bp->b_state |= XFS_BSTATE_IN_FLIGHT;
- percpu_counter_inc(&bp->b_target->bt_io_count);
- }
- spin_unlock(&bp->b_lock);
-}
-
-/*
- * Clear the in-flight state on a buffer about to be released to the LRU or
- * freed and unaccount from the buftarg.
- */
-static inline void
-__xfs_buf_ioacct_dec(
- struct xfs_buf *bp)
-{
- lockdep_assert_held(&bp->b_lock);
-
- if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
- bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
- percpu_counter_dec(&bp->b_target->bt_io_count);
- }
-}
-
-/*
* When we mark a buffer stale, we remove the buffer from the LRU and clear the
* b_lru_ref count so that the buffer is freed immediately when the buffer
* reference count falls to zero. If the buffer is already on the LRU, we need
@@ -149,15 +99,7 @@ xfs_buf_stale(
*/
bp->b_flags &= ~_XBF_DELWRI_Q;
- /*
- * Once the buffer is marked stale and unlocked, a subsequent lookup
- * could reset b_flags. There is no guarantee that the buffer is
- * unaccounted (released to LRU) before that occurs. Drop in-flight
- * status now to preserve accounting consistency.
- */
spin_lock(&bp->b_lock);
- __xfs_buf_ioacct_dec(bp);
-
atomic_set(&bp->b_lru_ref, 0);
if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
(list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -794,18 +736,13 @@ out_put_perag:
int
_xfs_buf_read(
- struct xfs_buf *bp,
- xfs_buf_flags_t flags)
+ struct xfs_buf *bp)
{
- ASSERT(!(flags & XBF_WRITE));
ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
- bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
-
+ bp->b_flags |= XBF_READ;
xfs_buf_submit(bp);
- if (flags & XBF_ASYNC)
- return 0;
return xfs_buf_iowait(bp);
}
@@ -857,6 +794,8 @@ xfs_buf_read_map(
struct xfs_buf *bp;
int error;
+ ASSERT(!(flags & (XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD)));
+
flags |= XBF_READ;
*bpp = NULL;
@@ -870,21 +809,11 @@ xfs_buf_read_map(
/* Initiate the buffer read and wait. */
XFS_STATS_INC(target->bt_mount, xb_get_read);
bp->b_ops = ops;
- error = _xfs_buf_read(bp, flags);
-
- /* Readahead iodone already dropped the buffer, so exit. */
- if (flags & XBF_ASYNC)
- return 0;
+ error = _xfs_buf_read(bp);
} else {
/* Buffer already read; all we need to do is check it. */
error = xfs_buf_reverify(bp, ops);
- /* Readahead already finished; drop the buffer and exit. */
- if (flags & XBF_ASYNC) {
- xfs_buf_relse(bp);
- return 0;
- }
-
/* We do not want read in the flags */
bp->b_flags &= ~XBF_READ;
ASSERT(bp->b_ops != NULL || ops == NULL);
@@ -936,6 +865,7 @@ xfs_buf_readahead_map(
int nmaps,
const struct xfs_buf_ops *ops)
{
+ const xfs_buf_flags_t flags = XBF_READ | XBF_ASYNC | XBF_READ_AHEAD;
struct xfs_buf *bp;
/*
@@ -945,9 +875,21 @@ xfs_buf_readahead_map(
if (xfs_buftarg_is_mem(target))
return;
- xfs_buf_read_map(target, map, nmaps,
- XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
- __this_address);
+ if (xfs_buf_get_map(target, map, nmaps, flags | XBF_TRYLOCK, &bp))
+ return;
+ trace_xfs_buf_readahead(bp, 0, _RET_IP_);
+
+ if (bp->b_flags & XBF_DONE) {
+ xfs_buf_reverify(bp, ops);
+ xfs_buf_relse(bp);
+ return;
+ }
+ XFS_STATS_INC(target->bt_mount, xb_get_read);
+ bp->b_ops = ops;
+ bp->b_flags &= ~(XBF_WRITE | XBF_DONE);
+ bp->b_flags |= flags;
+ percpu_counter_inc(&target->bt_readahead_count);
+ xfs_buf_submit(bp);
}
/*
@@ -1003,10 +945,12 @@ xfs_buf_get_uncached(
struct xfs_buf *bp;
DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
+ /* there are currently no valid flags for xfs_buf_get_uncached */
+ ASSERT(flags == 0);
+
*bpp = NULL;
- /* flags might contain irrelevant bits, pass only what we care about */
- error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
+ error = _xfs_buf_alloc(target, &map, 1, flags, &bp);
if (error)
return error;
@@ -1060,7 +1004,6 @@ xfs_buf_rele_uncached(
spin_unlock(&bp->b_lock);
return;
}
- __xfs_buf_ioacct_dec(bp);
spin_unlock(&bp->b_lock);
xfs_buf_free(bp);
}
@@ -1079,20 +1022,12 @@ xfs_buf_rele_cached(
spin_lock(&bp->b_lock);
ASSERT(bp->b_hold >= 1);
if (bp->b_hold > 1) {
- /*
- * Drop the in-flight state if the buffer is already on the LRU
- * and it holds the only reference. This is racy because we
- * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT
- * ensures the decrement occurs only once per-buf.
- */
- if (--bp->b_hold == 1 && !list_empty(&bp->b_lru))
- __xfs_buf_ioacct_dec(bp);
+ bp->b_hold--;
goto out_unlock;
}
/* we are asked to drop the last reference */
- __xfs_buf_ioacct_dec(bp);
- if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
+ if (atomic_read(&bp->b_lru_ref)) {
/*
* If the buffer is added to the LRU, keep the reference to the
* buffer for the LRU and clear the (now stale) dispose list
@@ -1345,6 +1280,7 @@ xfs_buf_ioend_handle_error(
resubmit:
xfs_buf_ioerror(bp, 0);
bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
+ reinit_completion(&bp->b_iowait);
xfs_buf_submit(bp);
return true;
out_stale:
@@ -1355,8 +1291,9 @@ out_stale:
return false;
}
-static void
-xfs_buf_ioend(
+/* returns false if the caller needs to resubmit the I/O, else true */
+static bool
+__xfs_buf_ioend(
struct xfs_buf *bp)
{
trace_xfs_buf_iodone(bp, _RET_IP_);
@@ -1369,6 +1306,8 @@ xfs_buf_ioend(
bp->b_ops->verify_read(bp);
if (!bp->b_error)
bp->b_flags |= XBF_DONE;
+ if (bp->b_flags & XBF_READ_AHEAD)
+ percpu_counter_dec(&bp->b_target->bt_readahead_count);
} else {
if (!bp->b_error) {
bp->b_flags &= ~XBF_WRITE_FAIL;
@@ -1376,7 +1315,7 @@ xfs_buf_ioend(
}
if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
- return;
+ return false;
/* clear the retry state */
bp->b_last_error = 0;
@@ -1397,7 +1336,15 @@ xfs_buf_ioend(
bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
_XBF_LOGRECOVERY);
+ return true;
+}
+static void
+xfs_buf_ioend(
+ struct xfs_buf *bp)
+{
+ if (!__xfs_buf_ioend(bp))
+ return;
if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
else
@@ -1411,15 +1358,8 @@ xfs_buf_ioend_work(
struct xfs_buf *bp =
container_of(work, struct xfs_buf, b_ioend_work);
- xfs_buf_ioend(bp);
-}
-
-static void
-xfs_buf_ioend_async(
- struct xfs_buf *bp)
-{
- INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
- queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
+ if (__xfs_buf_ioend(bp))
+ xfs_buf_relse(bp);
}
void
@@ -1491,7 +1431,13 @@ xfs_buf_bio_end_io(
XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
xfs_buf_ioerror(bp, -EIO);
- xfs_buf_ioend_async(bp);
+ if (bp->b_flags & XBF_ASYNC) {
+ INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
+ queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
+ } else {
+ complete(&bp->b_iowait);
+ }
+
bio_put(bio);
}
@@ -1568,9 +1514,11 @@ xfs_buf_iowait(
{
ASSERT(!(bp->b_flags & XBF_ASYNC));
- trace_xfs_buf_iowait(bp, _RET_IP_);
- wait_for_completion(&bp->b_iowait);
- trace_xfs_buf_iowait_done(bp, _RET_IP_);
+ do {
+ trace_xfs_buf_iowait(bp, _RET_IP_);
+ wait_for_completion(&bp->b_iowait);
+ trace_xfs_buf_iowait_done(bp, _RET_IP_);
+ } while (!__xfs_buf_ioend(bp));
return bp->b_error;
}
@@ -1648,9 +1596,6 @@ xfs_buf_submit(
*/
bp->b_error = 0;
- if (bp->b_flags & XBF_ASYNC)
- xfs_buf_ioacct_inc(bp);
-
if ((bp->b_flags & XBF_WRITE) && !xfs_buf_verify_write(bp)) {
xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE);
xfs_buf_ioend(bp);
@@ -1776,9 +1721,8 @@ xfs_buftarg_wait(
struct xfs_buftarg *btp)
{
/*
- * First wait on the buftarg I/O count for all in-flight buffers to be
- * released. This is critical as new buffers do not make the LRU until
- * they are released.
+ * First wait for all in-flight readahead buffers to be released. This is
+ * critical as new buffers do not make the LRU until they are released.
*
* Next, flush the buffer workqueue to ensure all completion processing
* has finished. Just waiting on buffer locks is not sufficient for
@@ -1787,7 +1731,7 @@ xfs_buftarg_wait(
* all reference counts have been dropped before we start walking the
* LRU list.
*/
- while (percpu_counter_sum(&btp->bt_io_count))
+ while (percpu_counter_sum(&btp->bt_readahead_count))
delay(100);
flush_workqueue(btp->bt_mount->m_buf_workqueue);
}
@@ -1904,8 +1848,8 @@ xfs_destroy_buftarg(
struct xfs_buftarg *btp)
{
shrinker_free(btp->bt_shrinker);
- ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
- percpu_counter_destroy(&btp->bt_io_count);
+ ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);
+ percpu_counter_destroy(&btp->bt_readahead_count);
list_lru_destroy(&btp->bt_lru);
}
@@ -1959,7 +1903,7 @@ xfs_init_buftarg(
if (list_lru_init(&btp->bt_lru))
return -ENOMEM;
- if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
+ if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL))
goto out_destroy_lru;
btp->bt_shrinker =
@@ -1973,7 +1917,7 @@ xfs_init_buftarg(
return 0;
out_destroy_io_count:
- percpu_counter_destroy(&btp->bt_io_count);
+ percpu_counter_destroy(&btp->bt_readahead_count);
out_destroy_lru:
list_lru_destroy(&btp->bt_lru);
return -ENOMEM;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 3b4ed42e11c0..80e06eecaf56 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -27,7 +27,6 @@ struct xfs_buf;
#define XBF_READ (1u << 0) /* buffer intended for reading from device */
#define XBF_WRITE (1u << 1) /* buffer intended for writing to device */
#define XBF_READ_AHEAD (1u << 2) /* asynchronous read-ahead */
-#define XBF_NO_IOACCT (1u << 3) /* bypass I/O accounting (non-LRU bufs) */
#define XBF_ASYNC (1u << 4) /* initiator will not wait for completion */
#define XBF_DONE (1u << 5) /* all pages in the buffer uptodate */
#define XBF_STALE (1u << 6) /* buffer has been staled, do not find it */
@@ -58,7 +57,6 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_READ, "READ" }, \
{ XBF_WRITE, "WRITE" }, \
{ XBF_READ_AHEAD, "READ_AHEAD" }, \
- { XBF_NO_IOACCT, "NO_IOACCT" }, \
{ XBF_ASYNC, "ASYNC" }, \
{ XBF_DONE, "DONE" }, \
{ XBF_STALE, "STALE" }, \
@@ -77,7 +75,6 @@ typedef unsigned int xfs_buf_flags_t;
* Internal state flags.
*/
#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
-#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
struct xfs_buf_cache {
struct rhashtable bc_hash;
@@ -116,7 +113,7 @@ struct xfs_buftarg {
struct shrinker *bt_shrinker;
struct list_lru bt_lru;
- struct percpu_counter bt_io_count;
+ struct percpu_counter bt_readahead_count;
struct ratelimit_state bt_ioerror_rl;
/* Atomic write unit values */
@@ -291,7 +288,7 @@ int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
size_t numblks, xfs_buf_flags_t flags, struct xfs_buf **bpp,
const struct xfs_buf_ops *ops);
-int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags);
+int _xfs_buf_read(struct xfs_buf *bp);
void xfs_buf_hold(struct xfs_buf *bp);
/* Releasing Buffers */
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index 07bebbfb16ee..5b64a2b3b113 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -117,7 +117,7 @@ xmbuf_free(
struct xfs_buftarg *btp)
{
ASSERT(xfs_buftarg_is_mem(btp));
- ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
+ ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);
trace_xmbuf_free(btp);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7a7d89c345e..9a435b1ff264 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1451,9 +1451,6 @@ xfs_dax_read_fault(
trace_xfs_read_fault(ip, order);
- ret = filemap_fsnotify_fault(vmf);
- if (unlikely(ret))
- return ret;
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
ret = xfs_dax_fault_locked(vmf, order, false);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
@@ -1482,16 +1479,6 @@ xfs_write_fault(
vm_fault_t ret;
trace_xfs_write_fault(ip, order);
- /*
- * Usually we get here from ->page_mkwrite callback but in case of DAX
- * we will get here also for ordinary write fault. Handle HSM
- * notifications for that case.
- */
- if (IS_DAX(inode)) {
- ret = filemap_fsnotify_fault(vmf);
- if (unlikely(ret))
- return ret;
- }
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b3c27dbccce8..2f76531842f8 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3380,7 +3380,7 @@ xlog_do_recover(
*/
xfs_buf_lock(bp);
xfs_buf_hold(bp);
- error = _xfs_buf_read(bp, XBF_READ);
+ error = _xfs_buf_read(bp);
if (error) {
if (!xlog_is_shutdown(log)) {
xfs_buf_ioerror_alert(bp, __this_address);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 477c5262cf91..b69356582b86 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -181,14 +181,11 @@ xfs_readsb(
/*
* Allocate a (locked) buffer to hold the superblock. This will be kept
- * around at all times to optimize access to the superblock. Therefore,
- * set XBF_NO_IOACCT to make sure it doesn't hold the buftarg count
- * elevated.
+ * around at all times to optimize access to the superblock.
*/
reread:
error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
- BTOBB(sector_size), XBF_NO_IOACCT, &bp,
- buf_ops);
+ BTOBB(sector_size), 0, &bp, buf_ops);
if (error) {
if (loud)
xfs_warn(mp, "SB validate failed with error %d.", error);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index d8e6d073d64d..57bef567e011 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1407,7 +1407,7 @@ xfs_rtmount_readsb(
/* m_blkbb_log is not set up yet */
error = xfs_buf_read_uncached(mp->m_rtdev_targp, XFS_RTSB_DADDR,
- mp->m_sb.sb_blocksize >> BBSHIFT, XBF_NO_IOACCT, &bp,
+ mp->m_sb.sb_blocksize >> BBSHIFT, 0, &bp,
&xfs_rtsb_buf_ops);
if (error) {
xfs_warn(mp, "rt sb validate failed with error %d.", error);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index b29462363b81..bfc2f1249022 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -593,6 +593,7 @@ DEFINE_EVENT(xfs_buf_flags_class, name, \
DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_readahead);
TRACE_EVENT(xfs_buf_ioerror,
TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip),