summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2024-09-08 08:58:01 +0200
committerChristian Brauner <brauner@kernel.org>2024-09-12 11:58:46 +0200
commit24a988f75c8a5f16ef935c51039700e985767eb9 (patch)
treed071e54a8009b45461848dc550e0ee4800297b92
parent0f389adb4b80eef29920db2e2c99392aa5d06811 (diff)
parent11068e0b64cbb540b96e577fcca0926242ecaf58 (diff)
Merge patch series "file: remove f_version"
Christian Brauner <brauner@kernel.org> says: The f_version member in struct file isn't particularly well-defined. It is mainly used as a cookie to detect concurrent seeks when iterating directories. But it is also abused by some subsystems for completely unrelated things. It is mostly a directory specific thing that doesn't really need to live in struct file and with its wonky semantics it really lacks a specific function. For pipes, f_version is (ab)used to defer poll notifications until a write has happened. And struct pipe_inode_info is used by multiple struct files in their ->private_data so there's no chance of pushing that down into file->private_data without introducing another pointer indirection. But this should be a solvable problem. Only regular files with FMODE_ATOMIC_POS and directories require f_pos_lock. Pipes and other files don't. So this adds a union into struct file encompassing f_pos_lock and a pipe specific f_pipe member that pipes can use. This union of course can be extended to other file types and is similar to what we do in struct inode already. * patches from https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-0-6d3e4816aa7b@kernel.org: fs: remove f_version pipe: use f_pipe fs: add f_pipe ubifs: store cookie in private data ufs: store cookie in private data udf: store cookie in private data proc: store cookie in private data ocfs2: store cookie in private data input: remove f_version abuse ext4: store cookie in private data ext2: store cookie in private data affs: store cookie in private data fs: add generic_llseek_cookie() fs: use must_set_pos() fs: add must_set_pos() fs: add vfs_setpos_cookie() s390: remove unused f_version ceph: remove unused f_version adi: remove unused f_version file: remove pointless comment Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-0-6d3e4816aa7b@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--drivers/char/adi.c1
-rw-r--r--drivers/input/input.c47
-rw-r--r--drivers/s390/char/hmcdrv_dev.c3
-rw-r--r--fs/affs/dir.c44
-rw-r--r--fs/ceph/dir.c1
-rw-r--r--fs/ext2/dir.c28
-rw-r--r--fs/ext4/dir.c50
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/inline.c7
-rw-r--r--fs/file_table.c7
-rw-r--r--fs/ocfs2/dir.c3
-rw-r--r--fs/ocfs2/file.c11
-rw-r--r--fs/ocfs2/file.h1
-rw-r--r--fs/pipe.c8
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/read_write.c171
-rw-r--r--fs/ubifs/dir.c64
-rw-r--r--fs/udf/dir.c28
-rw-r--r--fs/ufs/dir.c28
-rw-r--r--include/linux/fs.h14
20 files changed, 405 insertions, 143 deletions
diff --git a/drivers/char/adi.c b/drivers/char/adi.c
index 751d7cc0da1b..c091a0282ad0 100644
--- a/drivers/char/adi.c
+++ b/drivers/char/adi.c
@@ -196,7 +196,6 @@ static loff_t adi_llseek(struct file *file, loff_t offset, int whence)
if (offset != file->f_pos) {
file->f_pos = offset;
- file->f_version = 0;
ret = offset;
}
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 54c57b267b25..19ea1888da9f 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1079,33 +1079,31 @@ static inline void input_wakeup_procfs_readers(void)
wake_up(&input_devices_poll_wait);
}
+struct input_seq_state {
+ unsigned short pos;
+ bool mutex_acquired;
+ int input_devices_state;
+};
+
static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait)
{
+ struct seq_file *seq = file->private_data;
+ struct input_seq_state *state = seq->private;
+
poll_wait(file, &input_devices_poll_wait, wait);
- if (file->f_version != input_devices_state) {
- file->f_version = input_devices_state;
+ if (state->input_devices_state != input_devices_state) {
+ state->input_devices_state = input_devices_state;
return EPOLLIN | EPOLLRDNORM;
}
return 0;
}
-union input_seq_state {
- struct {
- unsigned short pos;
- bool mutex_acquired;
- };
- void *p;
-};
-
static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos)
{
- union input_seq_state *state = (union input_seq_state *)&seq->private;
+ struct input_seq_state *state = seq->private;
int error;
- /* We need to fit into seq->private pointer */
- BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));
-
error = mutex_lock_interruptible(&input_mutex);
if (error) {
state->mutex_acquired = false;
@@ -1124,7 +1122,7 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void input_seq_stop(struct seq_file *seq, void *v)
{
- union input_seq_state *state = (union input_seq_state *)&seq->private;
+ struct input_seq_state *state = seq->private;
if (state->mutex_acquired)
mutex_unlock(&input_mutex);
@@ -1210,7 +1208,8 @@ static const struct seq_operations input_devices_seq_ops = {
static int input_proc_devices_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &input_devices_seq_ops);
+ return seq_open_private(file, &input_devices_seq_ops,
+ sizeof(struct input_seq_state));
}
static const struct proc_ops input_devices_proc_ops = {
@@ -1218,17 +1217,14 @@ static const struct proc_ops input_devices_proc_ops = {
.proc_poll = input_proc_devices_poll,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
- .proc_release = seq_release,
+ .proc_release = seq_release_private,
};
static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
{
- union input_seq_state *state = (union input_seq_state *)&seq->private;
+ struct input_seq_state *state = seq->private;
int error;
- /* We need to fit into seq->private pointer */
- BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));
-
error = mutex_lock_interruptible(&input_mutex);
if (error) {
state->mutex_acquired = false;
@@ -1243,7 +1239,7 @@ static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- union input_seq_state *state = (union input_seq_state *)&seq->private;
+ struct input_seq_state *state = seq->private;
state->pos = *pos + 1;
return seq_list_next(v, &input_handler_list, pos);
@@ -1252,7 +1248,7 @@ static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static int input_handlers_seq_show(struct seq_file *seq, void *v)
{
struct input_handler *handler = container_of(v, struct input_handler, node);
- union input_seq_state *state = (union input_seq_state *)&seq->private;
+ struct input_seq_state *state = seq->private;
seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name);
if (handler->filter)
@@ -1273,14 +1269,15 @@ static const struct seq_operations input_handlers_seq_ops = {
static int input_proc_handlers_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &input_handlers_seq_ops);
+ return seq_open_private(file, &input_handlers_seq_ops,
+ sizeof(struct input_seq_state));
}
static const struct proc_ops input_handlers_proc_ops = {
.proc_open = input_proc_handlers_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
- .proc_release = seq_release,
+ .proc_release = seq_release_private,
};
static int __init input_proc_init(void)
diff --git a/drivers/s390/char/hmcdrv_dev.c b/drivers/s390/char/hmcdrv_dev.c
index 8d50c894711f..e069dd685899 100644
--- a/drivers/s390/char/hmcdrv_dev.c
+++ b/drivers/s390/char/hmcdrv_dev.c
@@ -186,9 +186,6 @@ static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence)
if (pos < 0)
return -EINVAL;
- if (fp->f_pos != pos)
- ++fp->f_version;
-
fp->f_pos = pos;
return pos;
}
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index b2bf7016e1b3..bd40d5f08810 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -17,13 +17,44 @@
#include <linux/iversion.h>
#include "affs.h"
+struct affs_dir_data {
+ unsigned long ino;
+ u64 cookie;
+};
+
static int affs_readdir(struct file *, struct dir_context *);
+static loff_t affs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct affs_dir_data *data = file->private_data;
+
+ return generic_llseek_cookie(file, offset, whence, &data->cookie);
+}
+
+static int affs_dir_open(struct inode *inode, struct file *file)
+{
+ struct affs_dir_data *data;
+
+ data = kzalloc(sizeof(struct affs_dir_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ file->private_data = data;
+ return 0;
+}
+
+static int affs_dir_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
const struct file_operations affs_dir_operations = {
+ .open = affs_dir_open,
.read = generic_read_dir,
- .llseek = generic_file_llseek,
+ .llseek = affs_dir_llseek,
.iterate_shared = affs_readdir,
.fsync = affs_file_fsync,
+ .release = affs_dir_release,
};
/*
@@ -45,6 +76,7 @@ static int
affs_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
+ struct affs_dir_data *data = file->private_data;
struct super_block *sb = inode->i_sb;
struct buffer_head *dir_bh = NULL;
struct buffer_head *fh_bh = NULL;
@@ -59,7 +91,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos);
if (ctx->pos < 2) {
- file->private_data = (void *)0;
+ data->ino = 0;
if (!dir_emit_dots(file, ctx))
return 0;
}
@@ -80,8 +112,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
/* If the directory hasn't changed since the last call to readdir(),
* we can jump directly to where we left off.
*/
- ino = (u32)(long)file->private_data;
- if (ino && inode_eq_iversion(inode, file->f_version)) {
+ ino = data->ino;
+ if (ino && inode_eq_iversion(inode, data->cookie)) {
pr_debug("readdir() left off=%d\n", ino);
goto inside;
}
@@ -131,8 +163,8 @@ inside:
} while (ino);
}
done:
- file->f_version = inode_query_iversion(inode);
- file->private_data = (void *)(long)ino;
+ data->cookie = inode_query_iversion(inode);
+ data->ino = ino;
affs_brelse(fh_bh);
out_brelse_dir:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 18c72b305858..ddec8c9244ee 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -707,7 +707,6 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
if (offset != file->f_pos) {
file->f_pos = offset;
- file->f_version = 0;
dfi->file_info.flags &= ~CEPH_F_ATEND;
}
retval = offset;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 087457061c6e..6622c582f550 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -263,7 +263,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
- bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
+ bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data);
bool has_filetype;
if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
@@ -290,7 +290,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
offset = ext2_validate_entry(kaddr, offset, chunk_mask);
ctx->pos = (n<<PAGE_SHIFT) + offset;
}
- file->f_version = inode_query_iversion(inode);
+ *(u64 *)file->private_data = inode_query_iversion(inode);
need_revalidate = false;
}
de = (ext2_dirent *)(kaddr+offset);
@@ -703,8 +703,30 @@ not_empty:
return 0;
}
+static int ext2_dir_open(struct inode *inode, struct file *file)
+{
+ file->private_data = kzalloc(sizeof(u64), GFP_KERNEL);
+ if (!file->private_data)
+ return -ENOMEM;
+ return 0;
+}
+
+static int ext2_dir_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
+static loff_t ext2_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ return generic_llseek_cookie(file, offset, whence,
+ (u64 *)file->private_data);
+}
+
const struct file_operations ext2_dir_operations = {
- .llseek = generic_file_llseek,
+ .open = ext2_dir_open,
+ .release = ext2_dir_release,
+ .llseek = ext2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = ext2_readdir,
.unlocked_ioctl = ext2_ioctl,
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ff4514e4626b..13196afe55ce 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -133,6 +133,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
+ struct dir_private_info *info = file->private_data;
err = fscrypt_prepare_readdir(inode);
if (err)
@@ -229,7 +230,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
* readdir(2), then we might be pointing to an invalid
* dirent right now. Scan from the start of the block
* to make sure. */
- if (!inode_eq_iversion(inode, file->f_version)) {
+ if (!inode_eq_iversion(inode, info->cookie)) {
for (i = 0; i < sb->s_blocksize && i < offset; ) {
de = (struct ext4_dir_entry_2 *)
(bh->b_data + i);
@@ -249,7 +250,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
offset = i;
ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
| offset;
- file->f_version = inode_query_iversion(inode);
+ info->cookie = inode_query_iversion(inode);
}
while (ctx->pos < inode->i_size
@@ -384,6 +385,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
+ struct dir_private_info *info = file->private_data;
int dx_dir = is_dx_dir(inode);
loff_t ret, htree_max = ext4_get_htree_eof(file);
@@ -392,7 +394,7 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
htree_max, htree_max);
else
ret = ext4_llseek(file, offset, whence);
- file->f_version = inode_peek_iversion(inode) - 1;
+ info->cookie = inode_peek_iversion(inode) - 1;
return ret;
}
@@ -429,18 +431,15 @@ static void free_rb_tree_fname(struct rb_root *root)
*root = RB_ROOT;
}
-
-static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
- loff_t pos)
+static void ext4_htree_init_dir_info(struct file *filp, loff_t pos)
{
- struct dir_private_info *p;
-
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p)
- return NULL;
- p->curr_hash = pos2maj_hash(filp, pos);
- p->curr_minor_hash = pos2min_hash(filp, pos);
- return p;
+ struct dir_private_info *p = filp->private_data;
+
+ if (is_dx_dir(file_inode(filp)) && !p->initialized) {
+ p->curr_hash = pos2maj_hash(filp, pos);
+ p->curr_minor_hash = pos2min_hash(filp, pos);
+ p->initialized = true;
+ }
}
void ext4_htree_free_dir_info(struct dir_private_info *p)
@@ -552,12 +551,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
struct fname *fname;
int ret = 0;
- if (!info) {
- info = ext4_htree_create_dir_info(file, ctx->pos);
- if (!info)
- return -ENOMEM;
- file->private_data = info;
- }
+ ext4_htree_init_dir_info(file, ctx->pos);
if (ctx->pos == ext4_get_htree_eof(file))
return 0; /* EOF */
@@ -590,10 +584,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
* cached entries.
*/
if ((!info->curr_node) ||
- !inode_eq_iversion(inode, file->f_version)) {
+ !inode_eq_iversion(inode, info->cookie)) {
info->curr_node = NULL;
free_rb_tree_fname(&info->root);
- file->f_version = inode_query_iversion(inode);
+ info->cookie = inode_query_iversion(inode);
ret = ext4_htree_fill_tree(file, info->curr_hash,
info->curr_minor_hash,
&info->next_hash);
@@ -664,7 +658,19 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
return 0;
}
+static int ext4_dir_open(struct inode *inode, struct file *file)
+{
+ struct dir_private_info *info;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+ file->private_data = info;
+ return 0;
+}
+
const struct file_operations ext4_dir_operations = {
+ .open = ext4_dir_open,
.llseek = ext4_dir_llseek,
.read = generic_read_dir,
.iterate_shared = ext4_readdir,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 08acd152261e..d62a4b9b26ce 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2553,6 +2553,8 @@ struct dir_private_info {
__u32 curr_hash;
__u32 curr_minor_hash;
__u32 next_hash;
+ u64 cookie;
+ bool initialized;
};
/* calculate the first block number of the group */
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index e7a09a99837b..4282e12dc405 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1460,6 +1460,7 @@ int ext4_read_inline_dir(struct file *file,
struct ext4_iloc iloc;
void *dir_buf = NULL;
int dotdot_offset, dotdot_size, extra_offset, extra_size;
+ struct dir_private_info *info = file->private_data;
ret = ext4_get_inode_loc(inode, &iloc);
if (ret)
@@ -1503,12 +1504,12 @@ int ext4_read_inline_dir(struct file *file,
extra_size = extra_offset + inline_size;
/*
- * If the version has changed since the last call to
+ * If the cookie has changed since the last call to
* readdir(2), then we might be pointing to an invalid
* dirent right now. Scan from the start of the inline
* dir to make sure.
*/
- if (!inode_eq_iversion(inode, file->f_version)) {
+ if (!inode_eq_iversion(inode, info->cookie)) {
for (i = 0; i < extra_size && i < offset;) {
/*
* "." is with offset 0 and
@@ -1540,7 +1541,7 @@ int ext4_read_inline_dir(struct file *file,
}
offset = i;
ctx->pos = offset;
- file->f_version = inode_query_iversion(inode);
+ info->cookie = inode_query_iversion(inode);
}
while (ctx->pos < extra_size) {
diff --git a/fs/file_table.c b/fs/file_table.c
index 3ef558f27a1c..7ce4d5dac080 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -156,6 +156,13 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
}
spin_lock_init(&f->f_lock);
+ /*
+ * Note that f_pos_lock is only used for files raising
+ * FMODE_ATOMIC_POS and directories. Other files such as pipes
+ * don't need it and since f_pos_lock is in a union may reuse
+ * the space for other purposes. They are expected to initialize
+ * the respective member when opening the file.
+ */
mutex_init(&f->f_pos_lock);
f->f_flags = flags;
f->f_mode = OPEN_FMODE(flags);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f0beb173dbba..ccef3f42b333 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1932,6 +1932,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
{
int error = 0;
struct inode *inode = file_inode(file);
+ struct ocfs2_file_private *fp = file->private_data;
int lock_level = 0;
trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -1952,7 +1953,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
goto bail_nolock;
}
- error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false);
+ error = ocfs2_dir_foreach_blk(inode, &fp->cookie, ctx, false);
ocfs2_inode_unlock(inode, lock_level);
if (error)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ccc57038a977..115ab2172820 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2750,6 +2750,13 @@ out_unlock:
return remapped > 0 ? remapped : ret;
}
+static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct ocfs2_file_private *fp = file->private_data;
+
+ return generic_llseek_cookie(file, offset, whence, &fp->cookie);
+}
+
const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
@@ -2797,7 +2804,7 @@ const struct file_operations ocfs2_fops = {
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
const struct file_operations ocfs2_dops = {
- .llseek = generic_file_llseek,
+ .llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
@@ -2843,7 +2850,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
};
const struct file_operations ocfs2_dops_no_plocks = {
- .llseek = generic_file_llseek,
+ .llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 8e53e4ac1120..41e65e45a9f3 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -20,6 +20,7 @@ struct ocfs2_alloc_context;
enum ocfs2_alloc_restarted;
struct ocfs2_file_private {
+ u64 cookie;
struct file *fp_file;
struct mutex fp_mutex;
struct ocfs2_lock_res fp_flock;
diff --git a/fs/pipe.c b/fs/pipe.c
index 7dff2aa50a6d..d3735f1d6f00 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -686,7 +686,7 @@ pipe_poll(struct file *filp, poll_table *wait)
if (filp->f_mode & FMODE_READ) {
if (!pipe_empty(head, tail))
mask |= EPOLLIN | EPOLLRDNORM;
- if (!pipe->writers && filp->f_version != pipe->w_counter)
+ if (!pipe->writers && filp->f_pipe != pipe->w_counter)
mask |= EPOLLHUP;
}
@@ -945,6 +945,7 @@ int create_pipe_files(struct file **res, int flags)
}
f->private_data = inode->i_pipe;
+ f->f_pipe = 0;
res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
&pipefifo_fops);
@@ -954,6 +955,7 @@ int create_pipe_files(struct file **res, int flags)
return PTR_ERR(res[0]);
}
res[0]->private_data = inode->i_pipe;
+ res[0]->f_pipe = 0;
res[1] = f;
stream_open(inode, res[0]);
stream_open(inode, res[1]);
@@ -1108,7 +1110,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
int ret;
- filp->f_version = 0;
+ filp->f_pipe = 0;
spin_lock(&inode->i_lock);
if (inode->i_pipe) {
@@ -1155,7 +1157,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
if ((filp->f_flags & O_NONBLOCK)) {
/* suppress EPOLLHUP until we have
* seen a writer */
- filp->f_version = pipe->w_counter;
+ filp->f_pipe = pipe->w_counter;
} else {
if (wait_for_partner(pipe, &pipe->w_counter))
goto err_rd;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 72a1acd03675..988f6bbfc4bd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3870,12 +3870,12 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
if (!dir_emit_dots(file, ctx))
return 0;
- /* f_version caches the tgid value that the last readdir call couldn't
- * return. lseek aka telldir automagically resets f_version to 0.
+ /* We cache the tgid value that the last readdir call couldn't
+ * return and lseek resets it to 0.
*/
ns = proc_pid_ns(inode->i_sb);
- tid = (int)file->f_version;
- file->f_version = 0;
+ tid = (int)(intptr_t)file->private_data;
+ file->private_data = NULL;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
task;
task = next_tid(task), ctx->pos++) {
@@ -3890,7 +3890,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
proc_task_instantiate, task, NULL)) {
/* returning this tgid failed, save it as the first
* pid for the next readir call */
- file->f_version = (u64)tid;
+ file->private_data = (void *)(intptr_t)tid;
put_task_struct(task);
break;
}
@@ -3915,6 +3915,24 @@ static int proc_task_getattr(struct mnt_idmap *idmap,
return 0;
}
+/*
+ * proc_task_readdir() set @file->private_data to a positive integer
+ * value, so casting that to u64 is safe. generic_llseek_cookie() will
+ * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is
+ * here to catch any unexpected change in behavior either in
+ * proc_task_readdir() or generic_llseek_cookie().
+ */
+static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ u64 cookie = (u64)(intptr_t)file->private_data;
+ loff_t off;
+
+ off = generic_llseek_cookie(file, offset, whence, &cookie);
+ WARN_ON_ONCE(cookie > INT_MAX);
+ file->private_data = (void *)(intptr_t)cookie; /* serialized by f_pos_lock */
+ return off;
+}
+
static const struct inode_operations proc_task_inode_operations = {
.lookup = proc_task_lookup,
.getattr = proc_task_getattr,
@@ -3925,7 +3943,7 @@ static const struct inode_operations proc_task_inode_operations = {
static const struct file_operations proc_task_operations = {
.read = generic_read_dir,
.iterate_shared = proc_task_readdir,
- .llseek = generic_file_llseek,
+ .llseek = proc_dir_llseek,
};
void __init set_proc_pid_nlink(void)
diff --git a/fs/read_write.c b/fs/read_write.c
index 90e283b31ca1..b19cce8e55b9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -40,18 +40,20 @@ static inline bool unsigned_offsets(struct file *file)
}
/**
- * vfs_setpos - update the file offset for lseek
+ * vfs_setpos_cookie - update the file offset for lseek and reset cookie
* @file: file structure in question
* @offset: file offset to seek to
* @maxsize: maximum file size
+ * @cookie: cookie to reset
*
- * This is a low-level filesystem helper for updating the file offset to
- * the value specified by @offset if the given offset is valid and it is
- * not equal to the current file offset.
+ * Update the file offset to the value specified by @offset if the given
+ * offset is valid and it is not equal to the current file offset and
+ * reset the specified cookie to indicate that a seek happened.
*
* Return the specified offset on success and -EINVAL on invalid offset.
*/
-loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
+static loff_t vfs_setpos_cookie(struct file *file, loff_t offset,
+ loff_t maxsize, u64 *cookie)
{
if (offset < 0 && !unsigned_offsets(file))
return -EINVAL;
@@ -60,35 +62,48 @@ loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
if (offset != file->f_pos) {
file->f_pos = offset;
- file->f_version = 0;
+ if (cookie)
+ *cookie = 0;
}
return offset;
}
-EXPORT_SYMBOL(vfs_setpos);
/**
- * generic_file_llseek_size - generic llseek implementation for regular files
- * @file: file structure to seek on
+ * vfs_setpos - update the file offset for lseek
+ * @file: file structure in question
* @offset: file offset to seek to
- * @whence: type of seek
- * @maxsize: max size of this file in file system
- * @eof: offset used for SEEK_END position
+ * @maxsize: maximum file size
*
- * This is a variant of generic_file_llseek that allows passing in a custom
- * maximum file size and a custom EOF position, for e.g. hashed directories
+ * This is a low-level filesystem helper for updating the file offset to
+ * the value specified by @offset if the given offset is valid and it is
+ * not equal to the current file offset.
*
- * Synchronization:
- * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
- * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
- * read/writes behave like SEEK_SET against seeks.
+ * Return the specified offset on success and -EINVAL on invalid offset.
*/
-loff_t
-generic_file_llseek_size(struct file *file, loff_t offset, int whence,
- loff_t maxsize, loff_t eof)
+loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
+{
+ return vfs_setpos_cookie(file, offset, maxsize, NULL);
+}
+EXPORT_SYMBOL(vfs_setpos);
+
+/**
+ * must_set_pos - check whether f_pos has to be updated
+ * @file: file to seek on
+ * @offset: offset to use
+ * @whence: type of seek operation
+ * @eof: end of file
+ *
+ * Check whether f_pos needs to be updated and update @offset according
+ * to @whence.
+ *
+ * Return: 0 if f_pos doesn't need to be updated, 1 if f_pos has to be
+ * updated, and negative error code on failure.
+ */
+static int must_set_pos(struct file *file, loff_t *offset, int whence, loff_t eof)
{
switch (whence) {
case SEEK_END:
- offset += eof;
+ *offset += eof;
break;
case SEEK_CUR:
/*
@@ -97,23 +112,17 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
* f_pos value back to the file because a concurrent read(),
* write() or lseek() might have altered it
*/
- if (offset == 0)
- return file->f_pos;
- /*
- * f_lock protects against read/modify/write race with other
- * SEEK_CURs. Note that parallel writes and reads behave
- * like SEEK_SET.
- */
- spin_lock(&file->f_lock);
- offset = vfs_setpos(file, file->f_pos + offset, maxsize);
- spin_unlock(&file->f_lock);
- return offset;
+ if (*offset == 0) {
+ *offset = file->f_pos;
+ return 0;
+ }
+ break;
case SEEK_DATA:
/*
* In the generic case the entire file is data, so as long as
* offset isn't at the end of the file then the offset is data.
*/
- if ((unsigned long long)offset >= eof)
+ if ((unsigned long long)*offset >= eof)
return -ENXIO;
break;
case SEEK_HOLE:
@@ -121,17 +130,103 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
* There is a virtual hole at the end of the file, so as long as
* offset isn't i_size or larger, return i_size.
*/
- if ((unsigned long long)offset >= eof)
+ if ((unsigned long long)*offset >= eof)
return -ENXIO;
- offset = eof;
+ *offset = eof;
break;
}
+ return 1;
+}
+
+/**
+ * generic_file_llseek_size - generic llseek implementation for regular files
+ * @file: file structure to seek on
+ * @offset: file offset to seek to
+ * @whence: type of seek
+ * @maxsize: max size of this file in file system
+ * @eof: offset used for SEEK_END position
+ *
+ * This is a variant of generic_file_llseek that allows passing in a custom
+ * maximum file size and a custom EOF position, for e.g. hashed directories
+ *
+ * Synchronization:
+ * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
+ * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
+ * read/writes behave like SEEK_SET against seeks.
+ */
+loff_t
+generic_file_llseek_size(struct file *file, loff_t offset, int whence,
+ loff_t maxsize, loff_t eof)
+{
+ int ret;
+
+ ret = must_set_pos(file, &offset, whence, eof);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return offset;
+
+ if (whence == SEEK_CUR) {
+ /*
+ * f_lock protects against read/modify/write race with
+ * other SEEK_CURs. Note that parallel writes and reads
+ * behave like SEEK_SET.
+ */
+ guard(spinlock)(&file->f_lock);
+ return vfs_setpos(file, file->f_pos + offset, maxsize);
+ }
+
return vfs_setpos(file, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);
/**
+ * generic_llseek_cookie - versioned llseek implementation
+ * @file: file structure to seek on
+ * @offset: file offset to seek to
+ * @whence: type of seek
+ * @cookie: cookie to update
+ *
+ * See generic_file_llseek for a general description and locking assumptions.
+ *
+ * In contrast to generic_file_llseek, this function also resets a
+ * specified cookie to indicate a seek took place.
+ */
+loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence,
+ u64 *cookie)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxsize = inode->i_sb->s_maxbytes;
+ loff_t eof = i_size_read(inode);
+ int ret;
+
+ if (WARN_ON_ONCE(!cookie))
+ return -EINVAL;
+
+ /*
+ * Require that this is only used for directories that guarantee
+ * synchronization between readdir and seek so that an update to
+ * @cookie is correctly synchronized with concurrent readdir.
+ */
+ if (WARN_ON_ONCE(!(file->f_mode & FMODE_ATOMIC_POS)))
+ return -EINVAL;
+
+ ret = must_set_pos(file, &offset, whence, eof);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return offset;
+
+ /* No need to hold f_lock because we know that f_pos_lock is held. */
+ if (whence == SEEK_CUR)
+ return vfs_setpos_cookie(file, file->f_pos + offset, maxsize, cookie);
+
+ return vfs_setpos_cookie(file, offset, maxsize, cookie);
+}
+EXPORT_SYMBOL(generic_llseek_cookie);
+
+/**
* generic_file_llseek - generic llseek implementation for regular files
* @file: file structure to seek on
* @offset: file offset to seek to
@@ -270,10 +365,8 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
}
retval = -EINVAL;
if (offset >= 0 || unsigned_offsets(file)) {
- if (offset != file->f_pos) {
+ if (offset != file->f_pos)
file->f_pos = offset;
- file->f_version = 0;
- }
retval = offset;
}
out:
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index c77ea57fe696..fda82f3e16e8 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -555,6 +555,11 @@ static unsigned int vfs_dent_type(uint8_t type)
return 0;
}
+struct ubifs_dir_data {
+ struct ubifs_dent_node *dent;
+ u64 cookie;
+};
+
/*
* The classical Unix view for directory is that it is a linear array of
* (name, inode number) entries. Linux/VFS assumes this model as well.
@@ -582,6 +587,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
struct inode *dir = file_inode(file);
struct ubifs_info *c = dir->i_sb->s_fs_info;
bool encrypted = IS_ENCRYPTED(dir);
+ struct ubifs_dir_data *data = file->private_data;
dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos);
@@ -604,27 +610,27 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
fstr_real_len = fstr.len;
}
- if (file->f_version == 0) {
+ if (data->cookie == 0) {
/*
- * The file was seek'ed, which means that @file->private_data
+ * The file was seek'ed, which means that @data->dent
* is now invalid. This may also be just the first
* 'ubifs_readdir()' invocation, in which case
- * @file->private_data is NULL, and the below code is
+ * @data->dent is NULL, and the below code is
* basically a no-op.
*/
- kfree(file->private_data);
- file->private_data = NULL;
+ kfree(data->dent);
+ data->dent = NULL;
}
/*
- * 'generic_file_llseek()' unconditionally sets @file->f_version to
- * zero, and we use this for detecting whether the file was seek'ed.
+ * 'ubifs_dir_llseek()' sets @data->cookie to zero, and we use this
+ * for detecting whether the file was seek'ed.
*/
- file->f_version = 1;
+ data->cookie = 1;
/* File positions 0 and 1 correspond to "." and ".." */
if (ctx->pos < 2) {
- ubifs_assert(c, !file->private_data);
+ ubifs_assert(c, !data->dent);
if (!dir_emit_dots(file, ctx)) {
if (encrypted)
fscrypt_fname_free_buffer(&fstr);
@@ -641,10 +647,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
}
ctx->pos = key_hash_flash(c, &dent->key);
- file->private_data = dent;
+ data->dent = dent;
}
- dent = file->private_data;
+ dent = data->dent;
if (!dent) {
/*
* The directory was seek'ed to and is now readdir'ed.
@@ -658,7 +664,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
goto out;
}
ctx->pos = key_hash_flash(c, &dent->key);
- file->private_data = dent;
+ data->dent = dent;
}
while (1) {
@@ -701,15 +707,15 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
goto out;
}
- kfree(file->private_data);
+ kfree(data->dent);
ctx->pos = key_hash_flash(c, &dent->key);
- file->private_data = dent;
+ data->dent = dent;
cond_resched();
}
out:
- kfree(file->private_data);
- file->private_data = NULL;
+ kfree(data->dent);
+ data->dent = NULL;
if (encrypted)
fscrypt_fname_free_buffer(&fstr);
@@ -733,7 +739,10 @@ out:
/* Free saved readdir() state when the directory is closed */
static int ubifs_dir_release(struct inode *dir, struct file *file)
{
- kfree(file->private_data);
+ struct ubifs_dir_data *data = file->private_data;
+
+ kfree(data->dent);
+ kfree(data);
file->private_data = NULL;
return 0;
}
@@ -1712,6 +1721,24 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path,
return 0;
}
+static int ubifs_dir_open(struct inode *inode, struct file *file)
+{
+ struct ubifs_dir_data *data;
+
+ data = kzalloc(sizeof(struct ubifs_dir_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ file->private_data = data;
+ return 0;
+}
+
+static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct ubifs_dir_data *data = file->private_data;
+
+ return generic_llseek_cookie(file, offset, whence, &data->cookie);
+}
+
const struct inode_operations ubifs_dir_inode_operations = {
.lookup = ubifs_lookup,
.create = ubifs_create,
@@ -1732,7 +1759,8 @@ const struct inode_operations ubifs_dir_inode_operations = {
};
const struct file_operations ubifs_dir_operations = {
- .llseek = generic_file_llseek,
+ .open = ubifs_dir_open,
+ .llseek = ubifs_dir_llseek,
.release = ubifs_dir_release,
.read = generic_read_dir,
.iterate_shared = ubifs_readdir,
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index f94f45fe2c91..5023dfe191e8 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -60,7 +60,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
* identifying beginning of dir entry (names are under user control),
* we need to scan the directory from the beginning.
*/
- if (!inode_eq_iversion(dir, file->f_version)) {
+ if (!inode_eq_iversion(dir, *(u64 *)file->private_data)) {
emit_pos = nf_pos;
nf_pos = 0;
} else {
@@ -122,15 +122,37 @@ out_iter:
udf_fiiter_release(&iter);
out:
if (pos_valid)
- file->f_version = inode_query_iversion(dir);
+ *(u64 *)file->private_data = inode_query_iversion(dir);
kfree(fname);
return ret;
}
+static int udf_dir_open(struct inode *inode, struct file *file)
+{
+ file->private_data = kzalloc(sizeof(u64), GFP_KERNEL);
+ if (!file->private_data)
+ return -ENOMEM;
+ return 0;
+}
+
+static int udf_dir_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
+static loff_t udf_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ return generic_llseek_cookie(file, offset, whence,
+ (u64 *)file->private_data);
+}
+
/* readdir and lookup functions */
const struct file_operations udf_dir_operations = {
- .llseek = generic_file_llseek,
+ .open = udf_dir_open,
+ .release = udf_dir_release,
+ .llseek = udf_dir_llseek,
.read = generic_read_dir,
.iterate_shared = udf_readdir,
.unlocked_ioctl = udf_ioctl,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 61f25d3cf3f7..335f0ae529b4 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -435,7 +435,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
- bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
+ bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data);
unsigned flags = UFS_SB(sb)->s_flags;
UFSD("BEGIN\n");
@@ -462,7 +462,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask);
ctx->pos = (n<<PAGE_SHIFT) + offset;
}
- file->f_version = inode_query_iversion(inode);
+ *(u64 *)file->private_data = inode_query_iversion(inode);
need_revalidate = false;
}
de = (struct ufs_dir_entry *)(kaddr+offset);
@@ -646,9 +646,31 @@ not_empty:
return 0;
}
+static int ufs_dir_open(struct inode *inode, struct file *file)
+{
+ file->private_data = kzalloc(sizeof(u64), GFP_KERNEL);
+ if (!file->private_data)
+ return -ENOMEM;
+ return 0;
+}
+
+static int ufs_dir_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
+static loff_t ufs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ return generic_llseek_cookie(file, offset, whence,
+ (u64 *)file->private_data);
+}
+
const struct file_operations ufs_dir_operations = {
+ .open = ufs_dir_open,
+ .release = ufs_dir_release,
.read = generic_read_dir,
.iterate_shared = ufs_readdir,
.fsync = generic_file_fsync,
- .llseek = generic_file_llseek,
+ .llseek = ufs_dir_llseek,
};
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 58c91a52cad1..7e11ce172140 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1001,8 +1001,8 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_cred: stashed credentials of creator/opener
* @f_path: path of the file
* @f_pos_lock: lock protecting file position
+ * @f_pipe: specific to pipes
* @f_pos: file position
- * @f_version: file version
* @f_security: LSM security context of this file
* @f_owner: file owner
* @f_wb_err: writeback error
@@ -1026,13 +1026,17 @@ struct file {
const struct cred *f_cred;
/* --- cacheline 1 boundary (64 bytes) --- */
struct path f_path;
- struct mutex f_pos_lock;
+ union {
+ /* regular files (with FMODE_ATOMIC_POS) and directories */
+ struct mutex f_pos_lock;
+ /* pipes */
+ u64 f_pipe;
+ };
loff_t f_pos;
- u64 f_version;
- /* --- cacheline 2 boundary (128 bytes) --- */
#ifdef CONFIG_SECURITY
void *f_security;
#endif
+ /* --- cacheline 2 boundary (128 bytes) --- */
struct fown_struct *f_owner;
errseq_t f_wb_err;
errseq_t f_sb_err;
@@ -3202,6 +3206,8 @@ extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
int whence, loff_t maxsize, loff_t eof);
+loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence,
+ u64 *cookie);
extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
int whence, loff_t size);
extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);