summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-20 09:59:00 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-20 09:59:00 -0800
commit5f85bd6aeceaecd0ff3a5ee827bf75eb6141ad55 (patch)
tree10077e22dc6b696f798ede82e844cbed8edd26a4 /include
parent4b84a4c8d40dfbfe1becec13a6e373e871e103e9 (diff)
parent3781680fba3eab0b34b071cb9443fd5ad92d23cf (diff)
Merge tag 'vfs-6.14-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull pidfs updates from Christian Brauner: - Rework inode number allocation Recently we received a patchset that aims to enable file handle encoding and decoding via name_to_handle_at(2) and open_by_handle_at(2). A crucical step in the patch series is how to go from inode number to struct pid without leaking information into unprivileged contexts. The issue is that in order to find a struct pid the pid number in the initial pid namespace must be encoded into the file handle via name_to_handle_at(2). This can be used by containers using a separate pid namespace to learn what the pid number of a given process in the initial pid namespace is. While this is a weak information leak it could be used in various exploits and in general is an ugly wart in the design. To solve this problem a new way is needed to lookup a struct pid based on the inode number allocated for that struct pid. The other part is to remove the custom inode number allocation on 32bit systems that is also an ugly wart that should go away. Allocate unique identifiers for struct pid by simply incrementing a 64 bit counter and insert each struct pid into the rbtree so it can be looked up to decode file handles avoiding to leak actual pids across pid namespaces in file handles. On both 64 bit and 32 bit the same 64 bit identifier is used to lookup struct pid in the rbtree. On 64 bit the unique identifier for struct pid simply becomes the inode number. Comparing two pidfds continues to be as simple as comparing inode numbers. On 32 bit the 64 bit number assigned to struct pid is split into two 32 bit numbers. The lower 32 bits are used as the inode number and the upper 32 bits are used as the inode generation number. Whenever a wraparound happens on 32 bit the 64 bit number will be incremented by 2 so inode numbering starts at 2 again. When a wraparound happens on 32 bit multiple pidfds with the same inode number are likely to exist. This isn't a problem since before pidfs pidfds used the anonymous inode meaning all pidfds had the same inode number. On 32 bit sserspace can thus reconstruct the 64 bit identifier by retrieving both the inode number and the inode generation number to compare, or use file handles. This gives the same guarantees on both 32 bit and 64 bit. - Implement file handle support This is based on custom export operation methods which allows pidfs to implement permission checking and opening of pidfs file handles cleanly without hacking around in the core file handle code too much. - Support bind-mounts Allow bind-mounting pidfds. Similar to nsfs let's allow bind-mounts for pidfds. This allows pidfds to be safely recovered and checked for process recycling. Instead of checking d_ops for both nsfs and pidfs we could in a follow-up patch add a flag argument to struct dentry_operations that functions similar to file_operations->fop_flags. * tag 'vfs-6.14-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: selftests: add pidfd bind-mount tests pidfs: allow bind-mounts pidfs: lookup pid through rbtree selftests/pidfd: add pidfs file handle selftests pidfs: check for valid ioctl commands pidfs: implement file handle support exportfs: add permission method fhandle: pull CAP_DAC_READ_SEARCH check into may_decode_fh() exportfs: add open method fhandle: simplify error handling pseudofs: add support for export_ops pidfs: support FS_IOC_GETVERSION pidfs: remove 32bit inode number handling pidfs: rework inode number allocation
Diffstat (limited to 'include')
-rw-r--r--include/linux/exportfs.h20
-rw-r--r--include/linux/pid.h2
-rw-r--r--include/linux/pidfs.h3
-rw-r--r--include/linux/pseudo_fs.h1
4 files changed, 26 insertions, 0 deletions
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 4cc8801e50e3..a087606ace19 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -3,6 +3,7 @@
#define LINUX_EXPORTFS_H 1
#include <linux/types.h>
+#include <linux/path.h>
struct dentry;
struct iattr;
@@ -156,6 +157,17 @@ struct fid {
};
};
+enum handle_to_path_flags {
+ HANDLE_CHECK_PERMS = (1 << 0),
+ HANDLE_CHECK_SUBTREE = (1 << 1),
+};
+
+struct handle_to_path_ctx {
+ struct path root;
+ enum handle_to_path_flags flags;
+ unsigned int fh_flags;
+};
+
#define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */
#define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */
#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */
@@ -225,6 +237,12 @@ struct fid {
* is also a directory. In the event that it cannot be found, or storage
* space cannot be allocated, a %ERR_PTR should be returned.
*
+ * permission:
+ * Allow filesystems to specify a custom permission function.
+ *
+ * open:
+ * Allow filesystems to specify a custom open function.
+ *
* commit_metadata:
* @commit_metadata should commit metadata changes to stable storage.
*
@@ -251,6 +269,8 @@ struct export_operations {
bool write, u32 *device_generation);
int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
int nr_iomaps, struct iattr *iattr);
+ int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags);
+ struct file * (*open)(struct path *path, unsigned int oflags);
#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */
#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */
#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index a3aad9b4074c..fe575fcdb4af 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -59,6 +59,7 @@ struct pid
spinlock_t lock;
struct dentry *stashed;
u64 ino;
+ struct rb_node pidfs_node;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
struct hlist_head inodes;
@@ -68,6 +69,7 @@ struct pid
struct upid numbers[];
};
+extern seqcount_spinlock_t pidmap_lock_seq;
extern struct pid init_struct_pid;
struct file;
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 75bdf9807802..7c830d0dec9a 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -4,5 +4,8 @@
struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
+void pidfs_add_pid(struct pid *pid);
+void pidfs_remove_pid(struct pid *pid);
+extern const struct dentry_operations pidfs_dentry_operations;
#endif /* _LINUX_PID_FS_H */
diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h
index 730f77381d55..2503f7625d65 100644
--- a/include/linux/pseudo_fs.h
+++ b/include/linux/pseudo_fs.h
@@ -5,6 +5,7 @@
struct pseudo_fs_context {
const struct super_operations *ops;
+ const struct export_operations *eops;
const struct xattr_handler * const *xattr;
const struct dentry_operations *dops;
unsigned long magic;