summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 09:55:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 09:55:25 -0700
commitecd7db20474c3859d4d01f34aaabf41bd28c7d84 (patch)
tree70d787f3138a907c3e6eb53c403beac3e2bf91c9 /include
parent615e95831ec3d428cc554ac12e9439e2d66038d3 (diff)
parent572a3d1e5d3a3e335b92e2c28a63c0b27944480c (diff)
Merge tag 'v6.6-vfs.tmpfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull libfs and tmpfs updates from Christian Brauner: "This cycle saw a lot of work for tmpfs that required changes to the vfs layer. Andrew, Hugh, and I decided to take tmpfs through vfs this cycle. Things will go back to mm next cycle. Features ======== - By far the biggest work is the quota support for tmpfs. New tmpfs quota infrastructure is added to support it and a new QFMT_SHMEM uapi option is exposed. This offers user and group quotas to tmpfs (project quotas will be added later). Similar to other filesystems tmpfs quota are not supported within user namespaces yet. - Add support for user xattrs. While tmpfs already supports security xattrs (security.*) and POSIX ACLs for a long time it lacked support for user xattrs (user.*). With this pull request tmpfs will be able to support a limited number of user xattrs. This is accompanied by a fix (see below) to limit persistent simple xattr allocations. - Add support for stable directory offsets. Currently tmpfs relies on the libfs provided cursor-based mechanism for readdir. This causes issues when a tmpfs filesystem is exported via NFS. NFS clients do not open directories. Instead, each server-side readdir operation opens the directory, reads it, and then closes it. Since the cursor state for that directory is associated with the opened file it is discarded after each readdir operation. Such directory offsets are not just cached by NFS clients but also various userspace libraries based on these clients. As it stands there is no way to invalidate the caches when directory offsets have changed and the whole application depends on unchanging directory offsets. At LSFMM we discussed how to solve this problem and decided to support stable directory offsets. libfs now allows filesystems like tmpfs to use an xarrary to map a directory offset to a dentry. This mechanism is currently only used by tmpfs but can be supported by others as well. Fixes ===== - Change persistent simple xattrs allocations in libfs from GFP_KERNEL to GPF_KERNEL_ACCOUNT so they're subject to memory cgroup limits. Since this is a change to libfs it affects both tmpfs and kernfs. - Correctly verify {g,u}id mount options. A new filesystem context is created via fsopen() which records the namespace that becomes the owning namespace of the superblock when fsconfig(FSCONFIG_CMD_CREATE) is called for filesystems that are mountable in namespaces. However, fsconfig() calls can occur in a namespace different from the namespace where fsopen() has been called. Currently, when fsconfig() is called to set {g,u}id mount options the requested {g,u}id is mapped into a k{g,u}id according to the namespace where fsconfig() was called from. The resulting k{g,u}id is not guaranteed to be resolvable in the namespace of the filesystem (the one that fsopen() was called in). This means it's possible for an unprivileged user to create files owned by any group in a tmpfs mount since it's possible to set the setid bits on the tmpfs directory. The contract for {g,u}id mount options and {g,u}id values in general set from userspace has always been that they are translated according to the caller's idmapping. In so far, tmpfs has been doing the correct thing. But since tmpfs is mountable in unprivileged contexts it is also necessary to verify that the resulting {k,g}uid is representable in the namespace of the superblock to avoid such bugs. The new mount api's cross-namespace delegation abilities are already widely used. Having talked to a bunch of userspace this is the most faithful solution with minimal regression risks" * tag 'v6.6-vfs.tmpfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: tmpfs,xattr: GFP_KERNEL_ACCOUNT for simple xattrs mm: invalidation check mapping before folio_contains tmpfs: trivial support for direct IO tmpfs,xattr: enable limited user extended attributes tmpfs: track free_ispace instead of free_inodes xattr: simple_xattr_set() return old_xattr to be freed tmpfs: verify {g,u}id mount options correctly shmem: move spinlock into shmem_recalc_inode() to fix quota support libfs: Remove parent dentry locking in offset_iterate_dir() libfs: Add a lock class for the offset map's xa_lock shmem: stable directory offsets shmem: Refactor shmem_symlink() libfs: Add directory operations for stable offsets shmem: fix quota lock nesting in huge hole handling shmem: Add default quota limit mount options shmem: quota support shmem: prepare shmem quota infrastructure quota: Check presence of quota operation structures instead of ->quota_read and ->quota_write callbacks shmem: make shmem_get_inode() return ERR_PTR instead of NULL shmem: make shmem_inode_acct_block() return error
Diffstat (limited to 'include')
-rw-r--r--include/linux/fs.h18
-rw-r--r--include/linux/shmem_fs.h31
-rw-r--r--include/linux/xattr.h10
-rw-r--r--include/uapi/linux/quota.h1
4 files changed, 55 insertions, 5 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0e50f0316e0c..dfad85b1b629 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1842,6 +1842,7 @@ struct dir_context {
struct iov_iter;
struct io_uring_cmd;
+struct offset_ctx;
struct file_operations {
struct module *owner;
@@ -1935,6 +1936,7 @@ struct inode_operations {
int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+ struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -3065,6 +3067,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
const void __user *from, size_t count);
+struct offset_ctx {
+ struct xarray xa;
+ u32 next_offset;
+};
+
+void simple_offset_init(struct offset_ctx *octx);
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
+int simple_offset_rename_exchange(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry);
+void simple_offset_destroy(struct offset_ctx *octx);
+
+extern const struct file_operations simple_offset_dir_operations;
+
extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 9029abd29b1c..6b0c626620f5 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -13,6 +13,10 @@
/* inode in-kernel data */
+#ifdef CONFIG_TMPFS_QUOTA
+#define SHMEM_MAXQUOTAS 2
+#endif
+
struct shmem_inode_info {
spinlock_t lock;
unsigned int seals; /* shmem seals */
@@ -27,6 +31,10 @@ struct shmem_inode_info {
atomic_t stop_eviction; /* hold when working on inode */
struct timespec64 i_crtime; /* file creation time */
unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */
+#ifdef CONFIG_TMPFS_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+#endif
+ struct offset_ctx dir_offsets; /* stable entry offsets */
struct inode vfs_inode;
};
@@ -35,11 +43,18 @@ struct shmem_inode_info {
(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL)
+struct shmem_quota_limits {
+ qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
+ qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */
+ qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */
+ qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */
+};
+
struct shmem_sb_info {
unsigned long max_blocks; /* How many blocks are allowed */
struct percpu_counter used_blocks; /* How many are allocated */
unsigned long max_inodes; /* How many inodes are allowed */
- unsigned long free_inodes; /* How many are left for allocation */
+ unsigned long free_ispace; /* How much ispace left for allocation */
raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
umode_t mode; /* Mount mode for root directory */
unsigned char huge; /* Whether to try for hugepages */
@@ -53,6 +68,7 @@ struct shmem_sb_info {
spinlock_t shrinklist_lock; /* Protects shrinklist */
struct list_head shrinklist; /* List of shinkable inodes */
unsigned long shrinklist_len; /* Length of shrinklist */
+ struct shmem_quota_limits qlimits; /* Default quota limits */
};
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
@@ -172,4 +188,17 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
#endif /* CONFIG_SHMEM */
#endif /* CONFIG_USERFAULTFD */
+/*
+ * Used space is stored as unsigned 64-bit value in bytes but
+ * quota core supports only signed 64-bit values so use that
+ * as a limit
+ */
+#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
+#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
+
+#ifdef CONFIG_TMPFS_QUOTA
+extern const struct dquot_operations shmem_quota_operations;
+extern struct quota_format_type shmem_quota_format;
+#endif /* CONFIG_TMPFS_QUOTA */
+
#endif
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d591ef59aa98..d20051865800 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -114,13 +114,15 @@ struct simple_xattr {
};
void simple_xattrs_init(struct simple_xattrs *xattrs);
-void simple_xattrs_free(struct simple_xattrs *xattrs);
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
+size_t simple_xattr_space(const char *name, size_t size);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
+void simple_xattr_free(struct simple_xattr *xattr);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags,
- ssize_t *removed_size);
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+ const char *name, const void *value,
+ size_t size, int flags);
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size);
void simple_xattr_add(struct simple_xattrs *xattrs,
diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h
index f17c9636a859..52090105b828 100644
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -77,6 +77,7 @@
#define QFMT_VFS_V0 2
#define QFMT_OCFS2 3
#define QFMT_VFS_V1 4
+#define QFMT_SHMEM 5
/* Size of block in which space limits are passed through the quota
* interface */