summaryrefslogtreecommitdiff
path: root/mm/shmem.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 09:55:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 09:55:25 -0700
commitecd7db20474c3859d4d01f34aaabf41bd28c7d84 (patch)
tree70d787f3138a907c3e6eb53c403beac3e2bf91c9 /mm/shmem.c
parent615e95831ec3d428cc554ac12e9439e2d66038d3 (diff)
parent572a3d1e5d3a3e335b92e2c28a63c0b27944480c (diff)
Merge tag 'v6.6-vfs.tmpfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull libfs and tmpfs updates from Christian Brauner: "This cycle saw a lot of work for tmpfs that required changes to the vfs layer. Andrew, Hugh, and I decided to take tmpfs through vfs this cycle. Things will go back to mm next cycle. Features ======== - By far the biggest work is the quota support for tmpfs. New tmpfs quota infrastructure is added to support it and a new QFMT_SHMEM uapi option is exposed. This offers user and group quotas to tmpfs (project quotas will be added later). Similar to other filesystems tmpfs quota are not supported within user namespaces yet. - Add support for user xattrs. While tmpfs already supports security xattrs (security.*) and POSIX ACLs for a long time it lacked support for user xattrs (user.*). With this pull request tmpfs will be able to support a limited number of user xattrs. This is accompanied by a fix (see below) to limit persistent simple xattr allocations. - Add support for stable directory offsets. Currently tmpfs relies on the libfs provided cursor-based mechanism for readdir. This causes issues when a tmpfs filesystem is exported via NFS. NFS clients do not open directories. Instead, each server-side readdir operation opens the directory, reads it, and then closes it. Since the cursor state for that directory is associated with the opened file it is discarded after each readdir operation. Such directory offsets are not just cached by NFS clients but also various userspace libraries based on these clients. As it stands there is no way to invalidate the caches when directory offsets have changed and the whole application depends on unchanging directory offsets. At LSFMM we discussed how to solve this problem and decided to support stable directory offsets. libfs now allows filesystems like tmpfs to use an xarrary to map a directory offset to a dentry. This mechanism is currently only used by tmpfs but can be supported by others as well. Fixes ===== - Change persistent simple xattrs allocations in libfs from GFP_KERNEL to GPF_KERNEL_ACCOUNT so they're subject to memory cgroup limits. Since this is a change to libfs it affects both tmpfs and kernfs. - Correctly verify {g,u}id mount options. A new filesystem context is created via fsopen() which records the namespace that becomes the owning namespace of the superblock when fsconfig(FSCONFIG_CMD_CREATE) is called for filesystems that are mountable in namespaces. However, fsconfig() calls can occur in a namespace different from the namespace where fsopen() has been called. Currently, when fsconfig() is called to set {g,u}id mount options the requested {g,u}id is mapped into a k{g,u}id according to the namespace where fsconfig() was called from. The resulting k{g,u}id is not guaranteed to be resolvable in the namespace of the filesystem (the one that fsopen() was called in). This means it's possible for an unprivileged user to create files owned by any group in a tmpfs mount since it's possible to set the setid bits on the tmpfs directory. The contract for {g,u}id mount options and {g,u}id values in general set from userspace has always been that they are translated according to the caller's idmapping. In so far, tmpfs has been doing the correct thing. But since tmpfs is mountable in unprivileged contexts it is also necessary to verify that the resulting {k,g}uid is representable in the namespace of the superblock to avoid such bugs. The new mount api's cross-namespace delegation abilities are already widely used. Having talked to a bunch of userspace this is the most faithful solution with minimal regression risks" * tag 'v6.6-vfs.tmpfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: tmpfs,xattr: GFP_KERNEL_ACCOUNT for simple xattrs mm: invalidation check mapping before folio_contains tmpfs: trivial support for direct IO tmpfs,xattr: enable limited user extended attributes tmpfs: track free_ispace instead of free_inodes xattr: simple_xattr_set() return old_xattr to be freed tmpfs: verify {g,u}id mount options correctly shmem: move spinlock into shmem_recalc_inode() to fix quota support libfs: Remove parent dentry locking in offset_iterate_dir() libfs: Add a lock class for the offset map's xa_lock shmem: stable directory offsets shmem: Refactor shmem_symlink() libfs: Add directory operations for stable offsets shmem: fix quota lock nesting in huge hole handling shmem: Add default quota limit mount options shmem: quota support shmem: prepare shmem quota infrastructure quota: Check presence of quota operation structures instead of ->quota_read and ->quota_write callbacks shmem: make shmem_get_inode() return ERR_PTR instead of NULL shmem: make shmem_inode_acct_block() return error
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--mm/shmem.c825
1 files changed, 603 insertions, 222 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 0bb097712c8b..479d1ce65868 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -78,6 +78,7 @@ static struct vfsmount *shm_mnt;
#include <uapi/linux/memfd.h>
#include <linux/rmap.h>
#include <linux/uuid.h>
+#include <linux/quotaops.h>
#include <linux/uaccess.h>
@@ -89,6 +90,9 @@ static struct vfsmount *shm_mnt;
/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20
+/* Pretend that one inode + its dentry occupy this much memory */
+#define BOGO_INODE_SIZE 1024
+
/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128
@@ -116,11 +120,14 @@ struct shmem_options {
int huge;
int seen;
bool noswap;
+ unsigned short quota_types;
+ struct shmem_quota_limits qlimits;
#define SHMEM_SEEN_BLOCKS 1
#define SHMEM_SEEN_INODES 2
#define SHMEM_SEEN_HUGE 4
#define SHMEM_SEEN_INUMS 8
#define SHMEM_SEEN_NOSWAP 16
+#define SHMEM_SEEN_QUOTA 32
};
#ifdef CONFIG_TMPFS
@@ -133,7 +140,8 @@ static unsigned long shmem_default_max_inodes(void)
{
unsigned long nr_pages = totalram_pages();
- return min(nr_pages - totalhigh_pages(), nr_pages / 2);
+ return min3(nr_pages - totalhigh_pages(), nr_pages / 2,
+ ULONG_MAX / BOGO_INODE_SIZE);
}
#endif
@@ -199,33 +207,47 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}
-static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
+static int shmem_inode_acct_block(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ int err = -ENOSPC;
if (shmem_acct_block(info->flags, pages))
- return false;
+ return err;
+ might_sleep(); /* when quotas */
if (sbinfo->max_blocks) {
if (percpu_counter_compare(&sbinfo->used_blocks,
sbinfo->max_blocks - pages) > 0)
goto unacct;
+
+ err = dquot_alloc_block_nodirty(inode, pages);
+ if (err)
+ goto unacct;
+
percpu_counter_add(&sbinfo->used_blocks, pages);
+ } else {
+ err = dquot_alloc_block_nodirty(inode, pages);
+ if (err)
+ goto unacct;
}
- return true;
+ return 0;
unacct:
shmem_unacct_blocks(info->flags, pages);
- return false;
+ return err;
}
-static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
+static void shmem_inode_unacct_blocks(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ might_sleep(); /* when quotas */
+ dquot_free_block_nodirty(inode, pages);
+
if (sbinfo->max_blocks)
percpu_counter_sub(&sbinfo->used_blocks, pages);
shmem_unacct_blocks(info->flags, pages);
@@ -254,6 +276,47 @@ bool vma_is_shmem(struct vm_area_struct *vma)
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
+#ifdef CONFIG_TMPFS_QUOTA
+
+static int shmem_enable_quotas(struct super_block *sb,
+ unsigned short quota_types)
+{
+ int type, err = 0;
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+ for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
+ if (!(quota_types & (1 << type)))
+ continue;
+ err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
+ DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED);
+ if (err)
+ goto out_err;
+ }
+ return 0;
+
+out_err:
+ pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
+ type, err);
+ for (type--; type >= 0; type--)
+ dquot_quota_off(sb, type);
+ return err;
+}
+
+static void shmem_disable_quotas(struct super_block *sb)
+{
+ int type;
+
+ for (type = 0; type < SHMEM_MAXQUOTAS; type++)
+ dquot_quota_off(sb, type);
+}
+
+static struct dquot **shmem_get_dquots(struct inode *inode)
+{
+ return SHMEM_I(inode)->i_dquot;
+}
+#endif /* CONFIG_TMPFS_QUOTA */
+
/*
* shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
* produces a novel ino for the newly allocated inode.
@@ -272,11 +335,11 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
if (!(sb->s_flags & SB_KERNMOUNT)) {
raw_spin_lock(&sbinfo->stat_lock);
if (sbinfo->max_inodes) {
- if (!sbinfo->free_inodes) {
+ if (sbinfo->free_ispace < BOGO_INODE_SIZE) {
raw_spin_unlock(&sbinfo->stat_lock);
return -ENOSPC;
}
- sbinfo->free_inodes--;
+ sbinfo->free_ispace -= BOGO_INODE_SIZE;
}
if (inop) {
ino = sbinfo->next_ino++;
@@ -330,12 +393,12 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
return 0;
}
-static void shmem_free_inode(struct super_block *sb)
+static void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
if (sbinfo->max_inodes) {
raw_spin_lock(&sbinfo->stat_lock);
- sbinfo->free_inodes++;
+ sbinfo->free_ispace += BOGO_INODE_SIZE + freed_ispace;
raw_spin_unlock(&sbinfo->stat_lock);
}
}
@@ -343,62 +406,65 @@ static void shmem_free_inode(struct super_block *sb)
/**
* shmem_recalc_inode - recalculate the block usage of an inode
* @inode: inode to recalc
+ * @alloced: the change in number of pages allocated to inode
+ * @swapped: the change in number of pages swapped from inode
*
* We have to calculate the free blocks since the mm can drop
* undirtied hole pages behind our back.
*
* But normally info->alloced == inode->i_mapping->nrpages + info->swapped
* So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
- *
- * It has to be called with the spinlock held.
*/
-static void shmem_recalc_inode(struct inode *inode)
+static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
{
struct shmem_inode_info *info = SHMEM_I(inode);
long freed;
- freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
- if (freed > 0) {
+ spin_lock(&info->lock);
+ info->alloced += alloced;
+ info->swapped += swapped;
+ freed = info->alloced - info->swapped -
+ READ_ONCE(inode->i_mapping->nrpages);
+ /*
+ * Special case: whereas normally shmem_recalc_inode() is called
+ * after i_mapping->nrpages has already been adjusted (up or down),
+ * shmem_writepage() has to raise swapped before nrpages is lowered -
+ * to stop a racing shmem_recalc_inode() from thinking that a page has
+ * been freed. Compensate here, to avoid the need for a followup call.
+ */
+ if (swapped > 0)
+ freed += swapped;
+ if (freed > 0)
info->alloced -= freed;
- inode->i_blocks -= freed * BLOCKS_PER_PAGE;
+ spin_unlock(&info->lock);
+
+ /* The quota case may block */
+ if (freed > 0)
shmem_inode_unacct_blocks(inode, freed);
- }
}
bool shmem_charge(struct inode *inode, long pages)
{
- struct shmem_inode_info *info = SHMEM_I(inode);
- unsigned long flags;
+ struct address_space *mapping = inode->i_mapping;
- if (!shmem_inode_acct_block(inode, pages))
+ if (shmem_inode_acct_block(inode, pages))
return false;
/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
- inode->i_mapping->nrpages += pages;
-
- spin_lock_irqsave(&info->lock, flags);
- info->alloced += pages;
- inode->i_blocks += pages * BLOCKS_PER_PAGE;
- shmem_recalc_inode(inode);
- spin_unlock_irqrestore(&info->lock, flags);
+ xa_lock_irq(&mapping->i_pages);
+ mapping->nrpages += pages;
+ xa_unlock_irq(&mapping->i_pages);
+ shmem_recalc_inode(inode, pages, 0);
return true;
}
void shmem_uncharge(struct inode *inode, long pages)
{
- struct shmem_inode_info *info = SHMEM_I(inode);
- unsigned long flags;
-
+ /* pages argument is currently unused: keep it to help debugging */
/* nrpages adjustment done by __filemap_remove_folio() or caller */
- spin_lock_irqsave(&info->lock, flags);
- info->alloced -= pages;
- inode->i_blocks -= pages * BLOCKS_PER_PAGE;
- shmem_recalc_inode(inode);
- spin_unlock_irqrestore(&info->lock, flags);
-
- shmem_inode_unacct_blocks(inode, pages);
+ shmem_recalc_inode(inode, 0, 0);
}
/*
@@ -1040,10 +1106,7 @@ whole_folios:
folio_batch_release(&fbatch);
}
- spin_lock_irq(&info->lock);
- info->swapped -= nr_swaps_freed;
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, 0, -nr_swaps_freed);
}
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
@@ -1061,11 +1124,9 @@ static int shmem_getattr(struct mnt_idmap *idmap,
struct inode *inode = path->dentry->d_inode;
struct shmem_inode_info *info = SHMEM_I(inode);
- if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
- spin_lock_irq(&info->lock);
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
- }
+ if (info->alloced - info->swapped != inode->i_mapping->nrpages)
+ shmem_recalc_inode(inode, 0, 0);
+
if (info->fsflags & FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
if (info->fsflags & FS_IMMUTABLE_FL)
@@ -1142,6 +1203,21 @@ static int shmem_setattr(struct mnt_idmap *idmap,
}
}
+ if (is_quota_modification(idmap, inode, attr)) {
+ error = dquot_initialize(inode);
+ if (error)
+ return error;
+ }
+
+ /* Transfer quota accounting */
+ if (i_uid_needs_update(idmap, attr, inode) ||
+ i_gid_needs_update(idmap, attr, inode)) {
+ error = dquot_transfer(idmap, inode, attr);
+
+ if (error)
+ return error;
+ }
+
setattr_copy(idmap, inode, attr);
if (attr->ia_valid & ATTR_MODE)
error = posix_acl_chmod(idmap, dentry, inode->i_mode);
@@ -1158,6 +1234,7 @@ static void shmem_evict_inode(struct inode *inode)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ size_t freed = 0;
if (shmem_mapping(inode->i_mapping)) {
shmem_unacct_size(info->flags, inode->i_size);
@@ -1184,10 +1261,14 @@ static void shmem_evict_inode(struct inode *inode)
}
}
- simple_xattrs_free(&info->xattrs);
+ simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+ shmem_free_inode(inode->i_sb, freed);
WARN_ON(inode->i_blocks);
- shmem_free_inode(inode->i_sb);
clear_inode(inode);
+#ifdef CONFIG_TMPFS_QUOTA
+ dquot_free_inode(inode);
+ dquot_drop(inode);
+#endif
}
static int shmem_find_swap_entries(struct address_space *mapping,
@@ -1431,11 +1512,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (add_to_swap_cache(folio, swap,
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
NULL) == 0) {
- spin_lock_irq(&info->lock);
- shmem_recalc_inode(inode);
- info->swapped++;
- spin_unlock_irq(&info->lock);
-
+ shmem_recalc_inode(inode, 0, 1);
swap_shmem_alloc(swap);
shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
@@ -1590,13 +1667,14 @@ static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
struct shmem_inode_info *info = SHMEM_I(inode);
struct folio *folio;
int nr;
- int err = -ENOSPC;
+ int err;
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
huge = false;
nr = huge ? HPAGE_PMD_NR : 1;
- if (!shmem_inode_acct_block(inode, nr))
+ err = shmem_inode_acct_block(inode, nr);
+ if (err)
goto failed;
if (huge)
@@ -1705,7 +1783,6 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
struct folio *folio, swp_entry_t swap)
{
struct address_space *mapping = inode->i_mapping;
- struct shmem_inode_info *info = SHMEM_I(inode);
swp_entry_t swapin_error;
void *old;
@@ -1718,16 +1795,12 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
folio_wait_writeback(folio);
delete_from_swap_cache(folio);
- spin_lock_irq(&info->lock);
/*
- * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks won't
- * be 0 when inode is released and thus trigger WARN_ON(inode->i_blocks) in
- * shmem_evict_inode.
+ * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
+ * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
+ * in shmem_evict_inode().
*/
- info->alloced--;
- info->swapped--;
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, -1, -1);
swap_free(swap);
}
@@ -1814,10 +1887,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
if (error)
goto failed;
- spin_lock_irq(&info->lock);
- info->swapped--;
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, 0, -1);
if (sgp == SGP_WRITE)
folio_mark_accessed(folio);
@@ -1982,13 +2052,9 @@ alloc_nohuge:
charge_mm);
if (error)
goto unacct;
- folio_add_lru(folio);
- spin_lock_irq(&info->lock);
- info->alloced += folio_nr_pages(folio);
- inode->i_blocks += (blkcnt_t)BLOCKS_PER_PAGE << folio_order(folio);
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ folio_add_lru(folio);
+ shmem_recalc_inode(inode, folio_nr_pages(folio), 0);
alloced = true;
if (folio_test_pmd_mappable(folio) &&
@@ -2037,9 +2103,7 @@ clear:
if (alloced) {
folio_clear_dirty(folio);
filemap_remove_folio(folio);
- spin_lock_irq(&info->lock);
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, 0, 0);
}
error = -EINVAL;
goto unlock;
@@ -2065,9 +2129,7 @@ unlock:
folio_put(folio);
}
if (error == -ENOSPC && !once++) {
- spin_lock_irq(&info->lock);
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, 0, 0);
goto repeat;
}
if (error == -EEXIST)
@@ -2328,6 +2390,12 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
+static int shmem_file_open(struct inode *inode, struct file *file)
+{
+ file->f_mode |= FMODE_CAN_ODIRECT;
+ return generic_file_open(inode, file);
+}
+
#ifdef CONFIG_TMPFS_XATTR
static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
@@ -2357,78 +2425,128 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
#define shmem_initxattrs NULL
#endif
-static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb,
- struct inode *dir, umode_t mode, dev_t dev,
- unsigned long flags)
+static struct offset_ctx *shmem_get_offset_ctx(struct inode *inode)
+{
+ return &SHMEM_I(inode)->dir_offsets;
+}
+
+static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
+ struct super_block *sb,
+ struct inode *dir, umode_t mode,
+ dev_t dev, unsigned long flags)
{
struct inode *inode;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
ino_t ino;
+ int err;
+
+ err = shmem_reserve_inode(sb, &ino);
+ if (err)
+ return ERR_PTR(err);
- if (shmem_reserve_inode(sb, &ino))
- return NULL;
inode = new_inode(sb);
- if (inode) {
- inode->i_ino = ino;
- inode_init_owner(idmap, inode, dir, mode);
- inode->i_blocks = 0;
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
- inode->i_generation = get_random_u32();
- info = SHMEM_I(inode);
- memset(info, 0, (char *)inode - (char *)info);
- spin_lock_init(&info->lock);
- atomic_set(&info->stop_eviction, 0);
- info->seals = F_SEAL_SEAL;
- info->flags = flags & VM_NORESERVE;
- info->i_crtime = inode->i_mtime;
- info->fsflags = (dir == NULL) ? 0 :
- SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
- if (info->fsflags)
- shmem_set_inode_flags(inode, info->fsflags);
- INIT_LIST_HEAD(&info->shrinklist);
- INIT_LIST_HEAD(&info->swaplist);
- if (sbinfo->noswap)
- mapping_set_unevictable(inode->i_mapping);
- simple_xattrs_init(&info->xattrs);
- cache_no_acl(inode);
- mapping_set_large_folios(inode->i_mapping);
-
- switch (mode & S_IFMT) {
- default:
- inode->i_op = &shmem_special_inode_operations;
- init_special_inode(inode, mode, dev);
- break;
- case S_IFREG:
- inode->i_mapping->a_ops = &shmem_aops;
- inode->i_op = &shmem_inode_operations;
- inode->i_fop = &shmem_file_operations;
- mpol_shared_policy_init(&info->policy,
- shmem_get_sbmpol(sbinfo));
- break;
- case S_IFDIR:
- inc_nlink(inode);
- /* Some things misbehave if size == 0 on a directory */
- inode->i_size = 2 * BOGO_DIRENT_SIZE;
- inode->i_op = &shmem_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
- break;
- case S_IFLNK:
- /*
- * Must not load anything in the rbtree,
- * mpol_free_shared_policy will not be called.
- */
- mpol_shared_policy_init(&info->policy, NULL);
- break;
- }
+ if (!inode) {
+ shmem_free_inode(sb, 0);
+ return ERR_PTR(-ENOSPC);
+ }
- lockdep_annotate_inode_mutex_key(inode);
- } else
- shmem_free_inode(sb);
+ inode->i_ino = ino;
+ inode_init_owner(idmap, inode, dir, mode);
+ inode->i_blocks = 0;
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ inode->i_generation = get_random_u32();
+ info = SHMEM_I(inode);
+ memset(info, 0, (char *)inode - (char *)info);
+ spin_lock_init(&info->lock);
+ atomic_set(&info->stop_eviction, 0);
+ info->seals = F_SEAL_SEAL;
+ info->flags = flags & VM_NORESERVE;
+ info->i_crtime = inode->i_mtime;
+ info->fsflags = (dir == NULL) ? 0 :
+ SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
+ if (info->fsflags)
+ shmem_set_inode_flags(inode, info->fsflags);
+ INIT_LIST_HEAD(&info->shrinklist);
+ INIT_LIST_HEAD(&info->swaplist);
+ INIT_LIST_HEAD(&info->swaplist);
+ if (sbinfo->noswap)
+ mapping_set_unevictable(inode->i_mapping);
+ simple_xattrs_init(&info->xattrs);
+ cache_no_acl(inode);
+ mapping_set_large_folios(inode->i_mapping);
+
+ switch (mode & S_IFMT) {
+ default:
+ inode->i_op = &shmem_special_inode_operations;
+ init_special_inode(inode, mode, dev);
+ break;
+ case S_IFREG:
+ inode->i_mapping->a_ops = &shmem_aops;
+ inode->i_op = &shmem_inode_operations;
+ inode->i_fop = &shmem_file_operations;
+ mpol_shared_policy_init(&info->policy,
+ shmem_get_sbmpol(sbinfo));
+ break;
+ case S_IFDIR:
+ inc_nlink(inode);
+ /* Some things misbehave if size == 0 on a directory */
+ inode->i_size = 2 * BOGO_DIRENT_SIZE;
+ inode->i_op = &shmem_dir_inode_operations;
+ inode->i_fop = &simple_offset_dir_operations;
+ simple_offset_init(shmem_get_offset_ctx(inode));
+ break;
+ case S_IFLNK:
+ /*
+ * Must not load anything in the rbtree,
+ * mpol_free_shared_policy will not be called.
+ */
+ mpol_shared_policy_init(&info->policy, NULL);
+ break;
+ }
+
+ lockdep_annotate_inode_mutex_key(inode);
return inode;
}
+#ifdef CONFIG_TMPFS_QUOTA
+static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+ struct super_block *sb, struct inode *dir,
+ umode_t mode, dev_t dev, unsigned long flags)
+{
+ int err;
+ struct inode *inode;
+
+ inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
+ if (IS_ERR(inode))
+ return inode;
+
+ err = dquot_initialize(inode);
+ if (err)
+ goto errout;
+
+ err = dquot_alloc_inode(inode);
+ if (err) {
+ dquot_drop(inode);
+ goto errout;
+ }
+ return inode;
+
+errout:
+ inode->i_flags |= S_NOQUOTA;
+ iput(inode);
+ return ERR_PTR(err);
+}
+#else
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+ struct super_block *sb, struct inode *dir,
+ umode_t mode, dev_t dev, unsigned long flags)
+{
+ return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
+}
+#endif /* CONFIG_TMPFS_QUOTA */
+
#ifdef CONFIG_USERFAULTFD
int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
@@ -2447,7 +2565,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
int ret;
pgoff_t max_off;
- if (!shmem_inode_acct_block(inode, 1)) {
+ if (shmem_inode_acct_block(inode, 1)) {
/*
* We may have got a page, returned -ENOENT triggering a retry,
* and now we find ourselves with -ENOMEM. Release the page, to
@@ -2529,12 +2647,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
if (ret)
goto out_delete_from_cache;
- spin_lock_irq(&info->lock);
- info->alloced++;
- inode->i_blocks += BLOCKS_PER_PAGE;
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
-
+ shmem_recalc_inode(inode, 1, 0);
folio_unlock(folio);
return 0;
out_delete_from_cache:
@@ -2733,6 +2846,28 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval ? retval : error;
}
+static ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto unlock;
+ ret = file_remove_privs(file);
+ if (ret)
+ goto unlock;
+ ret = file_update_time(file);
+ if (ret)
+ goto unlock;
+ ret = generic_perform_write(iocb, from);
+unlock:
+ inode_unlock(inode);
+ return ret;
+}
+
static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
@@ -3057,7 +3192,7 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
}
if (sbinfo->max_inodes) {
buf->f_files = sbinfo->max_inodes;
- buf->f_ffree = sbinfo->free_inodes;
+ buf->f_ffree = sbinfo->free_ispace / BOGO_INODE_SIZE;
}
/* else leave those fields 0 like simple_statfs */
@@ -3074,27 +3209,32 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
- int error = -ENOSPC;
+ int error;
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
- if (inode) {
- error = simple_acl_create(dir, inode);
- if (error)
- goto out_iput;
- error = security_inode_init_security(inode, dir,
- &dentry->d_name,
- shmem_initxattrs, NULL);
- if (error && error != -EOPNOTSUPP)
- goto out_iput;
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
- error = 0;
- dir->i_size += BOGO_DIRENT_SIZE;
- dir->i_mtime = inode_set_ctime_current(dir);
- inode_inc_iversion(dir);
- d_instantiate(dentry, inode);
- dget(dentry); /* Extra count - pin the dentry in core */
- }
+ error = simple_acl_create(dir, inode);
+ if (error)
+ goto out_iput;
+ error = security_inode_init_security(inode, dir,
+ &dentry->d_name,
+ shmem_initxattrs, NULL);
+ if (error && error != -EOPNOTSUPP)
+ goto out_iput;
+
+ error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+ if (error)
+ goto out_iput;
+
+ dir->i_size += BOGO_DIRENT_SIZE;
+ dir->i_mtime = inode_set_ctime_current(dir);
+ inode_inc_iversion(dir);
+ d_instantiate(dentry, inode);
+ dget(dentry); /* Extra count - pin the dentry in core */
return error;
+
out_iput:
iput(inode);
return error;
@@ -3105,20 +3245,26 @@ shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct inode *inode;
- int error = -ENOSPC;
+ int error;
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE);
- if (inode) {
- error = security_inode_init_security(inode, dir,
- NULL,
- shmem_initxattrs, NULL);
- if (error && error != -EOPNOTSUPP)
- goto out_iput;
- error = simple_acl_create(dir, inode);
- if (error)
- goto out_iput;
- d_tmpfile(file, inode);
+
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
+ goto err_out;
}
+
+ error = security_inode_init_security(inode, dir,
+ NULL,
+ shmem_initxattrs, NULL);
+ if (error && error != -EOPNOTSUPP)
+ goto out_iput;
+ error = simple_acl_create(dir, inode);
+ if (error)
+ goto out_iput;
+ d_tmpfile(file, inode);
+
+err_out:
return finish_open_simple(file, error);
out_iput:
iput(inode);
@@ -3164,6 +3310,13 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
goto out;
}
+ ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+ if (ret) {
+ if (inode->i_nlink)
+ shmem_free_inode(inode->i_sb, 0);
+ goto out;
+ }
+
dir->i_size += BOGO_DIRENT_SIZE;
dir->i_mtime = inode_set_ctime_to_ts(dir,
inode_set_ctime_current(inode));
@@ -3181,7 +3334,9 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = d_inode(dentry);
if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
- shmem_free_inode(inode->i_sb);
+ shmem_free_inode(inode->i_sb, 0);
+
+ simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
dir->i_size -= BOGO_DIRENT_SIZE;
dir->i_mtime = inode_set_ctime_to_ts(dir,
@@ -3242,24 +3397,29 @@ static int shmem_rename2(struct mnt_idmap *idmap,
{
struct inode *inode = d_inode(old_dentry);
int they_are_dirs = S_ISDIR(inode->i_mode);
+ int error;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
if (flags & RENAME_EXCHANGE)
- return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+ return simple_offset_rename_exchange(old_dir, old_dentry,
+ new_dir, new_dentry);
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {
- int error;
-
error = shmem_whiteout(idmap, old_dir, old_dentry);
if (error)
return error;
}
+ simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry);
+ error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry);
+ if (error)
+ return error;
+
if (d_really_is_positive(new_dentry)) {
(void) shmem_unlink(new_dir, new_dentry);
if (they_are_dirs) {
@@ -3293,31 +3453,32 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
VM_NORESERVE);
- if (!inode)
- return -ENOSPC;
+
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
error = security_inode_init_security(inode, dir, &dentry->d_name,
shmem_initxattrs, NULL);
- if (error && error != -EOPNOTSUPP) {
- iput(inode);
- return error;
- }
+ if (error && error != -EOPNOTSUPP)
+ goto out_iput;
+
+ error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+ if (error)
+ goto out_iput;
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
inode->i_link = kmemdup(symname, len, GFP_KERNEL);
if (!inode->i_link) {
- iput(inode);
- return -ENOMEM;
+ error = -ENOMEM;
+ goto out_remove_offset;
}
inode->i_op = &shmem_short_symlink_operations;
} else {
inode_nohighmem(inode);
error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
- if (error) {
- iput(inode);
- return error;
- }
+ if (error)
+ goto out_remove_offset;
inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
memcpy(folio_address(folio), symname, len);
@@ -3332,6 +3493,12 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
d_instantiate(dentry, inode);
dget(dentry);
return 0;
+
+out_remove_offset:
+ simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
+out_iput:
+ iput(inode);
+ return error;
}
static void shmem_put_link(void *arg)
@@ -3419,21 +3586,40 @@ static int shmem_initxattrs(struct inode *inode,
void *fs_info)
{
struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
const struct xattr *xattr;
struct simple_xattr *new_xattr;
+ size_t ispace = 0;
size_t len;
+ if (sbinfo->max_inodes) {
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ ispace += simple_xattr_space(xattr->name,
+ xattr->value_len + XATTR_SECURITY_PREFIX_LEN);
+ }
+ if (ispace) {
+ raw_spin_lock(&sbinfo->stat_lock);
+ if (sbinfo->free_ispace < ispace)
+ ispace = 0;
+ else
+ sbinfo->free_ispace -= ispace;
+ raw_spin_unlock(&sbinfo->stat_lock);
+ if (!ispace)
+ return -ENOSPC;
+ }
+ }
+
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
if (!new_xattr)
- return -ENOMEM;
+ break;
len = strlen(xattr->name) + 1;
new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!new_xattr->name) {
kvfree(new_xattr);
- return -ENOMEM;
+ break;
}
memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
@@ -3444,6 +3630,16 @@ static int shmem_initxattrs(struct inode *inode,
simple_xattr_add(&info->xattrs, new_xattr);
}
+ if (xattr->name != NULL) {
+ if (ispace) {
+ raw_spin_lock(&sbinfo->stat_lock);
+ sbinfo->free_ispace += ispace;
+ raw_spin_unlock(&sbinfo->stat_lock);
+ }
+ simple_xattrs_free(&info->xattrs, NULL);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -3464,15 +3660,40 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
size_t size, int flags)
{
struct shmem_inode_info *info = SHMEM_I(inode);
- int err;
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct simple_xattr *old_xattr;
+ size_t ispace = 0;
name = xattr_full_name(handler, name);
- err = simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
- if (!err) {
+ if (value && sbinfo->max_inodes) {
+ ispace = simple_xattr_space(name, size);
+ raw_spin_lock(&sbinfo->stat_lock);
+ if (sbinfo->free_ispace < ispace)
+ ispace = 0;
+ else
+ sbinfo->free_ispace -= ispace;
+ raw_spin_unlock(&sbinfo->stat_lock);
+ if (!ispace)
+ return -ENOSPC;
+ }
+
+ old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+ if (!IS_ERR(old_xattr)) {
+ ispace = 0;
+ if (old_xattr && sbinfo->max_inodes)
+ ispace = simple_xattr_space(old_xattr->name,
+ old_xattr->size);
+ simple_xattr_free(old_xattr);
+ old_xattr = NULL;
inode_set_ctime_current(inode);
inode_inc_iversion(inode);
}
- return err;
+ if (ispace) {
+ raw_spin_lock(&sbinfo->stat_lock);
+ sbinfo->free_ispace += ispace;
+ raw_spin_unlock(&sbinfo->stat_lock);
+ }
+ return PTR_ERR(old_xattr);
}
static const struct xattr_handler shmem_security_xattr_handler = {
@@ -3487,9 +3708,16 @@ static const struct xattr_handler shmem_trusted_xattr_handler = {
.set = shmem_xattr_handler_set,
};
+static const struct xattr_handler shmem_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = shmem_xattr_handler_get,
+ .set = shmem_xattr_handler_set,
+};
+
static const struct xattr_handler *shmem_xattr_handlers[] = {
&shmem_security_xattr_handler,
&shmem_trusted_xattr_handler,
+ &shmem_user_xattr_handler,
NULL
};
@@ -3502,6 +3730,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
static const struct inode_operations shmem_short_symlink_operations = {
.getattr = shmem_getattr,
+ .setattr = shmem_setattr,
.get_link = simple_get_link,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -3510,6 +3739,7 @@ static const struct inode_operations shmem_short_symlink_operations = {
static const struct inode_operations shmem_symlink_inode_operations = {
.getattr = shmem_getattr,
+ .setattr = shmem_setattr,
.get_link = shmem_get_link,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -3609,6 +3839,13 @@ enum shmem_param {
Opt_inode32,
Opt_inode64,
Opt_noswap,
+ Opt_quota,
+ Opt_usrquota,
+ Opt_grpquota,
+ Opt_usrquota_block_hardlimit,
+ Opt_usrquota_inode_hardlimit,
+ Opt_grpquota_block_hardlimit,
+ Opt_grpquota_inode_hardlimit,
};
static const struct constant_table shmem_param_enums_huge[] = {
@@ -3631,6 +3868,15 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
fsparam_flag ("inode32", Opt_inode32),
fsparam_flag ("inode64", Opt_inode64),
fsparam_flag ("noswap", Opt_noswap),
+#ifdef CONFIG_TMPFS_QUOTA
+ fsparam_flag ("quota", Opt_quota),
+ fsparam_flag ("usrquota", Opt_usrquota),
+ fsparam_flag ("grpquota", Opt_grpquota),
+ fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit),
+ fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit),
+ fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
+ fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
+#endif
{}
};
@@ -3641,6 +3887,8 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
unsigned long long size;
char *rest;
int opt;
+ kuid_t kuid;
+ kgid_t kgid;
opt = fs_parse(fc, shmem_fs_parameters, param, &result);
if (opt < 0)
@@ -3662,13 +3910,13 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_nr_blocks:
ctx->blocks = memparse(param->string, &rest);
- if (*rest || ctx->blocks > S64_MAX)
+ if (*rest || ctx->blocks > LONG_MAX)
goto bad_value;
ctx->seen |= SHMEM_SEEN_BLOCKS;
break;
case Opt_nr_inodes:
ctx->inodes = memparse(param->string, &rest);
- if (*rest)
+ if (*rest || ctx->inodes > ULONG_MAX / BOGO_INODE_SIZE)
goto bad_value;
ctx->seen |= SHMEM_SEEN_INODES;
break;
@@ -3676,14 +3924,32 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->mode = result.uint_32 & 07777;
break;
case Opt_uid:
- ctx->uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(ctx->uid))
+ kuid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(kuid))
+ goto bad_value;
+
+ /*
+ * The requested uid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kuid_has_mapping(fc->user_ns, kuid))
goto bad_value;
+
+ ctx->uid = kuid;
break;
case Opt_gid:
- ctx->gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(ctx->gid))
+ kgid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(kgid))
+ goto bad_value;
+
+ /*
+ * The requested gid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kgid_has_mapping(fc->user_ns, kgid))
goto bad_value;
+
+ ctx->gid = kgid;
break;
case Opt_huge:
ctx->huge = result.uint_32;
@@ -3722,6 +3988,60 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->noswap = true;
ctx->seen |= SHMEM_SEEN_NOSWAP;
break;
+ case Opt_quota:
+ if (fc->user_ns != &init_user_ns)
+ return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+ ctx->seen |= SHMEM_SEEN_QUOTA;
+ ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
+ break;
+ case Opt_usrquota:
+ if (fc->user_ns != &init_user_ns)
+ return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+ ctx->seen |= SHMEM_SEEN_QUOTA;
+ ctx->quota_types |= QTYPE_MASK_USR;
+ break;
+ case Opt_grpquota:
+ if (fc->user_ns != &init_user_ns)
+ return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+ ctx->seen |= SHMEM_SEEN_QUOTA;
+ ctx->quota_types |= QTYPE_MASK_GRP;
+ break;
+ case Opt_usrquota_block_hardlimit:
+ size = memparse(param->string, &rest);
+ if (*rest || !size)
+ goto bad_value;
+ if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
+ return invalfc(fc,
+ "User quota block hardlimit too large.");
+ ctx->qlimits.usrquota_bhardlimit = size;
+ break;
+ case Opt_grpquota_block_hardlimit:
+ size = memparse(param->string, &rest);
+ if (*rest || !size)
+ goto bad_value;
+ if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
+ return invalfc(fc,
+ "Group quota block hardlimit too large.");
+ ctx->qlimits.grpquota_bhardlimit = size;
+ break;
+ case Opt_usrquota_inode_hardlimit:
+ size = memparse(param->string, &rest);
+ if (*rest || !size)
+ goto bad_value;
+ if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
+ return invalfc(fc,
+ "User quota inode hardlimit too large.");
+ ctx->qlimits.usrquota_ihardlimit = size;
+ break;
+ case Opt_grpquota_inode_hardlimit:
+ size = memparse(param->string, &rest);
+ if (*rest || !size)
+ goto bad_value;
+ if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
+ return invalfc(fc,
+ "Group quota inode hardlimit too large.");
+ ctx->qlimits.grpquota_ihardlimit = size;
+ break;
}
return 0;
@@ -3777,21 +4097,17 @@ static int shmem_parse_options(struct fs_context *fc, void *data)
/*
* Reconfigure a shmem filesystem.
- *
- * Note that we disallow change from limited->unlimited blocks/inodes while any
- * are in use; but we must separately disallow unlimited->limited, because in
- * that case we have no record of how much is already in use.
*/
static int shmem_reconfigure(struct fs_context *fc)
{
struct shmem_options *ctx = fc->fs_private;
struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
- unsigned long inodes;
+ unsigned long used_isp;
struct mempolicy *mpol = NULL;
const char *err;
raw_spin_lock(&sbinfo->stat_lock);
- inodes = sbinfo->max_inodes - sbinfo->free_inodes;
+ used_isp = sbinfo->max_inodes * BOGO_INODE_SIZE - sbinfo->free_ispace;
if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
if (!sbinfo->max_blocks) {
@@ -3809,7 +4125,7 @@ static int shmem_reconfigure(struct fs_context *fc)
err = "Cannot retroactively limit inodes";
goto out;
}
- if (ctx->inodes < inodes) {
+ if (ctx->inodes * BOGO_INODE_SIZE < used_isp) {
err = "Too few inodes for current use";
goto out;
}
@@ -3829,6 +4145,24 @@ static int shmem_reconfigure(struct fs_context *fc)
goto out;
}
+ if (ctx->seen & SHMEM_SEEN_QUOTA &&
+ !sb_any_quota_loaded(fc->root->d_sb)) {
+ err = "Cannot enable quota on remount";
+ goto out;
+ }
+
+#ifdef CONFIG_TMPFS_QUOTA
+#define CHANGED_LIMIT(name) \
+ (ctx->qlimits.name## hardlimit && \
+ (ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit))
+
+ if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) ||
+ CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) {
+ err = "Cannot change global quota limit on remount";
+ goto out;
+ }
+#endif /* CONFIG_TMPFS_QUOTA */
+
if (ctx->seen & SHMEM_SEEN_HUGE)
sbinfo->huge = ctx->huge;
if (ctx->seen & SHMEM_SEEN_INUMS)
@@ -3837,7 +4171,7 @@ static int shmem_reconfigure(struct fs_context *fc)
sbinfo->max_blocks = ctx->blocks;
if (ctx->seen & SHMEM_SEEN_INODES) {
sbinfo->max_inodes = ctx->inodes;
- sbinfo->free_inodes = ctx->inodes - inodes;
+ sbinfo->free_ispace = ctx->inodes * BOGO_INODE_SIZE - used_isp;
}
/*
@@ -3920,6 +4254,9 @@ static void shmem_put_super(struct super_block *sb)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+#ifdef CONFIG_TMPFS_QUOTA
+ shmem_disable_quotas(sb);
+#endif
free_percpu(sbinfo->ino_batch);
percpu_counter_destroy(&sbinfo->used_blocks);
mpol_put(sbinfo->mpol);
@@ -3932,12 +4269,13 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
struct shmem_options *ctx = fc->fs_private;
struct inode *inode;
struct shmem_sb_info *sbinfo;
+ int error = -ENOMEM;
/* Round up to L1_CACHE_BYTES to resist false sharing */
sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
L1_CACHE_BYTES), GFP_KERNEL);
if (!sbinfo)
- return -ENOMEM;
+ return error;
sb->s_fs_info = sbinfo;
@@ -3964,7 +4302,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_NOUSER;
#endif
sbinfo->max_blocks = ctx->blocks;
- sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
+ sbinfo->max_inodes = ctx->inodes;
+ sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
if (sb->s_flags & SB_KERNMOUNT) {
sbinfo->ino_batch = alloc_percpu(ino_t);
if (!sbinfo->ino_batch)
@@ -3998,10 +4337,27 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#endif
uuid_gen(&sb->s_uuid);
+#ifdef CONFIG_TMPFS_QUOTA
+ if (ctx->seen & SHMEM_SEEN_QUOTA) {
+ sb->dq_op = &shmem_quota_operations;
+ sb->s_qcop = &dquot_quotactl_sysfile_ops;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+
+ /* Copy the default limits from ctx into sbinfo */
+ memcpy(&sbinfo->qlimits, &ctx->qlimits,
+ sizeof(struct shmem_quota_limits));
+
+ if (shmem_enable_quotas(sb, ctx->quota_types))
+ goto failed;
+ }
+#endif /* CONFIG_TMPFS_QUOTA */
+
inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0,
VM_NORESERVE);
- if (!inode)
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
goto failed;
+ }
inode->i_uid = sbinfo->uid;
inode->i_gid = sbinfo->gid;
sb->s_root = d_make_root(inode);
@@ -4011,7 +4367,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
failed:
shmem_put_super(sb);
- return -ENOMEM;
+ return error;
}
static int shmem_get_tree(struct fs_context *fc)
@@ -4061,6 +4417,8 @@ static void shmem_destroy_inode(struct inode *inode)
{
if (S_ISREG(inode->i_mode))
mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+ if (S_ISDIR(inode->i_mode))
+ simple_offset_destroy(shmem_get_offset_ctx(inode));
}
static void shmem_init_inode(void *foo)
@@ -4104,12 +4462,12 @@ EXPORT_SYMBOL(shmem_aops);
static const struct file_operations shmem_file_operations = {
.mmap = shmem_mmap,
- .open = generic_file_open,
+ .open = shmem_file_open,
.get_unmapped_area = shmem_get_unmapped_area,
#ifdef CONFIG_TMPFS
.llseek = shmem_file_llseek,
.read_iter = shmem_file_read_iter,
- .write_iter = generic_file_write_iter,
+ .write_iter = shmem_file_write_iter,
.fsync = noop_fsync,
.splice_read = shmem_file_splice_read,
.splice_write = iter_file_splice_write,
@@ -4141,6 +4499,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
.mknod = shmem_mknod,
.rename = shmem_rename2,
.tmpfile = shmem_tmpfile,
+ .get_offset_ctx = shmem_get_offset_ctx,
#endif
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -4172,6 +4531,9 @@ static const struct super_operations shmem_ops = {
.statfs = shmem_statfs,
.show_options = shmem_show_options,
#endif
+#ifdef CONFIG_TMPFS_QUOTA
+ .get_dquots = shmem_get_dquots,
+#endif
.evict_inode = shmem_evict_inode,
.drop_inode = generic_delete_inode,
.put_super = shmem_put_super,
@@ -4237,6 +4599,14 @@ void __init shmem_init(void)
shmem_init_inodecache();
+#ifdef CONFIG_TMPFS_QUOTA
+ error = register_quota_format(&shmem_quota_format);
+ if (error < 0) {
+ pr_err("Could not register quota format\n");
+ goto out3;
+ }
+#endif
+
error = register_filesystem(&shmem_fs_type);
if (error) {
pr_err("Could not register tmpfs\n");
@@ -4261,6 +4631,10 @@ void __init shmem_init(void)
out1:
unregister_filesystem(&shmem_fs_type);
out2:
+#ifdef CONFIG_TMPFS_QUOTA
+ unregister_quota_format(&shmem_quota_format);
+out3:
+#endif
shmem_destroy_inodecache();
shm_mnt = ERR_PTR(error);
}
@@ -4380,10 +4754,16 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
#define shmem_vm_ops generic_file_vm_ops
#define shmem_anon_vm_ops generic_file_vm_ops
#define shmem_file_operations ramfs_file_operations
-#define shmem_get_inode(idmap, sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size) 0
#define shmem_unacct_size(flags, size) do {} while (0)
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir,
+ umode_t mode, dev_t dev, unsigned long flags)
+{
+ struct inode *inode = ramfs_get_inode(sb, dir, mode, dev);
+ return inode ? inode : ERR_PTR(-ENOSPC);
+}
+
#endif /* CONFIG_SHMEM */
/* common code */
@@ -4408,9 +4788,10 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
S_IFREG | S_IRWXUGO, 0, flags);
- if (unlikely(!inode)) {
+
+ if (IS_ERR(inode)) {
shmem_unacct_size(flags, size);
- return ERR_PTR(-ENOSPC);
+ return ERR_CAST(inode);
}
inode->i_flags |= i_flags;
inode->i_size = size;