diff options
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r-- | fs/hugetlbfs/inode.c | 173 |
1 files changed, 52 insertions, 121 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d746866ae3b6..0fc179a59830 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -39,8 +39,11 @@ #include <linux/uaccess.h> #include <linux/sched/mm.h> +#define CREATE_TRACE_POINTS +#include <trace/events/hugetlbfs.h> + static const struct address_space_operations hugetlbfs_aops; -const struct file_operations hugetlbfs_file_operations; +static const struct file_operations hugetlbfs_file_operations; static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_inode_operations; @@ -73,13 +76,13 @@ enum hugetlb_param { }; static const struct fs_parameter_spec hugetlb_fs_parameters[] = { - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_string("min_size", Opt_min_size), fsparam_u32oct("mode", Opt_mode), fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("pagesize", Opt_pagesize), fsparam_string("size", Opt_size), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), {} }; @@ -96,7 +99,6 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = { static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); - struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); @@ -113,10 +115,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND); vma->vm_ops = &hugetlb_vm_ops; - ret = seal_check_write(info->seals, vma); - if (ret) - return ret; - /* * page based offset in vm_pgoff could be sufficiently large to * overflow a loff_t when converted to byte offset. This can @@ -171,105 +169,29 @@ out: * Called under mmap_write_lock(mm). */ -static unsigned long -hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info; - - info.flags = 0; - info.length = len; - info.low_limit = current->mm->mmap_base; - info.high_limit = arch_get_mmap_end(addr, len, flags); - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - info.align_offset = 0; - return vm_unmapped_area(&info); -} - -static unsigned long -hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info; - - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; - info.low_limit = PAGE_SIZE; - info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base); - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - info.align_offset = 0; - addr = vm_unmapped_area(&info); - - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - if (unlikely(offset_in_page(addr))) { - VM_BUG_ON(addr != -ENOMEM); - info.flags = 0; - info.low_limit = current->mm->mmap_base; - info.high_limit = arch_get_mmap_end(addr, len, flags); - addr = vm_unmapped_area(&info); - } - - return addr; -} - unsigned long -generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + unsigned long addr0 = 0; struct hstate *h = hstate_file(file); - const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags); if (len & ~huge_page_mask(h)) return -EINVAL; - if (len > TASK_SIZE) - return -ENOMEM; - if (flags & MAP_FIXED) { + if (addr & ~huge_page_mask(h)) + return -EINVAL; if (prepare_hugepage_range(file, addr, len)) return -EINVAL; - return addr; - } - - if (addr) { - addr = ALIGN(addr, huge_page_size(h)); - vma = find_vma(mm, addr); - if (mmap_end - len >= addr && - (!vma || addr + len <= vm_start_gap(vma))) - return addr; } + if (addr) + addr0 = ALIGN(addr, huge_page_size(h)); - /* - * Use mm->get_unmapped_area value as a hint to use topdown routine. - * If architectures have special needs, they should define their own - * version of hugetlb_get_unmapped_area. - */ - if (mm->get_unmapped_area == arch_get_unmapped_area_topdown) - return hugetlb_get_unmapped_area_topdown(file, addr, len, - pgoff, flags); - return hugetlb_get_unmapped_area_bottomup(file, addr, len, - pgoff, flags); + return mm_get_unmapped_area_vmflags(current->mm, file, addr0, len, pgoff, + flags, 0); } -#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA -static unsigned long -hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags); -} -#endif - /* * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset. * Returns the maximum number of bytes one can read without touching the 1st raw @@ -390,14 +312,14 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) static int hugetlbfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + struct folio **foliop, void **fsdata) { return -EINVAL; } static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { BUG(); return -EINVAL; @@ -425,7 +347,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma, if (!ptep) return false; - pte = huge_ptep_get(ptep); + pte = huge_ptep_get(vma->vm_mm, addr, ptep); if (huge_pte_none(pte) || !pte_present(pte)) return false; @@ -689,6 +611,7 @@ static void hugetlbfs_evict_inode(struct inode *inode) { struct resv_map *resv_map; + trace_hugetlbfs_evict_inode(inode); remove_inode_hugepages(inode, 0, LLONG_MAX); /* @@ -816,8 +739,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; - if (mode & FALLOC_FL_PUNCH_HOLE) - return hugetlbfs_punch_hole(inode, offset, len); + if (mode & FALLOC_FL_PUNCH_HOLE) { + error = hugetlbfs_punch_hole(inode, offset, len); + goto out_nolock; + } /* * Default preallocate case. @@ -889,13 +814,13 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, * folios in these areas, we need to consume the reserves * to keep reservation accounting consistent. */ - folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0); + folio = alloc_hugetlb_folio(&pseudo_vma, addr, false); if (IS_ERR(folio)) { mutex_unlock(&hugetlb_fault_mutex_table[hash]); error = PTR_ERR(folio); goto out; } - clear_huge_page(&folio->page, addr, pages_per_huge_page(h)); + folio_zero_user(folio, addr); __folio_mark_uptodate(folio); error = hugetlb_add_to_page_cache(folio, mapping, index); if (unlikely(error)) { @@ -921,6 +846,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, inode_set_ctime_current(inode); out: inode_unlock(inode); + +out_nolock: + trace_hugetlbfs_fallocate(inode, mode, offset, len, error); return error; } @@ -933,10 +861,12 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap, unsigned int ia_valid = attr->ia_valid; struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); - error = setattr_prepare(&nop_mnt_idmap, dentry, attr); + error = setattr_prepare(idmap, dentry, attr); if (error) return error; + trace_hugetlbfs_setattr(inode, dentry, attr); + if (ia_valid & ATTR_SIZE) { loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; @@ -950,7 +880,7 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap, hugetlb_vmtruncate(inode, newsize); } - setattr_copy(&nop_mnt_idmap, inode, attr); + setattr_copy(idmap, inode, attr); mark_inode_dirty(inode); return 0; } @@ -985,6 +915,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, static struct lock_class_key hugetlbfs_i_mmap_rwsem_key; static struct inode *hugetlbfs_get_inode(struct super_block *sb, + struct mnt_idmap *idmap, struct inode *dir, umode_t mode, dev_t dev) { @@ -1006,7 +937,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); inode->i_ino = get_next_ino(); - inode_init_owner(&nop_mnt_idmap, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, &hugetlbfs_i_mmap_rwsem_key); inode->i_mapping->a_ops = &hugetlbfs_aops; @@ -1034,6 +965,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, break; } lockdep_annotate_inode_mutex_key(inode); + trace_hugetlbfs_alloc_inode(inode, dir, mode); } else { if (resv_map) kref_put(&resv_map->refs, resv_map_release); @@ -1050,7 +982,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir, { struct inode *inode; - inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev); + inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode, dev); if (!inode) return -ENOSPC; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); @@ -1062,7 +994,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir, static int hugetlbfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { - int retval = hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry, + int retval = hugetlbfs_mknod(idmap, dir, dentry, mode | S_IFDIR, 0); if (!retval) inc_nlink(dir); @@ -1073,7 +1005,7 @@ static int hugetlbfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0); + return hugetlbfs_mknod(idmap, dir, dentry, mode | S_IFREG, 0); } static int hugetlbfs_tmpfile(struct mnt_idmap *idmap, @@ -1082,7 +1014,7 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap, { struct inode *inode; - inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0); + inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode | S_IFREG, 0); if (!inode) return -ENOSPC; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); @@ -1094,10 +1026,11 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { + const umode_t mode = S_IFLNK|S_IRWXUGO; struct inode *inode; int error = -ENOSPC; - inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0); + inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode, 0); if (inode) { int l = strlen(symname)+1; error = page_symlink(inode, symname, l); @@ -1129,10 +1062,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping, hugetlb_set_folio_subpool(src, NULL); } - if (mode != MIGRATE_SYNC_NO_COPY) - folio_migrate_copy(dst, src); - else - folio_migrate_flags(dst, src); + folio_migrate_flags(dst, src); return MIGRATEPAGE_SUCCESS; } @@ -1275,6 +1205,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) static void hugetlbfs_free_inode(struct inode *inode) { + trace_hugetlbfs_free_inode(inode); kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); } @@ -1299,13 +1230,14 @@ static void init_once(void *foo) inode_init_once(&ei->vfs_inode); } -const struct file_operations hugetlbfs_file_operations = { +static const struct file_operations hugetlbfs_file_operations = { .read_iter = hugetlbfs_read_iter, .mmap = hugetlbfs_file_mmap, .fsync = noop_fsync, .get_unmapped_area = hugetlb_get_unmapped_area, .llseek = default_llseek, .fallocate = hugetlbfs_fallocate, + .fop_flags = FOP_HUGE_PAGES, }; static const struct inode_operations hugetlbfs_dir_inode_operations = { @@ -1376,15 +1308,11 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par switch (opt) { case Opt_uid: - ctx->uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(ctx->uid)) - goto bad_val; + ctx->uid = result.uid; return 0; case Opt_gid: - ctx->gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(ctx->gid)) - goto bad_val; + ctx->gid = result.gid; return 0; case Opt_mode: @@ -1566,6 +1494,7 @@ static struct file_system_type hugetlbfs_fs_type = { .init_fs_context = hugetlbfs_init_fs_context, .parameters = hugetlb_fs_parameters, .kill_sb = kill_litter_super, + .fs_flags = FS_ALLOW_IDMAP, }; static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; @@ -1619,7 +1548,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size, } file = ERR_PTR(-ENOSPC); - inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0); + /* hugetlbfs_vfsmount[] mounts do not use idmapped mounts. */ + inode = hugetlbfs_get_inode(mnt->mnt_sb, &nop_mnt_idmap, NULL, + S_IFREG | S_IRWXUGO, 0); if (!inode) goto out; if (creat_flags == HUGETLB_SHMFS_INODE) |