summaryrefslogtreecommitdiff
path: root/fs/hugetlbfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r--fs/hugetlbfs/inode.c184
1 files changed, 56 insertions, 128 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6502c7e776d1..e4de5425838d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -39,8 +39,11 @@
#include <linux/uaccess.h>
#include <linux/sched/mm.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/hugetlbfs.h>
+
static const struct address_space_operations hugetlbfs_aops;
-const struct file_operations hugetlbfs_file_operations;
+static const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;
@@ -73,13 +76,13 @@ enum hugetlb_param {
};
static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size),
fsparam_u32oct("mode", Opt_mode),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("pagesize", Opt_pagesize),
fsparam_string("size", Opt_size),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
@@ -96,7 +99,6 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
- struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
@@ -113,10 +115,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
vma->vm_ops = &hugetlb_vm_ops;
- ret = seal_check_write(info->seals, vma);
- if (ret)
- return ret;
-
/*
* page based offset in vm_pgoff could be sufficiently large to
* overflow a loff_t when converted to byte offset. This can
@@ -171,119 +169,45 @@ out:
* Called under mmap_write_lock(mm).
*/
-static unsigned long
-hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
-
- info.flags = 0;
- info.length = len;
- info.low_limit = current->mm->mmap_base;
- info.high_limit = arch_get_mmap_end(addr, len, flags);
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
- return vm_unmapped_area(&info);
-}
-
-static unsigned long
-hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
-
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = PAGE_SIZE;
- info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
- addr = vm_unmapped_area(&info);
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- if (unlikely(offset_in_page(addr))) {
- VM_BUG_ON(addr != -ENOMEM);
- info.flags = 0;
- info.low_limit = current->mm->mmap_base;
- info.high_limit = arch_get_mmap_end(addr, len, flags);
- addr = vm_unmapped_area(&info);
- }
-
- return addr;
-}
-
unsigned long
-generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ unsigned long addr0 = 0;
struct hstate *h = hstate_file(file);
- const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
if (len & ~huge_page_mask(h))
return -EINVAL;
- if (len > TASK_SIZE)
- return -ENOMEM;
-
if (flags & MAP_FIXED) {
+ if (addr & ~huge_page_mask(h))
+ return -EINVAL;
if (prepare_hugepage_range(file, addr, len))
return -EINVAL;
- return addr;
- }
-
- if (addr) {
- addr = ALIGN(addr, huge_page_size(h));
- vma = find_vma(mm, addr);
- if (mmap_end - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
- return addr;
}
+ if (addr)
+ addr0 = ALIGN(addr, huge_page_size(h));
- /*
- * Use mm->get_unmapped_area value as a hint to use topdown routine.
- * If architectures have special needs, they should define their own
- * version of hugetlb_get_unmapped_area.
- */
- if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
- return hugetlb_get_unmapped_area_topdown(file, addr, len,
- pgoff, flags);
- return hugetlb_get_unmapped_area_bottomup(file, addr, len,
- pgoff, flags);
+ return mm_get_unmapped_area_vmflags(current->mm, file, addr0, len, pgoff,
+ flags, 0);
}
-#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-static unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
-}
-#endif
-
/*
- * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset.
+ * Someone wants to read @bytes from a HWPOISON hugetlb @folio from @offset.
* Returns the maximum number of bytes one can read without touching the 1st raw
- * HWPOISON subpage.
+ * HWPOISON page.
*
* The implementation borrows the iteration logic from copy_page_to_iter*.
*/
-static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t bytes)
+static size_t adjust_range_hwpoison(struct folio *folio, size_t offset,
+ size_t bytes)
{
+ struct page *page;
size_t n = 0;
size_t res = 0;
- /* First subpage to start the loop. */
- page = nth_page(page, offset / PAGE_SIZE);
+ /* First page to start the loop. */
+ page = folio_page(folio, offset / PAGE_SIZE);
offset %= PAGE_SIZE;
while (1) {
if (is_raw_hwpoison_page_in_hugepage(page))
@@ -356,10 +280,10 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
else {
/*
* Adjust how many bytes safe to read without
- * touching the 1st raw HWPOISON subpage after
+ * touching the 1st raw HWPOISON page after
* offset.
*/
- want = adjust_range_hwpoison(&folio->page, offset, nr);
+ want = adjust_range_hwpoison(folio, offset, nr);
if (want == 0) {
folio_put(folio);
retval = -EIO;
@@ -390,14 +314,14 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
static int hugetlbfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
return -EINVAL;
}
static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
BUG();
return -EINVAL;
@@ -416,8 +340,8 @@ static void hugetlb_delete_from_page_cache(struct folio *folio)
* mutex for the page in the mapping. So, we can not race with page being
* faulted into the vma.
*/
-static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
- unsigned long addr, struct page *page)
+static bool hugetlb_vma_maps_pfn(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long pfn)
{
pte_t *ptep, pte;
@@ -425,11 +349,11 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
if (!ptep)
return false;
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(vma->vm_mm, addr, ptep);
if (huge_pte_none(pte) || !pte_present(pte))
return false;
- if (pte_page(pte) == page)
+ if (pte_pfn(pte) == pfn)
return true;
return false;
@@ -474,7 +398,7 @@ static void hugetlb_unmap_file_folio(struct hstate *h,
{
struct rb_root_cached *root = &mapping->i_mmap;
struct hugetlb_vma_lock *vma_lock;
- struct page *page = &folio->page;
+ unsigned long pfn = folio_pfn(folio);
struct vm_area_struct *vma;
unsigned long v_start;
unsigned long v_end;
@@ -490,7 +414,7 @@ retry:
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
- if (!hugetlb_vma_maps_page(vma, v_start, page))
+ if (!hugetlb_vma_maps_pfn(vma, v_start, pfn))
continue;
if (!hugetlb_vma_trylock_write(vma)) {
@@ -540,7 +464,7 @@ retry:
*/
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
- if (hugetlb_vma_maps_page(vma, v_start, page))
+ if (hugetlb_vma_maps_pfn(vma, v_start, pfn))
unmap_hugepage_range(vma, v_start, v_end, NULL,
ZAP_FLAG_DROP_MARKER);
@@ -689,6 +613,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
{
struct resv_map *resv_map;
+ trace_hugetlbfs_evict_inode(inode);
remove_inode_hugepages(inode, 0, LLONG_MAX);
/*
@@ -816,8 +741,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
- if (mode & FALLOC_FL_PUNCH_HOLE)
- return hugetlbfs_punch_hole(inode, offset, len);
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ error = hugetlbfs_punch_hole(inode, offset, len);
+ goto out_nolock;
+ }
/*
* Default preallocate case.
@@ -889,13 +816,13 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* folios in these areas, we need to consume the reserves
* to keep reservation accounting consistent.
*/
- folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0);
+ folio = alloc_hugetlb_folio(&pseudo_vma, addr, false);
if (IS_ERR(folio)) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
error = PTR_ERR(folio);
goto out;
}
- clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
+ folio_zero_user(folio, addr);
__folio_mark_uptodate(folio);
error = hugetlb_add_to_page_cache(folio, mapping, index);
if (unlikely(error)) {
@@ -921,6 +848,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
inode_set_ctime_current(inode);
out:
inode_unlock(inode);
+
+out_nolock:
+ trace_hugetlbfs_fallocate(inode, mode, offset, len, error);
return error;
}
@@ -937,6 +867,8 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
if (error)
return error;
+ trace_hugetlbfs_setattr(inode, dentry, attr);
+
if (ia_valid & ATTR_SIZE) {
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
@@ -1035,6 +967,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
+ trace_hugetlbfs_alloc_inode(inode, dir, mode);
} else {
if (resv_map)
kref_put(&resv_map->refs, resv_map_release);
@@ -1060,14 +993,14 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
return 0;
}
-static int hugetlbfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+static struct dentry *hugetlbfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
int retval = hugetlbfs_mknod(idmap, dir, dentry,
mode | S_IFDIR, 0);
if (!retval)
inc_nlink(dir);
- return retval;
+ return ERR_PTR(retval);
}
static int hugetlbfs_create(struct mnt_idmap *idmap,
@@ -1131,10 +1064,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
hugetlb_set_folio_subpool(src, NULL);
}
- if (mode != MIGRATE_SYNC_NO_COPY)
- folio_migrate_copy(dst, src);
- else
- folio_migrate_flags(dst, src);
+ folio_migrate_flags(dst, src);
return MIGRATEPAGE_SUCCESS;
}
@@ -1277,6 +1207,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
static void hugetlbfs_free_inode(struct inode *inode)
{
+ trace_hugetlbfs_free_inode(inode);
kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
}
@@ -1301,13 +1232,14 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
-const struct file_operations hugetlbfs_file_operations = {
+static const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter,
.mmap = hugetlbfs_file_mmap,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
.llseek = default_llseek,
.fallocate = hugetlbfs_fallocate,
+ .fop_flags = FOP_HUGE_PAGES,
};
static const struct inode_operations hugetlbfs_dir_inode_operations = {
@@ -1378,15 +1310,11 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
switch (opt) {
case Opt_uid:
- ctx->uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(ctx->uid))
- goto bad_val;
+ ctx->uid = result.uid;
return 0;
case Opt_gid:
- ctx->gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(ctx->gid))
- goto bad_val;
+ ctx->gid = result.gid;
return 0;
case Opt_mode: