summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLiam R. Howlett <Liam.Howlett@Oracle.com>2022-09-06 19:48:45 +0000
committerAndrew Morton <akpm@linux-foundation.org>2022-09-26 19:46:14 -0700
commitd4af56c5c7c6781ca6ca8075e2cf5bc119ed33d1 (patch)
treee3c5b2600a2a9231ab6bbb85ede7b2be92f637f9 /include
parente15e06a8392321a19d8ebdbdd7643b7fa8874c17 (diff)
mm: start tracking VMAs with maple tree
Start tracking the VMAs with the new maple tree structure in parallel with the rb_tree. Add debug and trace events for maple tree operations and duplicate the rb_tree that is created on forks into the maple tree. The maple tree is added to the mm_struct including the mm_init struct, added support in required mm/mmap functions, added tracking in kernel/fork for process forking, and used to find the unmapped_area and checked against what the rbtree finds. This also moves the mmap_lock() in exit_mmap() since the oom reaper call does walk the VMAs. Otherwise lockdep will be unhappy if oom happens. When splitting a vma fails due to allocations of the maple tree nodes, the error path in __split_vma() calls new->vm_ops->close(new). The page accounting for hugetlb is actually in the close() operation, so it accounts for the removal of 1/2 of the VMA which was not adjusted. This results in a negative exit value. To avoid the negative charge, set vm_start = vm_end and vm_pgoff = 0. There is also a potential accounting issue in special mappings from insert_vm_struct() failing to allocate, so reverse the charge there in the failure scenario. Link: https://lkml.kernel.org/r/20220906194824.2110408-9-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Tested-by: Yu Zhao <yuzhao@google.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: SeongJae Park <sj@kernel.org> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/mm.h5
-rw-r--r--include/linux/mm_types.h3
-rw-r--r--include/trace/events/mmap.h73
3 files changed, 81 insertions, 0 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cc9ffc19e7f..896d04248e66 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2567,6 +2567,8 @@ extern bool arch_has_descending_max_zone_pfns(void);
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
+/* mmap.c */
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
/* interval_tree.c */
void vma_interval_tree_insert(struct vm_area_struct *node,
@@ -2630,6 +2632,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
+void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas);
+
static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
unsigned long start,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e1797813cc2c..425bc5f7d477 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
+#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -486,6 +487,7 @@ struct kioctx_table;
struct mm_struct {
struct {
struct vm_area_struct *mmap; /* list of VMAs */
+ struct maple_tree mm_mt;
struct rb_root mm_rb;
u64 vmacache_seqnum; /* per-thread vmacache */
#ifdef CONFIG_MMU
@@ -697,6 +699,7 @@ struct mm_struct {
unsigned long cpu_bitmap[];
};
+#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
extern struct mm_struct init_mm;
/* Pointer magic because the dynamic array size confuses some compilers. */
diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h
index 4661f7ba07c0..216de5f03621 100644
--- a/include/trace/events/mmap.h
+++ b/include/trace/events/mmap.h
@@ -42,6 +42,79 @@ TRACE_EVENT(vm_unmapped_area,
__entry->low_limit, __entry->high_limit, __entry->align_mask,
__entry->align_offset)
);
+
+TRACE_EVENT(vma_mas_szero,
+ TP_PROTO(struct maple_tree *mt, unsigned long start,
+ unsigned long end),
+
+ TP_ARGS(mt, start, end),
+
+ TP_STRUCT__entry(
+ __field(struct maple_tree *, mt)
+ __field(unsigned long, start)
+ __field(unsigned long, end)
+ ),
+
+ TP_fast_assign(
+ __entry->mt = mt;
+ __entry->start = start;
+ __entry->end = end;
+ ),
+
+ TP_printk("mt_mod %p, (NULL), SNULL, %lu, %lu,",
+ __entry->mt,
+ (unsigned long) __entry->start,
+ (unsigned long) __entry->end
+ )
+);
+
+TRACE_EVENT(vma_store,
+ TP_PROTO(struct maple_tree *mt, struct vm_area_struct *vma),
+
+ TP_ARGS(mt, vma),
+
+ TP_STRUCT__entry(
+ __field(struct maple_tree *, mt)
+ __field(struct vm_area_struct *, vma)
+ __field(unsigned long, vm_start)
+ __field(unsigned long, vm_end)
+ ),
+
+ TP_fast_assign(
+ __entry->mt = mt;
+ __entry->vma = vma;
+ __entry->vm_start = vma->vm_start;
+ __entry->vm_end = vma->vm_end - 1;
+ ),
+
+ TP_printk("mt_mod %p, (%p), STORE, %lu, %lu,",
+ __entry->mt, __entry->vma,
+ (unsigned long) __entry->vm_start,
+ (unsigned long) __entry->vm_end
+ )
+);
+
+
+TRACE_EVENT(exit_mmap,
+ TP_PROTO(struct mm_struct *mm),
+
+ TP_ARGS(mm),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct *, mm)
+ __field(struct maple_tree *, mt)
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __entry->mt = &mm->mm_mt;
+ ),
+
+ TP_printk("mt_mod %p, DESTROY\n",
+ __entry->mt
+ )
+);
+
#endif
/* This part must be outside protection */