summaryrefslogtreecommitdiff
path: root/mm/kasan/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/kasan/common.c')
-rw-r--r--mm/kasan/common.c233
1 files changed, 233 insertions, 0 deletions
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 6814d6d6a023..df3371d5c572 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,6 +36,8 @@
#include <linux/bug.h>
#include <linux/uaccess.h>
+#include <asm/tlbflush.h>
+
#include "kasan.h"
#include "../slab.h"
@@ -590,6 +592,7 @@ void kasan_kfree_large(void *ptr, unsigned long ip)
/* The object will be poisoned by page_alloc. */
}
+#ifndef CONFIG_KASAN_VMALLOC
int kasan_module_alloc(void *addr, size_t size)
{
void *ret;
@@ -625,6 +628,7 @@ void kasan_free_shadow(const struct vm_struct *vm)
if (vm->flags & VM_KASAN)
vfree(kasan_mem_to_shadow(vm->addr));
}
+#endif
extern void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip);
@@ -744,3 +748,232 @@ static int __init kasan_memhotplug_init(void)
core_initcall(kasan_memhotplug_init);
#endif
+
+#ifdef CONFIG_KASAN_VMALLOC
+static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
+ void *unused)
+{
+ unsigned long page;
+ pte_t pte;
+
+ if (likely(!pte_none(*ptep)))
+ return 0;
+
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
+ pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
+
+ spin_lock(&init_mm.page_table_lock);
+ if (likely(pte_none(*ptep))) {
+ set_pte_at(&init_mm, addr, ptep, pte);
+ page = 0;
+ }
+ spin_unlock(&init_mm.page_table_lock);
+ if (page)
+ free_page(page);
+ return 0;
+}
+
+int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
+{
+ unsigned long shadow_start, shadow_end;
+ int ret;
+
+ shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr);
+ shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
+ shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr +
+ area->size);
+ shadow_end = ALIGN(shadow_end, PAGE_SIZE);
+
+ ret = apply_to_page_range(&init_mm, shadow_start,
+ shadow_end - shadow_start,
+ kasan_populate_vmalloc_pte, NULL);
+ if (ret)
+ return ret;
+
+ flush_cache_vmap(shadow_start, shadow_end);
+
+ kasan_unpoison_shadow(area->addr, requested_size);
+
+ area->flags |= VM_KASAN;
+
+ /*
+ * We need to be careful about inter-cpu effects here. Consider:
+ *
+ * CPU#0 CPU#1
+ * WRITE_ONCE(p, vmalloc(100)); while (x = READ_ONCE(p)) ;
+ * p[99] = 1;
+ *
+ * With compiler instrumentation, that ends up looking like this:
+ *
+ * CPU#0 CPU#1
+ * // vmalloc() allocates memory
+ * // let a = area->addr
+ * // we reach kasan_populate_vmalloc
+ * // and call kasan_unpoison_shadow:
+ * STORE shadow(a), unpoison_val
+ * ...
+ * STORE shadow(a+99), unpoison_val x = LOAD p
+ * // rest of vmalloc process <data dependency>
+ * STORE p, a LOAD shadow(x+99)
+ *
+ * If there is no barrier between the end of unpoisioning the shadow
+ * and the store of the result to p, the stores could be committed
+ * in a different order by CPU#0, and CPU#1 could erroneously observe
+ * poison in the shadow.
+ *
+ * We need some sort of barrier between the stores.
+ *
+ * In the vmalloc() case, this is provided by a smp_wmb() in
+ * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
+ * get_vm_area() and friends, the caller gets shadow allocated but
+ * doesn't have any pages mapped into the virtual address space that
+ * has been reserved. Mapping those pages in will involve taking and
+ * releasing a page-table lock, which will provide the barrier.
+ */
+
+ return 0;
+}
+
+/*
+ * Poison the shadow for a vmalloc region. Called as part of the
+ * freeing process at the time the region is freed.
+ */
+void kasan_poison_vmalloc(void *start, unsigned long size)
+{
+ size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
+ kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
+}
+
+static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
+ void *unused)
+{
+ unsigned long page;
+
+ page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);
+
+ spin_lock(&init_mm.page_table_lock);
+
+ if (likely(!pte_none(*ptep))) {
+ pte_clear(&init_mm, addr, ptep);
+ free_page(page);
+ }
+ spin_unlock(&init_mm.page_table_lock);
+
+ return 0;
+}
+
+/*
+ * Release the backing for the vmalloc region [start, end), which
+ * lies within the free region [free_region_start, free_region_end).
+ *
+ * This can be run lazily, long after the region was freed. It runs
+ * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
+ * infrastructure.
+ *
+ * How does this work?
+ * -------------------
+ *
+ * We have a region that is page aligned, labelled as A.
+ * That might not map onto the shadow in a way that is page-aligned:
+ *
+ * start end
+ * v v
+ * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
+ * -------- -------- -------- -------- --------
+ * | | | | |
+ * | | | /-------/ |
+ * \-------\|/------/ |/---------------/
+ * ||| ||
+ * |??AAAAAA|AAAAAAAA|AA??????| < shadow
+ * (1) (2) (3)
+ *
+ * First we align the start upwards and the end downwards, so that the
+ * shadow of the region aligns with shadow page boundaries. In the
+ * example, this gives us the shadow page (2). This is the shadow entirely
+ * covered by this allocation.
+ *
+ * Then we have the tricky bits. We want to know if we can free the
+ * partially covered shadow pages - (1) and (3) in the example. For this,
+ * we are given the start and end of the free region that contains this
+ * allocation. Extending our previous example, we could have:
+ *
+ * free_region_start free_region_end
+ * | start end |
+ * v v v v
+ * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
+ * -------- -------- -------- -------- --------
+ * | | | | |
+ * | | | /-------/ |
+ * \-------\|/------/ |/---------------/
+ * ||| ||
+ * |FFAAAAAA|AAAAAAAA|AAF?????| < shadow
+ * (1) (2) (3)
+ *
+ * Once again, we align the start of the free region up, and the end of
+ * the free region down so that the shadow is page aligned. So we can free
+ * page (1) - we know no allocation currently uses anything in that page,
+ * because all of it is in the vmalloc free region. But we cannot free
+ * page (3), because we can't be sure that the rest of it is unused.
+ *
+ * We only consider pages that contain part of the original region for
+ * freeing: we don't try to free other pages from the free region or we'd
+ * end up trying to free huge chunks of virtual address space.
+ *
+ * Concurrency
+ * -----------
+ *
+ * How do we know that we're not freeing a page that is simultaneously
+ * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
+ *
+ * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
+ * at the same time. While we run under free_vmap_area_lock, the population
+ * code does not.
+ *
+ * free_vmap_area_lock instead operates to ensure that the larger range
+ * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
+ * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
+ * no space identified as free will become used while we are running. This
+ * means that so long as we are careful with alignment and only free shadow
+ * pages entirely covered by the free region, we will not run in to any
+ * trouble - any simultaneous allocations will be for disjoint regions.
+ */
+void kasan_release_vmalloc(unsigned long start, unsigned long end,
+ unsigned long free_region_start,
+ unsigned long free_region_end)
+{
+ void *shadow_start, *shadow_end;
+ unsigned long region_start, region_end;
+
+ region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
+ region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
+
+ free_region_start = ALIGN(free_region_start,
+ PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
+
+ if (start != region_start &&
+ free_region_start < region_start)
+ region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;
+
+ free_region_end = ALIGN_DOWN(free_region_end,
+ PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
+
+ if (end != region_end &&
+ free_region_end > region_end)
+ region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;
+
+ shadow_start = kasan_mem_to_shadow((void *)region_start);
+ shadow_end = kasan_mem_to_shadow((void *)region_end);
+
+ if (shadow_end > shadow_start) {
+ apply_to_page_range(&init_mm, (unsigned long)shadow_start,
+ (unsigned long)(shadow_end - shadow_start),
+ kasan_depopulate_vmalloc_pte, NULL);
+ flush_tlb_kernel_range((unsigned long)shadow_start,
+ (unsigned long)shadow_end);
+ }
+}
+#endif