diff options
Diffstat (limited to 'mm/highmem.c')
| -rw-r--r-- | mm/highmem.c | 509 |
1 files changed, 455 insertions, 54 deletions
diff --git a/mm/highmem.c b/mm/highmem.c index b32b70cdaed6..b5c8e4c2d5d4 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * High memory handling common code and variables. * @@ -22,17 +23,23 @@ #include <linux/bio.h> #include <linux/pagemap.h> #include <linux/mempool.h> -#include <linux/blkdev.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/highmem.h> #include <linux/kgdb.h> #include <asm/tlbflush.h> +#include <linux/vmalloc.h> +#ifdef CONFIG_KMAP_LOCAL +static inline int kmap_local_calc_idx(int idx) +{ + return idx + KM_MAX_IDX * smp_processor_id(); +} -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) -DEFINE_PER_CPU(int, __kmap_atomic_idx); +#ifndef arch_kmap_local_map_idx +#define arch_kmap_local_map_idx(idx, pfn) kmap_local_calc_idx(idx) #endif +#endif /* CONFIG_KMAP_LOCAL */ /* * Virtual_count is not a pure "count". @@ -44,36 +51,97 @@ DEFINE_PER_CPU(int, __kmap_atomic_idx); */ #ifdef CONFIG_HIGHMEM -unsigned long totalhigh_pages __read_mostly; -EXPORT_SYMBOL(totalhigh_pages); +/* + * Architecture with aliasing data cache may define the following family of + * helper functions in its asm/highmem.h to control cache color of virtual + * addresses where physical memory pages are mapped by kmap. + */ +#ifndef get_pkmap_color +/* + * Determine color of virtual address where the page should be mapped. + */ +static inline unsigned int get_pkmap_color(const struct page *page) +{ + return 0; +} +#define get_pkmap_color get_pkmap_color -EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); +/* + * Get next index for mapping inside PKMAP region for page with given color. + */ +static inline unsigned int get_next_pkmap_nr(unsigned int color) +{ + static unsigned int last_pkmap_nr; -unsigned int nr_free_highpages (void) + last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; + return last_pkmap_nr; +} + +/* + * Determine if page index inside PKMAP region (pkmap_nr) of given color + * has wrapped around PKMAP region end. When this happens an attempt to + * flush all unused PKMAP slots is made. + */ +static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color) +{ + return pkmap_nr == 0; +} + +/* + * Get the number of PKMAP entries of the given color. If no free slot is + * found after checking that many entries, kmap will sleep waiting for + * someone to call kunmap and free PKMAP slot. + */ +static inline int get_pkmap_entries_count(unsigned int color) { - pg_data_t *pgdat; - unsigned int pages = 0; + return LAST_PKMAP; +} - for_each_online_pgdat(pgdat) { - pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM], - NR_FREE_PAGES); - if (zone_movable_is_highmem()) - pages += zone_page_state( - &pgdat->node_zones[ZONE_MOVABLE], - NR_FREE_PAGES); +/* + * Get head of a wait queue for PKMAP entries of the given color. + * Wait queues for different mapping colors should be independent to avoid + * unnecessary wakeups caused by freeing of slots of other colors. + */ +static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) +{ + static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); + + return &pkmap_map_wait; +} +#endif + +unsigned long __nr_free_highpages(void) +{ + unsigned long pages = 0; + struct zone *zone; + + for_each_populated_zone(zone) { + if (is_highmem(zone)) + pages += zone_page_state(zone, NR_FREE_PAGES); + } + + return pages; +} + +unsigned long __totalhigh_pages(void) +{ + unsigned long pages = 0; + struct zone *zone; + + for_each_populated_zone(zone) { + if (is_highmem(zone)) + pages += zone_managed_pages(zone); } return pages; } +EXPORT_SYMBOL(__totalhigh_pages); static int pkmap_count[LAST_PKMAP]; -static unsigned int last_pkmap_nr; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); -pte_t * pkmap_page_table; - -static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); +pte_t *pkmap_page_table; /* * Most architectures have no use for kmap_high_get(), so let's abstract @@ -94,18 +162,36 @@ static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); do { spin_unlock(&kmap_lock); (void)(flags); } while (0) #endif -struct page *kmap_to_page(void *vaddr) +struct page *__kmap_to_page(void *vaddr) { + unsigned long base = (unsigned long) vaddr & PAGE_MASK; + struct kmap_ctrl *kctrl = ¤t->kmap_ctrl; unsigned long addr = (unsigned long)vaddr; + int i; + + /* kmap() mappings */ + if (WARN_ON_ONCE(addr >= PKMAP_ADDR(0) && + addr < PKMAP_ADDR(LAST_PKMAP))) + return pte_page(ptep_get(&pkmap_page_table[PKMAP_NR(addr)])); + + /* kmap_local_page() mappings */ + if (WARN_ON_ONCE(base >= __fix_to_virt(FIX_KMAP_END) && + base < __fix_to_virt(FIX_KMAP_BEGIN))) { + for (i = 0; i < kctrl->idx; i++) { + unsigned long base_addr; + int idx; - if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) { - int i = PKMAP_NR(addr); - return pte_page(pkmap_page_table[i]); + idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); + base_addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + + if (base_addr == base) + return pte_page(kctrl->pteval[i]); + } } - return virt_to_page(addr); + return virt_to_page(vaddr); } -EXPORT_SYMBOL(kmap_to_page); +EXPORT_SYMBOL(__kmap_to_page); static void flush_all_zero_pkmaps(void) { @@ -116,6 +202,7 @@ static void flush_all_zero_pkmaps(void) for (i = 0; i < LAST_PKMAP; i++) { struct page *page; + pte_t ptent; /* * zero means we don't have anything to do, @@ -128,7 +215,8 @@ static void flush_all_zero_pkmaps(void) pkmap_count[i] = 0; /* sanity check */ - BUG_ON(pte_none(pkmap_page_table[i])); + ptent = ptep_get(&pkmap_page_table[i]); + BUG_ON(pte_none(ptent)); /* * Don't need an atomic fetch-and-clear op here; @@ -137,7 +225,7 @@ static void flush_all_zero_pkmaps(void) * getting the kmap_lock (which is held here). * So no dangers, even with speculative execution. */ - page = pte_page(pkmap_page_table[i]); + page = pte_page(ptent); pte_clear(&init_mm, PKMAP_ADDR(i), &pkmap_page_table[i]); set_page_address(page, NULL); @@ -147,10 +235,7 @@ static void flush_all_zero_pkmaps(void) flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); } -/** - * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings - */ -void kmap_flush_unused(void) +void __kmap_flush_unused(void) { lock_kmap(); flush_all_zero_pkmaps(); @@ -161,15 +246,17 @@ static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; int count; + unsigned int last_pkmap_nr; + unsigned int color = get_pkmap_color(page); start: - count = LAST_PKMAP; + count = get_pkmap_entries_count(color); /* Find an empty entry */ for (;;) { - last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; - if (!last_pkmap_nr) { + last_pkmap_nr = get_next_pkmap_nr(color); + if (no_more_pkmaps(last_pkmap_nr, color)) { flush_all_zero_pkmaps(); - count = LAST_PKMAP; + count = get_pkmap_entries_count(color); } if (!pkmap_count[last_pkmap_nr]) break; /* Found a usable entry */ @@ -181,12 +268,14 @@ start: */ { DECLARE_WAITQUEUE(wait, current); + wait_queue_head_t *pkmap_map_wait = + get_pkmap_wait_queue_head(color); __set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&pkmap_map_wait, &wait); + add_wait_queue(pkmap_map_wait, &wait); unlock_kmap(); schedule(); - remove_wait_queue(&pkmap_map_wait, &wait); + remove_wait_queue(pkmap_map_wait, &wait); lock_kmap(); /* Somebody else might have mapped it while we slept */ @@ -230,9 +319,8 @@ void *kmap_high(struct page *page) pkmap_count[PKMAP_NR(vaddr)]++; BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2); unlock_kmap(); - return (void*) vaddr; + return (void *) vaddr; } - EXPORT_SYMBOL(kmap_high); #ifdef ARCH_NEEDS_KMAP_HIGH_GET @@ -246,7 +334,7 @@ EXPORT_SYMBOL(kmap_high); * * This can be called from any context. */ -void *kmap_high_get(struct page *page) +void *kmap_high_get(const struct page *page) { unsigned long vaddr, flags; @@ -257,7 +345,7 @@ void *kmap_high_get(struct page *page) pkmap_count[PKMAP_NR(vaddr)]++; } unlock_kmap_any(flags); - return (void*) vaddr; + return (void *) vaddr; } #endif @@ -268,12 +356,14 @@ void *kmap_high_get(struct page *page) * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called * only from user context. */ -void kunmap_high(struct page *page) +void kunmap_high(const struct page *page) { unsigned long vaddr; unsigned long nr; unsigned long flags; int need_wakeup; + unsigned int color = get_pkmap_color(page); + wait_queue_head_t *pkmap_map_wait; lock_kmap_any(flags); vaddr = (unsigned long)page_address(page); @@ -299,16 +389,331 @@ void kunmap_high(struct page *page) * no need for the wait-queue-head's lock. Simply * test if the queue is empty. */ - need_wakeup = waitqueue_active(&pkmap_map_wait); + pkmap_map_wait = get_pkmap_wait_queue_head(color); + need_wakeup = waitqueue_active(pkmap_map_wait); } unlock_kmap_any(flags); /* do wake-up, if needed, race-free outside of the spin lock */ if (need_wakeup) - wake_up(&pkmap_map_wait); + wake_up(pkmap_map_wait); } - EXPORT_SYMBOL(kunmap_high); + +void zero_user_segments(struct page *page, unsigned start1, unsigned end1, + unsigned start2, unsigned end2) +{ + unsigned int i; + + BUG_ON(end1 > page_size(page) || end2 > page_size(page)); + + if (start1 >= end1) + start1 = end1 = 0; + if (start2 >= end2) + start2 = end2 = 0; + + for (i = 0; i < compound_nr(page); i++) { + void *kaddr = NULL; + + if (start1 >= PAGE_SIZE) { + start1 -= PAGE_SIZE; + end1 -= PAGE_SIZE; + } else { + unsigned this_end = min_t(unsigned, end1, PAGE_SIZE); + + if (end1 > start1) { + kaddr = kmap_local_page(page + i); + memset(kaddr + start1, 0, this_end - start1); + } + end1 -= this_end; + start1 = 0; + } + + if (start2 >= PAGE_SIZE) { + start2 -= PAGE_SIZE; + end2 -= PAGE_SIZE; + } else { + unsigned this_end = min_t(unsigned, end2, PAGE_SIZE); + + if (end2 > start2) { + if (!kaddr) + kaddr = kmap_local_page(page + i); + memset(kaddr + start2, 0, this_end - start2); + } + end2 -= this_end; + start2 = 0; + } + + if (kaddr) { + kunmap_local(kaddr); + flush_dcache_page(page + i); + } + + if (!end1 && !end2) + break; + } + + BUG_ON((start1 | start2 | end1 | end2) != 0); +} +EXPORT_SYMBOL(zero_user_segments); +#endif /* CONFIG_HIGHMEM */ + +#ifdef CONFIG_KMAP_LOCAL + +#include <asm/kmap_size.h> + +/* + * With DEBUG_KMAP_LOCAL the stack depth is doubled and every second + * slot is unused which acts as a guard page + */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL +# define KM_INCR 2 +#else +# define KM_INCR 1 +#endif + +static inline int kmap_local_idx_push(void) +{ + WARN_ON_ONCE(in_hardirq() && !irqs_disabled()); + current->kmap_ctrl.idx += KM_INCR; + BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX); + return current->kmap_ctrl.idx - 1; +} + +static inline int kmap_local_idx(void) +{ + return current->kmap_ctrl.idx - 1; +} + +static inline void kmap_local_idx_pop(void) +{ + current->kmap_ctrl.idx -= KM_INCR; + BUG_ON(current->kmap_ctrl.idx < 0); +} + +#ifndef arch_kmap_local_post_map +# define arch_kmap_local_post_map(vaddr, pteval) do { } while (0) +#endif + +#ifndef arch_kmap_local_pre_unmap +# define arch_kmap_local_pre_unmap(vaddr) do { } while (0) +#endif + +#ifndef arch_kmap_local_post_unmap +# define arch_kmap_local_post_unmap(vaddr) do { } while (0) +#endif + +#ifndef arch_kmap_local_unmap_idx +#define arch_kmap_local_unmap_idx(idx, vaddr) kmap_local_calc_idx(idx) +#endif + +#ifndef arch_kmap_local_high_get +static inline void *arch_kmap_local_high_get(const struct page *page) +{ + return NULL; +} +#endif + +#ifndef arch_kmap_local_set_pte +#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev) \ + set_pte_at(mm, vaddr, ptep, ptev) +#endif + +/* Unmap a local mapping which was obtained by kmap_high_get() */ +static inline bool kmap_high_unmap_local(unsigned long vaddr) +{ +#ifdef ARCH_NEEDS_KMAP_HIGH_GET + if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { + kunmap_high(pte_page(ptep_get(&pkmap_page_table[PKMAP_NR(vaddr)]))); + return true; + } +#endif + return false; +} + +static pte_t *__kmap_pte; + +static pte_t *kmap_get_pte(unsigned long vaddr, int idx) +{ + if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY)) + /* + * Set by the arch if __kmap_pte[-idx] does not produce + * the correct entry. + */ + return virt_to_kpte(vaddr); + if (!__kmap_pte) + __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); + return &__kmap_pte[-idx]; +} + +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot) +{ + pte_t pteval, *kmap_pte; + unsigned long vaddr; + int idx; + + /* + * Disable migration so resulting virtual address is stable + * across preemption. + */ + migrate_disable(); + preempt_disable(); + idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + kmap_pte = kmap_get_pte(vaddr, idx); + BUG_ON(!pte_none(ptep_get(kmap_pte))); + pteval = pfn_pte(pfn, prot); + arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval); + arch_kmap_local_post_map(vaddr, pteval); + current->kmap_ctrl.pteval[kmap_local_idx()] = pteval; + preempt_enable(); + + return (void *)vaddr; +} +EXPORT_SYMBOL_GPL(__kmap_local_pfn_prot); + +void *__kmap_local_page_prot(const struct page *page, pgprot_t prot) +{ + void *kmap; + + /* + * To broaden the usage of the actual kmap_local() machinery always map + * pages when debugging is enabled and the architecture has no problems + * with alias mappings. + */ + if (!IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) && !PageHighMem(page)) + return page_address(page); + + /* Try kmap_high_get() if architecture has it enabled */ + kmap = arch_kmap_local_high_get(page); + if (kmap) + return kmap; + + return __kmap_local_pfn_prot(page_to_pfn(page), prot); +} +EXPORT_SYMBOL(__kmap_local_page_prot); + +void kunmap_local_indexed(const void *vaddr) +{ + unsigned long addr = (unsigned long) vaddr & PAGE_MASK; + pte_t *kmap_pte; + int idx; + + if (addr < __fix_to_virt(FIX_KMAP_END) || + addr > __fix_to_virt(FIX_KMAP_BEGIN)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP)) { + /* This _should_ never happen! See above. */ + WARN_ON_ONCE(1); + return; + } + /* + * Handle mappings which were obtained by kmap_high_get() + * first as the virtual address of such mappings is below + * PAGE_OFFSET. Warn for all other addresses which are in + * the user space part of the virtual address space. + */ + if (!kmap_high_unmap_local(addr)) + WARN_ON_ONCE(addr < PAGE_OFFSET); + return; + } + + preempt_disable(); + idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr); + WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + + kmap_pte = kmap_get_pte(addr, idx); + arch_kmap_local_pre_unmap(addr); + pte_clear(&init_mm, addr, kmap_pte); + arch_kmap_local_post_unmap(addr); + current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0); + kmap_local_idx_pop(); + preempt_enable(); + migrate_enable(); +} +EXPORT_SYMBOL(kunmap_local_indexed); + +/* + * Invoked before switch_to(). This is safe even when during or after + * clearing the maps an interrupt which needs a kmap_local happens because + * the task::kmap_ctrl.idx is not modified by the unmapping code so a + * nested kmap_local will use the next unused index and restore the index + * on unmap. The already cleared kmaps of the outgoing task are irrelevant + * because the interrupt context does not know about them. The same applies + * when scheduling back in for an interrupt which happens before the + * restore is complete. + */ +void __kmap_local_sched_out(void) +{ + struct task_struct *tsk = current; + pte_t *kmap_pte; + int i; + + /* Clear kmaps */ + for (i = 0; i < tsk->kmap_ctrl.idx; i++) { + pte_t pteval = tsk->kmap_ctrl.pteval[i]; + unsigned long addr; + int idx; + + /* With debug all even slots are unmapped and act as guard */ + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { + WARN_ON_ONCE(pte_val(pteval) != 0); + continue; + } + if (WARN_ON_ONCE(pte_none(pteval))) + continue; + + /* + * This is a horrible hack for XTENSA to calculate the + * coloured PTE index. Uses the PFN encoded into the pteval + * and the map index calculation because the actual mapped + * virtual address is not stored in task::kmap_ctrl. + * For any sane architecture this is optimized out. + */ + idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); + + addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + kmap_pte = kmap_get_pte(addr, idx); + arch_kmap_local_pre_unmap(addr); + pte_clear(&init_mm, addr, kmap_pte); + arch_kmap_local_post_unmap(addr); + } +} + +void __kmap_local_sched_in(void) +{ + struct task_struct *tsk = current; + pte_t *kmap_pte; + int i; + + /* Restore kmaps */ + for (i = 0; i < tsk->kmap_ctrl.idx; i++) { + pte_t pteval = tsk->kmap_ctrl.pteval[i]; + unsigned long addr; + int idx; + + /* With debug all even slots are unmapped and act as guard */ + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { + WARN_ON_ONCE(pte_val(pteval) != 0); + continue; + } + if (WARN_ON_ONCE(pte_none(pteval))) + continue; + + /* See comment in __kmap_local_sched_out() */ + idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); + addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + kmap_pte = kmap_get_pte(addr, idx); + set_pte_at(&init_mm, addr, kmap_pte, pteval); + arch_kmap_local_post_map(addr, pteval); + } +} + +void kmap_local_fork(struct task_struct *tsk) +{ + if (WARN_ON_ONCE(tsk->kmap_ctrl.idx)) + memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl)); +} + #endif #if defined(HASHED_PAGE_VIRTUAL) @@ -363,15 +768,14 @@ void *page_address(const struct page *page) list_for_each_entry(pam, &pas->lh, list) { if (pam->page == page) { ret = pam->virtual; - goto done; + break; } } } -done: + spin_unlock_irqrestore(&pas->lock, flags); return ret; } - EXPORT_SYMBOL(page_address); /** @@ -401,14 +805,11 @@ void set_page_address(struct page *page, void *virtual) list_for_each_entry(pam, &pas->lh, list) { if (pam->page == page) { list_del(&pam->list); - spin_unlock_irqrestore(&pas->lock, flags); - goto done; + break; } } spin_unlock_irqrestore(&pas->lock, flags); } -done: - return; } void __init page_address_init(void) @@ -421,4 +822,4 @@ void __init page_address_init(void) } } -#endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */ +#endif /* defined(HASHED_PAGE_VIRTUAL) */ |
