summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 16:23:15 +0900
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 16:23:15 +0900
commit9e2d8656f5e8aa214e66b462680cf86b210b74a8 (patch)
treef67d62e896cedf75599ea45f9ecf9999c6ad24cd /arch
parent1ea4f4f8405cc1ceec23f2d261bc3775785e6712 (diff)
parent9e695d2ecc8451cc2c1603d60b5c8e7f5581923a (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge patches from Andrew Morton: "A few misc things and very nearly all of the MM tree. A tremendous amount of stuff (again), including a significant rbtree library rework." * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (160 commits) sparc64: Support transparent huge pages. mm: thp: Use more portable PMD clearing sequenece in zap_huge_pmd(). mm: Add and use update_mmu_cache_pmd() in transparent huge page code. sparc64: Document PGD and PMD layout. sparc64: Eliminate PTE table memory wastage. sparc64: Halve the size of PTE tables sparc64: Only support 4MB huge pages and 8KB base pages. memory-hotplug: suppress "Trying to free nonexistent resource <XXXXXXXXXXXXXXXX-YYYYYYYYYYYYYYYY>" warning mm: memcg: clean up mm_match_cgroup() signature mm: document PageHuge somewhat mm: use %pK for /proc/vmallocinfo mm, thp: fix mlock statistics mm, thp: fix mapped pages avoiding unevictable list on mlock memory-hotplug: update memory block's state and notify userspace memory-hotplug: preparation to notify memory block's state at memory hot remove mm: avoid section mismatch warning for memblock_type_name make GFP_NOTRACK definition unconditional cma: decrease cc.nr_migratepages after reclaiming pagelist CMA: migrate mlocked pages kpageflags: fix wrong KPF_THP on non-huge compound pages ...
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/alpha/kernel/pci-sysfs.c2
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/mm/fault-armv.c3
-rw-r--r--arch/arm/mm/fault.c1
-rw-r--r--arch/arm/mm/flush.c3
-rw-r--r--arch/arm64/Kconfig4
-rw-r--r--arch/avr32/mm/fault.c1
-rw-r--r--arch/blackfin/Kconfig1
-rw-r--r--arch/cris/Kconfig1
-rw-r--r--arch/cris/mm/fault.c1
-rw-r--r--arch/frv/Kconfig2
-rw-r--r--arch/h8300/Kconfig1
-rw-r--r--arch/hexagon/mm/vm_fault.c1
-rw-r--r--arch/ia64/include/asm/hugetlb.h4
-rw-r--r--arch/ia64/kernel/perfmon.c2
-rw-r--r--arch/ia64/mm/fault.c1
-rw-r--r--arch/ia64/mm/init.c4
-rw-r--r--arch/m32r/Kconfig1
-rw-r--r--arch/m68k/Kconfig2
-rw-r--r--arch/m68k/mm/fault.c1
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/include/asm/atomic.h1
-rw-r--r--arch/microblaze/mm/fault.c1
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/mips/include/asm/hugetlb.h4
-rw-r--r--arch/mips/mm/fault.c1
-rw-r--r--arch/openrisc/mm/fault.c1
-rw-r--r--arch/parisc/kernel/cache.c3
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/include/asm/atomic.h1
-rw-r--r--arch/powerpc/include/asm/hugetlb.h4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c2
-rw-r--r--arch/powerpc/mm/fault.c1
-rw-r--r--arch/powerpc/oprofile/cell/spu_task_sync.c15
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c13
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/include/asm/hugetlb.h19
-rw-r--r--arch/s390/include/asm/pgtable.h210
-rw-r--r--arch/s390/include/asm/setup.h5
-rw-r--r--arch/s390/include/asm/tlb.h1
-rw-r--r--arch/s390/kernel/early.c2
-rw-r--r--arch/s390/mm/fault.c1
-rw-r--r--arch/s390/mm/gup.c11
-rw-r--r--arch/s390/mm/pgtable.c108
-rw-r--r--arch/sh/Kconfig3
-rw-r--r--arch/sh/include/asm/hugetlb.h6
-rw-r--r--arch/sh/mm/fault.c1
-rw-r--r--arch/sparc/Kconfig41
-rw-r--r--arch/sparc/include/asm/hugetlb.h9
-rw-r--r--arch/sparc/include/asm/mmu_64.h19
-rw-r--r--arch/sparc/include/asm/mmu_context_64.h2
-rw-r--r--arch/sparc/include/asm/page_64.h21
-rw-r--r--arch/sparc/include/asm/pgalloc_64.h56
-rw-r--r--arch/sparc/include/asm/pgtable_64.h253
-rw-r--r--arch/sparc/include/asm/tsb.h106
-rw-r--r--arch/sparc/kernel/pci.c2
-rw-r--r--arch/sparc/kernel/sun4v_tlb_miss.S2
-rw-r--r--arch/sparc/kernel/tsb.S9
-rw-r--r--arch/sparc/mm/fault_32.c1
-rw-r--r--arch/sparc/mm/fault_64.c5
-rw-r--r--arch/sparc/mm/hugetlbpage.c50
-rw-r--r--arch/sparc/mm/init_64.c314
-rw-r--r--arch/sparc/mm/tlb.c118
-rw-r--r--arch/sparc/mm/tsb.c40
-rw-r--r--arch/tile/Kconfig3
-rw-r--r--arch/tile/include/asm/hugetlb.h4
-rw-r--r--arch/tile/mm/elf.c19
-rw-r--r--arch/tile/mm/fault.c1
-rw-r--r--arch/um/Kconfig.common1
-rw-r--r--arch/um/kernel/trap.c1
-rw-r--r--arch/unicore32/kernel/process.c2
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/atomic.h24
-rw-r--r--arch/x86/include/asm/hugetlb.h4
-rw-r--r--arch/x86/include/asm/pgtable.h11
-rw-r--r--arch/x86/include/asm/pgtable_32.h1
-rw-r--r--arch/x86/include/asm/pgtable_64.h1
-rw-r--r--arch/x86/mm/fault.c1
-rw-r--r--arch/x86/mm/hugetlbpage.c3
-rw-r--r--arch/x86/mm/pat.c87
-rw-r--r--arch/x86/mm/pat_rbtree.c34
-rw-r--r--arch/x86/xen/mmu.c3
-rw-r--r--arch/xtensa/mm/fault.c1
84 files changed, 1283 insertions, 433 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index a62965d057f6..550cce4dd648 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -313,4 +313,7 @@ config HAVE_IRQ_TIME_ACCOUNTING
Archs need to ensure they use a high enough resolution clock to
support irq time accounting and then call enable_sched_clock_irqtime().
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ bool
+
source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index 53649c7d0068..b51f7b4818cd 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -26,7 +26,7 @@ static int hose_mmap_page_range(struct pci_controller *hose,
base = sparse ? hose->sparse_io_base : hose->dense_io_base;
vma->vm_pgoff += base >> PAGE_SHIFT;
- vma->vm_flags |= (VM_IO | VM_RESERVED);
+ vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
vma->vm_end - vma->vm_start,
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6d2f7f5c0036..2867a7742306 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -25,6 +25,7 @@ config ARM
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
select ARCH_BINFMT_ELF_RANDOMIZE_PIE
select HAVE_GENERIC_DMA_COHERENT
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
@@ -39,6 +40,7 @@ config ARM
select HARDIRQS_SW_RESEND
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
+ select HAVE_UID16
select ARCH_WANT_IPC_PARSE_VERSION
select HARDIRQS_SW_RESEND
select CPU_PM if (SUSPEND || CPU_IDLE)
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 7599e2625c7d..2a5907b5c8d2 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *mpnt;
- struct prio_tree_iter iter;
unsigned long offset;
pgoff_t pgoff;
int aliases = 0;
@@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
* cache coherency.
*/
flush_dcache_mmap_lock(mapping);
- vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
/*
* If this VMA is not in our MM, we can ignore it.
* Note that we intentionally mask out the VMA
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c3bd83450227..5dbf13f954f6 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -336,6 +336,7 @@ retry:
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 40ca11ed6e5f..1c8f7f564175 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
{
struct mm_struct *mm = current->active_mm;
struct vm_area_struct *mpnt;
- struct prio_tree_iter iter;
pgoff_t pgoff;
/*
@@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
flush_dcache_mmap_lock(mapping);
- vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
unsigned long offset;
/*
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 767ba5685454..7ff68c946073 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,8 @@ config ARM64
select GENERIC_TIME_VSYSCALL
select HARDIRQS_SW_RESEND
select HAVE_ARCH_TRACEHOOK
+ select HAVE_DEBUG_BUGVERBOSE
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
select HAVE_GENERIC_DMA_COHERENT
@@ -26,6 +28,7 @@ config ARM64
select PERF_USE_VMALLOC
select RTC_LIB
select SPARSE_IRQ
+ select SYSCTL_EXCEPTION_TRACE
help
ARM 64-bit (AArch64) Linux support.
@@ -193,6 +196,7 @@ config COMPAT
bool "Kernel support for 32-bit EL0"
depends on !ARM64_64K_PAGES
select COMPAT_BINFMT_ELF
+ select HAVE_UID16
help
This option enables support for a 32-bit EL0 running under a 64-bit
kernel at EL1. AArch32-specific components such as system calls,
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index b92e60958617..b2f2d2d66849 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -152,6 +152,7 @@ good_area:
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would have
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 99224c4eb86b..ccd9193932b2 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -33,6 +33,7 @@ config BLACKFIN
select HAVE_PERF_EVENTS
select ARCH_HAVE_CUSTOM_GPIO_H
select ARCH_WANT_OPTIONAL_GPIOLIB
+ select HAVE_UID16
select ARCH_WANT_IPC_PARSE_VERSION
select HAVE_GENERIC_HARDIRQS
select GENERIC_ATOMIC64
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 72bd5ae50a89..a118163b04ee 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -42,6 +42,7 @@ config CRIS
select HAVE_IDE
select GENERIC_ATOMIC64
select HAVE_GENERIC_HARDIRQS
+ select HAVE_UID16
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_IRQ_SHOW
select GENERIC_IOMAP
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 45fd542cf173..73312ab6c696 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -186,6 +186,7 @@ retry:
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 971c0a19facb..9d262645f667 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -5,8 +5,10 @@ config FRV
select HAVE_ARCH_TRACEHOOK
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
+ select HAVE_UID16
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_SHOW
+ select HAVE_DEBUG_BUGVERBOSE
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_CPU_DEVICES
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e8a0d9a09ce..90462eb23d02 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -3,6 +3,7 @@ config H8300
default y
select HAVE_IDE
select HAVE_GENERIC_HARDIRQS
+ select HAVE_UID16
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_IRQ_SHOW
select GENERIC_CPU_DEVICES
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 06695cc4fe58..513b74cb397e 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -113,6 +113,7 @@ good_area:
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index da55c63728e0..94eaa5bd5d0c 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -77,4 +77,8 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
#endif /* _ASM_IA64_HUGETLB_H */
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f388b4e18a37..ea39eba61ef5 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2307,7 +2307,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
*/
vma->vm_mm = mm;
vma->vm_file = get_file(filp);
- vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
+ vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP;
vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
/*
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 8443daf4f515..6cf0341f978e 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -184,6 +184,7 @@ retry:
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 0eab454867a2..acd5b68e8871 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -138,7 +138,8 @@ ia64_init_addr_space (void)
vma->vm_mm = current->mm;
vma->vm_end = PAGE_SIZE;
vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
- vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+ vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
+ VM_DONTEXPAND | VM_DONTDUMP;
down_write(&current->mm->mmap_sem);
if (insert_vm_struct(current->mm, vma)) {
up_write(&current->mm->mmap_sem);
@@ -636,6 +637,7 @@ mem_init (void)
high_memory = __va(max_low_pfn * PAGE_SIZE);
+ reset_zone_present_pages();
for_each_online_pgdat(pgdat)
if (pgdat->bdata->node_bootmem_map)
totalram_pages += free_all_bootmem_node(pgdat);
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 49498bbb9616..e875fc3ce9cb 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -8,6 +8,7 @@ config M32R
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
select ARCH_WANT_IPC_PARSE_VERSION
+ select HAVE_DEBUG_BUGVERBOSE
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b22df9410dce..dae1e7e16a37 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -3,9 +3,11 @@ config M68K
default y
select HAVE_IDE
select HAVE_AOUT if MMU
+ select HAVE_DEBUG_BUGVERBOSE
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_SHOW
select GENERIC_ATOMIC64
+ select HAVE_UID16
select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
select GENERIC_CPU_DEVICES
select GENERIC_STRNCPY_FROM_USER if MMU
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index aeebbb7b30f0..a563727806bf 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -170,6 +170,7 @@ good_area:
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 6133bed2b855..53fd94ab60f0 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -16,6 +16,7 @@ config MICROBLAZE
select OF
select OF_EARLY_FLATTREE
select ARCH_WANT_IPC_PARSE_VERSION
+ select HAVE_DEBUG_KMEMLEAK
select IRQ_DOMAIN
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_PROBE
diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
index 472d8bf726df..42ac382a09da 100644
--- a/arch/microblaze/include/asm/atomic.h
+++ b/arch/microblaze/include/asm/atomic.h
@@ -22,5 +22,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
return res;
}
+#define atomic_dec_if_positive atomic_dec_if_positive
#endif /* _ASM_MICROBLAZE_ATOMIC_H */
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index eb365d6795fa..714b35a9c4f7 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -233,6 +233,7 @@ good_area:
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4cd538b42a3f..35453eaeffb5 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -17,6 +17,7 @@ config MIPS
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_DEBUG_KMEMLEAK
select ARCH_BINFMT_ELF_RANDOMIZE_PIE
select RTC_LIB if !MACH_LOONGSON
select GENERIC_ATOMIC64 if !64BIT
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 58d36889f09b..bd94946a18f3 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -112,4 +112,8 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
#endif /* __ASM_HUGETLB_H */
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 7a19957735e9..ddcec1e1a0cd 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -171,6 +171,7 @@ good_area:
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 40f850e9766c..e2bfafce66c5 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -183,6 +183,7 @@ good_area:
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 9d181890a7e3..48e16dc20102 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page)
{
struct address_space *mapping = page_mapping(page);
struct vm_area_struct *mpnt;
- struct prio_tree_iter iter;
unsigned long offset;
unsigned long addr, old_addr = 0;
pgoff_t pgoff;
@@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page)
* to flush one address here for them all to become coherent */
flush_dcache_mmap_lock(mapping);
- vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
addr = mpnt->vm_start + offset;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4ce0be32d153..df7edb887a04 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -99,6 +99,7 @@ config PPC
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
+ select SYSCTL_EXCEPTION_TRACE
select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_IDE
select HAVE_IOREMAP_PROT
@@ -113,6 +114,7 @@ config PPC
select HAVE_DMA_API_DEBUG
select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_OPROFILE
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_SYSCALL_WRAPPERS if PPC64
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index da29032ae38f..e3b1d41c89be 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -268,6 +268,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
return t;
}
+#define atomic_dec_if_positive atomic_dec_if_positive
#define smp_mb__before_atomic_dec() smp_mb()
#define smp_mb__after_atomic_dec() smp_mb()
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index dfdb95bc59a5..62e11a32c4c2 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -151,6 +151,10 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
#else /* ! CONFIG_HUGETLB_PAGE */
static inline void flush_hugetlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 83e929e66f9d..721d4603a235 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1183,7 +1183,7 @@ static const struct vm_operations_struct kvm_rma_vm_ops = {
static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
{
- vma->vm_flags |= VM_RESERVED;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_ops = &kvm_rma_vm_ops;
return 0;
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5495ebe983a2..0a6b28336eb0 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -451,6 +451,7 @@ good_area:
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 642fca137ccb..28f1af2db1f5 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -304,7 +304,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
return cookie;
}
-/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+/* Look up the dcookie for the task's mm->exe_file,
* which corresponds loosely to "application name". Also, determine
* the offset for the SPU ELF object. If computed offset is
* non-zero, it implies an embedded SPU object; otherwise, it's a
@@ -321,7 +321,6 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
{
unsigned long app_cookie = 0;
unsigned int my_offset = 0;
- struct file *app = NULL;
struct vm_area_struct *vma;
struct mm_struct *mm = spu->mm;
@@ -330,16 +329,10 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (!vma->vm_file)
- continue;
- if (!(vma->vm_flags & VM_EXECUTABLE))
- continue;
- app_cookie = fast_get_dcookie(&vma->vm_file->f_path);
+ if (mm->exe_file) {
+ app_cookie = fast_get_dcookie(&mm->exe_file->f_path);
pr_debug("got dcookie for %s\n",
- vma->vm_file->f_dentry->d_name.name);
- app = vma->vm_file;
- break;
+ mm->exe_file->f_dentry->d_name.name);
}
for (vma = mm->mmap; vma; vma = vma->vm_next) {
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 11d8e0544ac0..dc0a035e63bb 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -77,7 +77,8 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
{
unsigned long start, start_pfn;
struct zone *zone;
- int ret;
+ int i, ret;
+ int sections_to_remove;
start_pfn = base >> PAGE_SHIFT;
@@ -97,9 +98,13 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
* to sysfs "state" file and we can't remove sysfs entries
* while writing to it. So we have to defer it to here.
*/
- ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT);
- if (ret)
- return ret;
+ sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
+ for (i = 0; i < sections_to_remove; i++) {
+ unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;
+ ret = __remove_pages(zone, start_pfn, PAGES_PER_SECTION);
+ if (ret)
+ return ret;
+ }
/*
* Update memory regions for memory remove
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c8af429991d9..ceff7aef2477 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -68,6 +68,7 @@ config S390
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_C_RECORDMCOUNT
select HAVE_SYSCALL_TRACEPOINTS
+ select SYSCTL_EXCEPTION_TRACE
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_REGS_AND_STACK_ACCESS_API
@@ -80,6 +81,7 @@ config S390
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
@@ -126,6 +128,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select HAVE_UID16 if 32BIT
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 2d6e6e380564..593753ee07f3 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -33,6 +33,7 @@ static inline int prepare_hugepage_range(struct file *file,
}
#define hugetlb_prefault_arch_hook(mm) do { } while (0)
+#define arch_clear_hugepage_flags(page) do { } while (0)
int arch_prepare_hugepage(struct page *page);
void arch_release_hugepage(struct page *page);
@@ -77,23 +78,6 @@ static inline void __pmd_csp(pmd_t *pmdp)
" csp %1,%3"
: "=m" (*pmdp)
: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
- pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
-static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
-{
- unsigned long sto = (unsigned long) pmdp -
- pmd_index(address) * sizeof(pmd_t);
-
- if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
- asm volatile(
- " .insn rrf,0xb98e0000,%2,%3,0,0"
- : "=m" (*pmdp)
- : "m" (*pmdp), "a" (sto),
- "a" ((address & HPAGE_MASK))
- );
- }
- pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
}
static inline void huge_ptep_invalidate(struct mm_struct *mm,
@@ -105,6 +89,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm,
__pmd_idte(address, pmdp);
else
__pmd_csp(pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
}
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6bd7d7483017..979fe3dc0788 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -42,6 +42,7 @@ extern void fault_init(void);
* tables contain all the necessary information.
*/
#define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
/*
* ZERO_PAGE is a global shared page that is always zero; used
@@ -347,6 +348,12 @@ extern struct page *vmemmap;
#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
+#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
+#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT)
+
+/* Set of bits not changed in pmd_modify */
+#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
+ | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
/* Page status table bits for virtualization */
#define RCP_ACC_BITS 0xf000000000000000UL
@@ -506,6 +513,30 @@ static inline int pmd_bad(pmd_t pmd)
return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
}
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+ return 0;
+}
+
static inline int pte_none(pte_t pte)
{
return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
@@ -1159,6 +1190,185 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)
+static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
+{
+ unsigned long sto = (unsigned long) pmdp -
+ pmd_index(address) * sizeof(pmd_t);
+
+ if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+ asm volatile(
+ " .insn rrf,0xb98e0000,%2,%3,0,0"
+ : "=m" (*pmdp)
+ : "m" (*pmdp), "a" (sto),
+ "a" ((address & HPAGE_MASK))
+ : "cc"
+ );
+ }
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t entry)
+{
+ *pmdp = entry;
+}
+
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+ unsigned long pgprot_pmd = 0;
+
+ if (pgprot_val(pgprot) & _PAGE_INVALID) {
+ if (pgprot_val(pgprot) & _PAGE_SWT)
+ pgprot_pmd |= _HPAGE_TYPE_NONE;
+ pgprot_pmd |= _SEGMENT_ENTRY_INV;
+ }
+ if (pgprot_val(pgprot) & _PAGE_RO)
+ pgprot_pmd |= _SEGMENT_ENTRY_RO;
+ return pgprot_pmd;
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+ pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ return pmd;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+ return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ /* No dirty bit in the segment table entry. */
+ return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ /* No referenced bit in the segment table entry. */
+ return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ /* No referenced bit in the segment table entry. */
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
+ long tmp, rc;
+ int counter;
+
+ rc = 0;
+ if (MACHINE_HAS_RRBM) {
+ counter = PTRS_PER_PTE >> 6;
+ asm volatile(
+ "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */
+ " ogr %1,%0\n"
+ " la %3,0(%4,%3)\n"
+ " brct %2,0b\n"
+ : "=&d" (tmp), "+&d" (rc), "+d" (counter),
+ "+a" (pmd_addr)
+ : "a" (64 * 4096UL) : "cc");
+ rc = !!rc;
+ } else {
+ counter = PTRS_PER_PTE;
+ asm volatile(
+ "0: rrbe 0,%2\n"
+ " la %2,0(%3,%2)\n"
+ " brc 12,1f\n"
+ " lhi %0,1\n"
+ "1: brct %1,0b\n"
+ : "+d" (rc), "+d" (counter), "+a" (pmd_addr)
+ : "a" (4096UL) : "cc");
+ }
+ return rc;
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
+
+ __pmd_idte(address, pmdp);
+ pmd_clear(pmdp);
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+static inline void pmdp_invalidate(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ __pmd_idte(address, pmdp);
+}
+
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+ pmd_t __pmd;
+ pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+ return __pmd;
+}
+
+#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+}
+
+static inline int has_transparent_hugepage(void)
+{
+ return MACHINE_HAS_HPAGE ? 1 : 0;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ if (pmd_trans_huge(pmd))
+ return pmd_val(pmd) >> HPAGE_SHIFT;
+ else
+ return pmd_val(pmd) >> PAGE_SHIFT;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
/*
* 31 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way.
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 87b47ca954f1..8cfd731a18d8 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -81,6 +81,7 @@ extern unsigned int s390_user_mode;
#define MACHINE_FLAG_SPP (1UL << 13)
#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
#define MACHINE_FLAG_TE (1UL << 15)
+#define MACHINE_FLAG_RRBM (1UL << 16)
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -99,7 +100,8 @@ extern unsigned int s390_user_mode;
#define MACHINE_HAS_PFMF (0)
#define MACHINE_HAS_SPP (0)
#define MACHINE_HAS_TOPOLOGY (0)
-#define MACHINE_HAS_TE (0)
+#define MACHINE_HAS_TE (0)
+#define MACHINE_HAS_RRBM (0)
#else /* CONFIG_64BIT */
#define MACHINE_HAS_IEEE (1)
#define MACHINE_HAS_CSP (1)
@@ -112,6 +114,7 @@ extern unsigned int s390_user_mode;
#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
+#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM)
#endif /* CONFIG_64BIT */
#define ZFCPDUMP_HSA_SIZE (32UL<<20)
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 06e5acbc84bd..b75d7d686684 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -137,6 +137,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0)
#define tlb_migrate_finish(mm) do { } while (0)
#endif /* _S390_TLB_H */
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 7f4717675c19..00d114445068 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -388,6 +388,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_SPP;
if (test_facility(50) && test_facility(73))
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+ if (test_facility(66))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM;
#endif
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac9122ca1152..04ad4001a289 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -367,6 +367,7 @@ retry:
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem);
goto retry;
}
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index eeaf8023851f..60acb93a4680 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -115,7 +115,16 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
pmd = *pmdp;
barrier();
next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
+ /*
+ * The pmd_trans_splitting() check below explains why
+ * pmdp_splitting_flush() has to serialize with
+ * smp_call_function() against our disabled IRQs, to stop
+ * this gup-fast code from running while we set the
+ * splitting bit in the pmd. Returning zero will take
+ * the slow path that will call wait_split_huge_page()
+ * if the pmd is still in splitting state.
+ */
+ if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
if (unlikely(pmd_huge(pmd))) {
if (!gup_huge_pmd(pmdp, pmd, addr, next,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b402991e43d7..c8188a18af05 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -787,6 +787,30 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
tlb_table_flush(tlb);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void thp_split_vma(struct vm_area_struct *vma)
+{
+ unsigned long addr;
+ struct page *page;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ page = follow_page(vma, addr, FOLL_SPLIT);
+ }
+}
+
+void thp_split_mm(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma = mm->mmap;
+
+ while (vma != NULL) {
+ thp_split_vma(vma);
+ vma->vm_flags &= ~VM_HUGEPAGE;
+ vma->vm_flags |= VM_NOHUGEPAGE;
+ vma = vma->vm_next;
+ }
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
/*
* switch on pgstes for its userspace process (for kvm)
*/
@@ -824,6 +848,12 @@ int s390_enable_sie(void)
if (!mm)
return -ENOMEM;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /* split thp mappings and disable thp for future mappings */
+ thp_split_mm(mm);
+ mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+
/* Now lets check again if something happened */
task_lock(tsk);
if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
@@ -866,3 +896,81 @@ bool kernel_page_present(struct page *page)
return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ /* No need to flush TLB
+ * On s390 reference bits are in storage key and never in TLB */
+ return pmdp_test_and_clear_young(vma, address, pmdp);
+}
+
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+ if (pmd_same(*pmdp, entry))
+ return 0;
+ pmdp_invalidate(vma, address, pmdp);
+ set_pmd_at(vma->vm_mm, address, pmdp, entry);
+ return 1;
+}
+
+static void pmdp_splitting_flush_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
+ (unsigned long *) pmdp)) {
+ /* need to serialize against gup-fast (IRQ disabled) */
+ smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
+ }
+}
+
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
+
+ assert_spin_locked(&mm->page_table_lock);
+
+ /* FIFO */
+ if (!mm->pmd_huge_pte)
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) mm->pmd_huge_pte);
+ mm->pmd_huge_pte = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+{
+ struct list_head *lh;
+ pgtable_t pgtable;
+ pte_t *ptep;
+
+ assert_spin_locked(&mm->page_table_lock);
+
+ /* FIFO */
+ pgtable = mm->pmd_huge_pte;
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ mm->pmd_huge_pte = NULL;
+ else {
+ mm->pmd_huge_pte = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ ptep = (pte_t *) pgtable;
+ pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep++;
+ pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 36f5141e8041..3b3e27a3ff2c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -13,14 +13,17 @@ config SUPERH
select HAVE_DMA_ATTRS
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
+ select HAVE_DEBUG_BUGVERBOSE
select ARCH_HAVE_CUSTOM_GPIO_H
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
select PERF_USE_VMALLOC
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_XZ
select HAVE_KERNEL_LZO
+ select HAVE_UID16
select ARCH_WANT_IPC_PARSE_VERSION
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index 967068fb79ac..b3808c7d67b2 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -1,6 +1,7 @@
#ifndef _ASM_SH_HUGETLB_H
#define _ASM_SH_HUGETLB_H
+#include <asm/cacheflush.h>
#include <asm/page.h>
@@ -89,4 +90,9 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+ clear_bit(PG_dcache_clean, &page->flags);
+}
+
#endif /* _ASM_SH_HUGETLB_H */
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 3bdc1ad9a341..cbbdcad8fcb3 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -504,6 +504,7 @@ good_area:
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 67f1f6f5f4e1..91c780c973ba 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -18,6 +18,7 @@ config SPARC
select HAVE_OPROFILE
select HAVE_ARCH_KGDB if !SMP || SPARC64
select HAVE_ARCH_TRACEHOOK
+ select SYSCTL_EXCEPTION_TRACE
select ARCH_WANT_OPTIONAL_GPIOLIB
select RTC_CLASS
select RTC_DRV_M48T59
@@ -32,6 +33,7 @@ config SPARC
select GENERIC_PCI_IOMAP
select HAVE_NMI_WATCHDOG if SPARC64
select HAVE_BPF_JIT
+ select HAVE_DEBUG_BUGVERBOSE
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
select GENERIC_CLOCKEVENTS
@@ -42,6 +44,7 @@ config SPARC32
def_bool !64BIT
select GENERIC_ATOMIC64
select CLZ_TAB
+ select HAVE_UID16
config SPARC64
def_bool 64BIT
@@ -59,6 +62,7 @@ config SPARC64
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_DEBUG_KMEMLEAK
select RTC_DRV_CMOS
select RTC_DRV_BQ4802
select RTC_DRV_SUN4V
@@ -226,25 +230,6 @@ config EARLYFB
help
Say Y here to enable a faster early framebuffer boot console.
-choice
- prompt "Kernel page size" if SPARC64
- default SPARC64_PAGE_SIZE_8KB
-
-config SPARC64_PAGE_SIZE_8KB
- bool "8KB"
- help
- This lets you select the page size of the kernel.
-
- 8KB and 64KB work quite well, since SPARC ELF sections
- provide for up to 64KB alignment.
-
- If you don't know what to do, choose 8KB.
-
-config SPARC64_PAGE_SIZE_64KB
- bool "64KB"
-
-endchoice
-
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on SPARC64 && PROC_FS
@@ -316,23 +301,6 @@ config GENERIC_LOCKBREAK
default y
depends on SPARC64 && SMP && PREEMPT
-choice
- prompt "SPARC64 Huge TLB Page Size"
- depends on SPARC64 && HUGETLB_PAGE
- default HUGETLB_PAGE_SIZE_4MB
-
-config HUGETLB_PAGE_SIZE_4MB
- bool "4MB"
-
-config HUGETLB_PAGE_SIZE_512K
- bool "512K"
-
-config HUGETLB_PAGE_SIZE_64K
- depends on !SPARC64_PAGE_SIZE_64KB
- bool "64K"
-
-endchoice
-
config NUMA
bool "NUMA support"
depends on SPARC64 && SMP
@@ -571,6 +539,7 @@ config COMPAT
depends on SPARC64
default y
select COMPAT_BINFMT_ELF
+ select HAVE_UID16
select ARCH_WANT_OLD_COMPAT_IPC
config SYSVIPC_COMPAT
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 177061064ee6..8c5eed6d267f 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -10,7 +10,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
-void hugetlb_prefault_arch_hook(struct mm_struct *mm);
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+ hugetlb_setup(mm);
+}
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
@@ -82,4 +85,8 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
#endif /* _ASM_SPARC64_HUGETLB_H */
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index 9067dc500535..76092c4dd277 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -30,22 +30,8 @@
#define CTX_PGSZ_MASK ((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \
(CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT))
-#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define CTX_PGSZ_BASE CTX_PGSZ_8KB
-#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
-#define CTX_PGSZ_BASE CTX_PGSZ_64KB
-#else
-#error No page size specified in kernel configuration
-#endif
-
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
-#define CTX_PGSZ_HUGE CTX_PGSZ_4MB
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define CTX_PGSZ_HUGE CTX_PGSZ_512KB
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define CTX_PGSZ_HUGE CTX_PGSZ_64KB
-#endif
-
+#define CTX_PGSZ_HUGE CTX_PGSZ_4MB
#define CTX_PGSZ_KERN CTX_PGSZ_4MB
/* Thus, when running on UltraSPARC-III+ and later, we use the following
@@ -96,7 +82,7 @@ struct tsb_config {
#define MM_TSB_BASE 0
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define MM_TSB_HUGE 1
#define MM_NUM_TSBS 2
#else
@@ -107,6 +93,7 @@ typedef struct {
spinlock_t lock;
unsigned long sparc64_ctx_val;
unsigned long huge_pte_count;
+ struct page *pgtable_page;
struct tsb_config tsb_block[MM_NUM_TSBS];
struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
} mm_context_t;
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index a97fd085cebe..9191ca62ed9c 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -36,7 +36,7 @@ static inline void tsb_context_switch(struct mm_struct *mm)
{
__tsb_context_switch(__pa(mm->pgd),
&mm->context.tsb_block[0],
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
(mm->context.tsb_block[1].tsb ?
&mm->context.tsb_block[1] :
NULL)
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index f0d09b401036..4b39f74d6ca0 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -3,13 +3,7 @@
#include <linux/const.h>
-#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define PAGE_SHIFT 13
-#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
-#define PAGE_SHIFT 16
-#else
-#error No page size specified in kernel configuration
-#endif
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
@@ -21,15 +15,9 @@
#define DCACHE_ALIASING_POSSIBLE
#endif
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define HPAGE_SHIFT 22
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define HPAGE_SHIFT 19
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define HPAGE_SHIFT 16
-#endif
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1UL))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
@@ -38,6 +26,11 @@
#ifndef __ASSEMBLY__
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+struct mm_struct;
+extern void hugetlb_setup(struct mm_struct *mm);
+#endif
+
#define WANT_PAGE_VIRTUAL
extern void _clear_page(void *page);
@@ -98,7 +91,7 @@ typedef unsigned long pgprot_t;
#endif /* (STRICT_MM_TYPECHECKS) */
-typedef struct page *pgtable_t;
+typedef pte_t *pgtable_t;
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
(_AC(0x0000000070000000,UL)) : \
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 40b2d7a7023d..bcfe063bce23 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -38,51 +38,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(pgtable_cache, pmd);
}
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
-{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
-{
- struct page *page;
- pte_t *pte;
-
- pte = pte_alloc_one_kernel(mm, address);
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- pgtable_page_ctor(page);
- return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
+extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address);
+extern pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address);
+extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
+extern void pte_free(struct mm_struct *mm, pgtable_t ptepage);
-#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
-#define pmd_populate(MM,PMD,PTE_PAGE) \
- pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE))
-#define pmd_pgtable(pmd) pmd_page(pmd)
+#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
+#define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE)
+#define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD))
#define check_pgt_cache() do { } while (0)
-static inline void pgtable_free(void *table, bool is_page)
-{
- if (is_page)
- free_page((unsigned long)table);
- else
- kmem_cache_free(pgtable_cache, table);
-}
+extern void pgtable_free(void *table, bool is_page);
#ifdef CONFIG_SMP
@@ -113,11 +82,10 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is
}
#endif /* !CONFIG_SMP */
-static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte,
unsigned long address)
{
- pgtable_page_dtor(ptepage);
- pgtable_free_tlb(tlb, page_address(ptepage), true);
+ pgtable_free_tlb(tlb, pte, true);
}
#define __pmd_free_tlb(tlb, pmd, addr) \
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 61210db139fb..95515f1e7cef 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -45,40 +45,59 @@
#define vmemmap ((struct page *)VMEMMAP_BASE)
-/* XXX All of this needs to be rethought so we can take advantage
- * XXX cheetah's full 64-bit virtual address space, ie. no more hole
- * XXX in the middle like on spitfire. -DaveM
- */
-/*
- * Given a virtual address, the lowest PAGE_SHIFT bits determine offset
- * into the page; the next higher PAGE_SHIFT-3 bits determine the pte#
- * in the proper pagetable (the -3 is from the 8 byte ptes, and each page
- * table is a single page long). The next higher PMD_BITS determine pmd#
- * in the proper pmdtable (where we must have PMD_BITS <= (PAGE_SHIFT-2)
- * since the pmd entries are 4 bytes, and each pmd page is a single page
- * long). Finally, the higher few bits determine pgde#.
- */
-
/* PMD_SHIFT determines the size of the area a second-level page
* table can map
*/
-#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3))
+#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4))
#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PMD_BITS (PAGE_SHIFT - 2)
/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
+#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS)
#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PGDIR_BITS (PAGE_SHIFT - 2)
+#if (PGDIR_SHIFT + PGDIR_BITS) != 44
+#error Page table parameters do not cover virtual address space properly.
+#endif
+
+#if (PMD_SHIFT != HPAGE_SHIFT)
+#error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages.
+#endif
+
+/* PMDs point to PTE tables which are 4K aligned. */
+#define PMD_PADDR _AC(0xfffffffe,UL)
+#define PMD_PADDR_SHIFT _AC(11,UL)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_ISHUGE _AC(0x00000001,UL)
+
+/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge
+ * pages, this frees up a bunch of bits in the layout that we can
+ * use for the protection settings and software metadata.
+ */
+#define PMD_HUGE_PADDR _AC(0xfffff800,UL)
+#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL)
+#define PMD_HUGE_PRESENT _AC(0x00000400,UL)
+#define PMD_HUGE_WRITE _AC(0x00000200,UL)
+#define PMD_HUGE_DIRTY _AC(0x00000100,UL)
+#define PMD_HUGE_ACCESSED _AC(0x00000080,UL)
+#define PMD_HUGE_EXEC _AC(0x00000040,UL)
+#define PMD_HUGE_SPLITTING _AC(0x00000020,UL)
+#endif
+
+/* PGDs point to PMD tables which are 8K aligned. */
+#define PGD_PADDR _AC(0xfffffffc,UL)
+#define PGD_PADDR_SHIFT _AC(11,UL)
+
#ifndef __ASSEMBLY__
#include <linux/sched.h>
/* Entries per page directory level. */
-#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
+#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4))
#define PTRS_PER_PMD (1UL << PMD_BITS)
#define PTRS_PER_PGD (1UL << PGDIR_BITS)
@@ -160,26 +179,11 @@
#define _PAGE_SZ8K_4V _AC(0x0000000000000000,UL) /* 8K Page */
#define _PAGE_SZALL_4V _AC(0x0000000000000007,UL) /* All pgsz bits */
-#if PAGE_SHIFT == 13
#define _PAGE_SZBITS_4U _PAGE_SZ8K_4U
#define _PAGE_SZBITS_4V _PAGE_SZ8K_4V
-#elif PAGE_SHIFT == 16
-#define _PAGE_SZBITS_4U _PAGE_SZ64K_4U
-#define _PAGE_SZBITS_4V _PAGE_SZ64K_4V
-#else
-#error Wrong PAGE_SHIFT specified
-#endif
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
#define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
#define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define _PAGE_SZHUGE_4U _PAGE_SZ512K_4U
-#define _PAGE_SZHUGE_4V _PAGE_SZ512K_4V
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define _PAGE_SZHUGE_4U _PAGE_SZ64K_4U
-#define _PAGE_SZHUGE_4V _PAGE_SZ64K_4V
-#endif
/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
#define __P000 __pgprot(0)
@@ -218,7 +222,6 @@ extern unsigned long _PAGE_CACHE;
extern unsigned long pg_iobits;
extern unsigned long _PAGE_ALL_SZ_BITS;
-extern unsigned long _PAGE_SZBITS;
extern struct page *mem_map_zero;
#define ZERO_PAGE(vaddr) (mem_map_zero)
@@ -231,25 +234,25 @@ extern struct page *mem_map_zero;
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
unsigned long paddr = pfn << PAGE_SHIFT;
- unsigned long sz_bits;
-
- sz_bits = 0UL;
- if (_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL) {
- __asm__ __volatile__(
- "\n661: sethi %%uhi(%1), %0\n"
- " sllx %0, 32, %0\n"
- " .section .sun4v_2insn_patch, \"ax\"\n"
- " .word 661b\n"
- " mov %2, %0\n"
- " nop\n"
- " .previous\n"
- : "=r" (sz_bits)
- : "i" (_PAGE_SZBITS_4U), "i" (_PAGE_SZBITS_4V));
- }
- return __pte(paddr | sz_bits | pgprot_val(prot));
+
+ BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
+ return __pte(paddr | pgprot_val(prot));
}
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot);
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ /* Do nothing, mk_pmd() does this part. */
+ return pmd;
+}
+#endif
+
/* This one can be done with two shifts. */
static inline unsigned long pte_pfn(pte_t pte)
{
@@ -286,6 +289,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
* Note: We encode this into 3 sun4v 2-insn patch sequences.
*/
+ BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
__asm__ __volatile__(
"\n661: sethi %%uhi(%2), %1\n"
" sethi %%hi(%2), %0\n"
@@ -307,10 +311,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
: "=r" (mask), "=r" (tmp)
: "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U |
- _PAGE_SZBITS_4U | _PAGE_SPECIAL),
+ _PAGE_SPECIAL),
"i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V |
- _PAGE_SZBITS_4V | _PAGE_SPECIAL));
+ _PAGE_SPECIAL));
return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
}
@@ -618,19 +622,130 @@ static inline unsigned long pte_special(pte_t pte)
return pte_val(pte) & _PAGE_SPECIAL;
}
-#define pmd_set(pmdp, ptep) \
- (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL))
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_young(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_HUGE_ACCESSED;
+}
+
+static inline int pmd_write(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_HUGE_WRITE;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR;
+
+ return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+ return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
+ (PMD_ISHUGE | PMD_HUGE_PRESENT);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
+ (PMD_ISHUGE|PMD_HUGE_SPLITTING);
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_ISHUGE;
+}
+
+#define has_transparent_hugepage() 1
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
+ return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_WRITE;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_DIRTY;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_WRITE;
+ return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_SPLITTING;
+ return pmd;
+}
+
+extern pgprot_t pmd_pgprot(pmd_t entry);
+#endif
+
+static inline int pmd_present(pmd_t pmd)
+{
+ return pmd_val(pmd) != 0U;
+}
+
+#define pmd_none(pmd) (!pmd_val(pmd))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+#else
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ *pmdp = pmd;
+}
+#endif
+
+static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+{
+ unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT;
+
+ pmd_val(*pmdp) = val;
+}
+
#define pud_set(pudp, pmdp) \
- (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> 11UL))
-#define __pmd_page(pmd) \
- ((unsigned long) __va((((unsigned long)pmd_val(pmd))<<11UL)))
+ (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT))
+static inline unsigned long __pmd_page(pmd_t pmd)
+{
+ unsigned long paddr = (unsigned long) pmd_val(pmd);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_val(pmd) & PMD_ISHUGE)
+ paddr &= PMD_HUGE_PADDR;
+#endif
+ paddr <<= PMD_PADDR_SHIFT;
+ return ((unsigned long) __va(paddr));
+}
#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd))
#define pud_page_vaddr(pud) \
- ((unsigned long) __va((((unsigned long)pud_val(pud))<<11UL)))
+ ((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT)))
#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud))
-#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (0)
-#define pmd_present(pmd) (pmd_val(pmd) != 0U)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (0)
@@ -664,6 +779,16 @@ static inline unsigned long pte_special(pte_t pte)
extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
pte_t *ptep, pte_t orig, int fullmm);
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
+ set_pmd_at(mm, addr, pmdp, __pmd(0U));
+ return pmd;
+}
+
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int fullmm)
{
@@ -719,6 +844,16 @@ extern void mmu_info(struct seq_file *);
struct vm_area_struct;
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+#endif
/* Encode and de-code a swap entry */
#define __swp_type(entry) (((entry).val >> PAGE_SHIFT) & 0xffUL)
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index 1a8afd1ad04f..b4c258de4443 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -147,20 +147,96 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, 11, REG1; \
+ sllx REG1, PGD_PADDR_SHIFT, REG1; \
andn REG2, 0x3, REG2; \
lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - PMD_SHIFT, REG2; \
- srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, 11, REG1; \
+ srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
+ sllx REG1, PMD_PADDR_SHIFT, REG1; \
andn REG2, 0x7, REG2; \
add REG1, REG2, REG1;
- /* Do a user page table walk in MMU globals. Leaves physical PTE
- * pointer in REG1. Jumps to FAIL_LABEL on early page table walk
- * termination. Physical base of page tables is in PHYS_PGD which
- * will not be modified.
+ /* This macro exists only to make the PMD translator below easier
+ * to read. It hides the ELF section switch for the sun4v code
+ * patching.
+ */
+#define OR_PTE_BIT(REG, NAME) \
+661: or REG, _PAGE_##NAME##_4U, REG; \
+ .section .sun4v_1insn_patch, "ax"; \
+ .word 661b; \
+ or REG, _PAGE_##NAME##_4V, REG; \
+ .previous;
+
+ /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */
+#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \
+661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \
+ .section .sun4v_1insn_patch, "ax"; \
+ .word 661b; \
+ sethi %uhi(_PAGE_VALID), REG; \
+ .previous; \
+ sllx REG, 32, REG; \
+661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \
+ .section .sun4v_1insn_patch, "ax"; \
+ .word 661b; \
+ or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \
+ .previous;
+
+ /* PMD has been loaded into REG1, interpret the value, seeing
+ * if it is a HUGE PMD or a normal one. If it is not valid
+ * then jump to FAIL_LABEL. If it is a HUGE PMD, and it
+ * translates to a valid PTE, branch to PTE_LABEL.
+ *
+ * We translate the PMD by hand, one bit at a time,
+ * constructing the huge PTE.
+ *
+ * So we construct the PTE in REG2 as follows:
+ *
+ * 1) Extract the PMD PFN from REG1 and place it into REG2.
+ *
+ * 2) Translate PMD protection bits in REG1 into REG2, one bit
+ * at a time using andcc tests on REG1 and OR's into REG2.
+ *
+ * Only two bits to be concerned with here, EXEC and WRITE.
+ * Now REG1 is freed up and we can use it as a temporary.
+ *
+ * 3) Construct the VALID, CACHE, and page size PTE bits in
+ * REG1, OR with REG2 to form final PTE.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+ brz,pn REG1, FAIL_LABEL; \
+ andcc REG1, PMD_ISHUGE, %g0; \
+ be,pt %xcc, 700f; \
+ and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \
+ cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \
+ bne,pn %xcc, FAIL_LABEL; \
+ andn REG1, PMD_HUGE_PROTBITS, REG2; \
+ sllx REG2, PMD_PADDR_SHIFT, REG2; \
+ /* REG2 now holds PFN << PAGE_SHIFT */ \
+ andcc REG1, PMD_HUGE_EXEC, %g0; \
+ bne,a,pt %xcc, 1f; \
+ OR_PTE_BIT(REG2, EXEC); \
+1: andcc REG1, PMD_HUGE_WRITE, %g0; \
+ bne,a,pt %xcc, 1f; \
+ OR_PTE_BIT(REG2, W); \
+ /* REG1 can now be clobbered, build final PTE */ \
+1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \
+ ba,pt %xcc, PTE_LABEL; \
+ or REG1, REG2, REG1; \
+700:
+#else
+#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+ brz,pn REG1, FAIL_LABEL; \
+ nop;
+#endif
+
+ /* Do a user page table walk in MMU globals. Leaves final,
+ * valid, PTE value in REG1. Jumps to FAIL_LABEL on early
+ * page table walk termination or if the PTE is not valid.
+ *
+ * Physical base of page tables is in PHYS_PGD which will not
+ * be modified.
*
* VADDR will not be clobbered, but REG1 and REG2 will.
*/
@@ -172,15 +248,19 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, 11, REG1; \
+ sllx REG1, PGD_PADDR_SHIFT, REG1; \
andn REG2, 0x3, REG2; \
lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
- brz,pn REG1, FAIL_LABEL; \
- sllx VADDR, 64 - PMD_SHIFT, REG2; \
- srlx REG2, 64 - PAGE_SHIFT, REG2; \
- sllx REG1, 11, REG1; \
+ USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \
+ sllx REG1, PMD_PADDR_SHIFT, REG1; \
andn REG2, 0x7, REG2; \
- add REG1, REG2, REG1;
+ add REG1, REG2, REG1; \
+ ldxa [REG1] ASI_PHYS_USE_EC, REG1; \
+ brgez,pn REG1, FAIL_LABEL; \
+ nop; \
+800:
/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
* If no entry is found, FAIL_LABEL will be branched to. On success
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index acc8c838ff72..75b31bcdeadf 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -779,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev,
static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state)
{
- vma->vm_flags |= (VM_IO | VM_RESERVED);
+ vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
}
/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
index e1fbf8c75787..bde867fd71e8 100644
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -176,7 +176,7 @@ sun4v_tsb_miss_common:
sub %g2, TRAP_PER_CPU_FAULT_INFO, %g2
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mov SCRATCHPAD_UTSBREG2, %g5
ldxa [%g5] ASI_SCRATCHPAD, %g5
cmp %g5, -1
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index db15d123f054..d4bdc7a62375 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -49,7 +49,7 @@ tsb_miss_page_table_walk:
/* Before committing to a full page table walk,
* check the huge page TSB.
*/
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
661: ldx [%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
nop
@@ -110,12 +110,9 @@ tsb_miss_page_table_walk:
tsb_miss_page_table_walk_sun4v_fastpath:
USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
- /* Load and check PTE. */
- ldxa [%g5] ASI_PHYS_USE_EC, %g5
- brgez,pn %g5, tsb_do_fault
- nop
+ /* Valid PTE is now in %g5. */
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
661: sethi %uhi(_PAGE_SZALL_4U), %g7
sllx %g7, 32, %g7
.section .sun4v_2insn_patch, "ax"
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 77ac917be152..e98bfda205a2 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -265,6 +265,7 @@ good_area:
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 1fe0429b6314..2976dba1ebaf 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -452,6 +452,7 @@ good_area:
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
@@ -464,13 +465,13 @@ good_area:
up_read(&mm->mmap_sem);
mm_rss = get_mm_rss(mm);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
#endif
if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
tsb_grow(mm, MM_TSB_BASE, mm_rss);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
mm_rss = mm->context.huge_pte_count;
if (unlikely(mm_rss >
mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 07e14535375c..f76f83d5ac63 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -303,53 +303,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
{
return NULL;
}
-
-static void context_reload(void *__data)
-{
- struct mm_struct *mm = __data;
-
- if (mm == current->mm)
- load_secondary_context(mm);
-}
-
-void hugetlb_prefault_arch_hook(struct mm_struct *mm)
-{
- struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
-
- if (likely(tp->tsb != NULL))
- return;
-
- tsb_grow(mm, MM_TSB_HUGE, 0);
- tsb_context_switch(mm);
- smp_tsb_sync(mm);
-
- /* On UltraSPARC-III+ and later, configure the second half of
- * the Data-TLB for huge pages.
- */
- if (tlb_type == cheetah_plus) {
- unsigned long ctx;
-
- spin_lock(&ctx_alloc_lock);
- ctx = mm->context.sparc64_ctx_val;
- ctx &= ~CTX_PGSZ_MASK;
- ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
- ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
-
- if (ctx != mm->context.sparc64_ctx_val) {
- /* When changing the page size fields, we
- * must perform a context flush so that no
- * stale entries match. This flush must
- * occur with the original context register
- * settings.
- */
- do_flush_tlb_mm(mm);
-
- /* Reload the context register of all processors
- * also executing in this address space.
- */
- mm->context.sparc64_ctx_val = ctx;
- on_each_cpu(context_reload, mm, 0);
- }
- spin_unlock(&ctx_alloc_lock);
- }
-}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 7a9b788c6ced..9e28a118e6a4 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -276,7 +276,6 @@ static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long
}
unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
-unsigned long _PAGE_SZBITS __read_mostly;
static void flush_dcache(unsigned long pfn)
{
@@ -307,12 +306,24 @@ static void flush_dcache(unsigned long pfn)
}
}
+/* mm->context.lock must be held */
+static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
+ unsigned long tsb_hash_shift, unsigned long address,
+ unsigned long tte)
+{
+ struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
+ unsigned long tag;
+
+ tsb += ((address >> tsb_hash_shift) &
+ (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
+ tag = (address >> 22UL);
+ tsb_insert(tsb, tag, tte);
+}
+
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
+ unsigned long tsb_index, tsb_hash_shift, flags;
struct mm_struct *mm;
- struct tsb *tsb;
- unsigned long tag, flags;
- unsigned long tsb_index, tsb_hash_shift;
pte_t pte = *ptep;
if (tlb_type != hypervisor) {
@@ -329,7 +340,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
spin_lock_irqsave(&mm->context.lock, flags);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
if ((tlb_type == hypervisor &&
(pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
@@ -341,11 +352,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
}
#endif
- tsb = mm->context.tsb_block[tsb_index].tsb;
- tsb += ((address >> tsb_hash_shift) &
- (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
- tag = (address >> 22UL);
- tsb_insert(tsb, tag, pte_val(pte));
+ __update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift,
+ address, pte_val(pte));
spin_unlock_irqrestore(&mm->context.lock, flags);
}
@@ -2275,8 +2283,7 @@ static void __init sun4u_pgprot_init(void)
__ACCESS_BITS_4U | _PAGE_E_4U);
#ifdef CONFIG_DEBUG_PAGEALLOC
- kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4U) ^
- 0xfffff80000000000UL;
+ kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
#else
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
0xfffff80000000000UL;
@@ -2287,7 +2294,6 @@ static void __init sun4u_pgprot_init(void)
for (i = 1; i < 4; i++)
kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
- _PAGE_SZBITS = _PAGE_SZBITS_4U;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
_PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
_PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
@@ -2324,8 +2330,7 @@ static void __init sun4v_pgprot_init(void)
_PAGE_CACHE = _PAGE_CACHE_4V;
#ifdef CONFIG_DEBUG_PAGEALLOC
- kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^
- 0xfffff80000000000UL;
+ kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
#else
kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
0xfffff80000000000UL;
@@ -2339,7 +2344,6 @@ static void __init sun4v_pgprot_init(void)
pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
__ACCESS_BITS_4V | _PAGE_E_4V);
- _PAGE_SZBITS = _PAGE_SZBITS_4V;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
_PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
_PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
@@ -2472,3 +2476,281 @@ void __flush_tlb_all(void)
__asm__ __volatile__("wrpr %0, 0, %%pstate"
: : "r" (pstate));
}
+
+static pte_t *get_from_cache(struct mm_struct *mm)
+{
+ struct page *page;
+ pte_t *ret;
+
+ spin_lock(&mm->page_table_lock);
+ page = mm->context.pgtable_page;
+ ret = NULL;
+ if (page) {
+ void *p = page_address(page);
+
+ mm->context.pgtable_page = NULL;
+
+ ret = (pte_t *) (p + (PAGE_SIZE / 2));
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ return ret;
+}
+
+static struct page *__alloc_for_cache(struct mm_struct *mm)
+{
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+ __GFP_REPEAT | __GFP_ZERO);
+
+ if (page) {
+ spin_lock(&mm->page_table_lock);
+ if (!mm->context.pgtable_page) {
+ atomic_set(&page->_count, 2);
+ mm->context.pgtable_page = page;
+ }
+ spin_unlock(&mm->page_table_lock);
+ }
+ return page;
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct page *page;
+ pte_t *pte;
+
+ pte = get_from_cache(mm);
+ if (pte)
+ return pte;
+
+ page = __alloc_for_cache(mm);
+ if (page)
+ pte = (pte_t *) page_address(page);
+
+ return pte;
+}
+
+pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct page *page;
+ pte_t *pte;
+
+ pte = get_from_cache(mm);
+ if (pte)
+ return pte;
+
+ page = __alloc_for_cache(mm);
+ if (page) {
+ pgtable_page_ctor(page);
+ pte = (pte_t *) page_address(page);
+ }
+
+ return pte;
+}
+
+void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ struct page *page = virt_to_page(pte);
+ if (put_page_testzero(page))
+ free_hot_cold_page(page, 0);
+}
+
+static void __pte_free(pgtable_t pte)
+{
+ struct page *page = virt_to_page(pte);
+ if (put_page_testzero(page)) {
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+ __pte_free(pte);
+}
+
+void pgtable_free(void *table, bool is_page)
+{
+ if (is_page)
+ __pte_free(table);
+ else
+ kmem_cache_free(pgtable_cache, table);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify)
+{
+ if (pgprot_val(pgprot) & _PAGE_VALID)
+ pmd_val(pmd) |= PMD_HUGE_PRESENT;
+ if (tlb_type == hypervisor) {
+ if (pgprot_val(pgprot) & _PAGE_WRITE_4V)
+ pmd_val(pmd) |= PMD_HUGE_WRITE;
+ if (pgprot_val(pgprot) & _PAGE_EXEC_4V)
+ pmd_val(pmd) |= PMD_HUGE_EXEC;
+
+ if (!for_modify) {
+ if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V)
+ pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+ if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V)
+ pmd_val(pmd) |= PMD_HUGE_DIRTY;
+ }
+ } else {
+ if (pgprot_val(pgprot) & _PAGE_WRITE_4U)
+ pmd_val(pmd) |= PMD_HUGE_WRITE;
+ if (pgprot_val(pgprot) & _PAGE_EXEC_4U)
+ pmd_val(pmd) |= PMD_HUGE_EXEC;
+
+ if (!for_modify) {
+ if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U)
+ pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+ if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U)
+ pmd_val(pmd) |= PMD_HUGE_DIRTY;
+ }
+ }
+
+ return pmd;
+}
+
+pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+ pmd_t pmd;
+
+ pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT)));
+ pmd_val(pmd) |= PMD_ISHUGE;
+ pmd = pmd_set_protbits(pmd, pgprot, false);
+ return pmd;
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
+ PMD_HUGE_WRITE |
+ PMD_HUGE_EXEC);
+ pmd = pmd_set_protbits(pmd, newprot, true);
+ return pmd;
+}
+
+pgprot_t pmd_pgprot(pmd_t entry)
+{
+ unsigned long pte = 0;
+
+ if (pmd_val(entry) & PMD_HUGE_PRESENT)
+ pte |= _PAGE_VALID;
+
+ if (tlb_type == hypervisor) {
+ if (pmd_val(entry) & PMD_HUGE_PRESENT)
+ pte |= _PAGE_PRESENT_4V;
+ if (pmd_val(entry) & PMD_HUGE_EXEC)
+ pte |= _PAGE_EXEC_4V;
+ if (pmd_val(entry) & PMD_HUGE_WRITE)
+ pte |= _PAGE_W_4V;
+ if (pmd_val(entry) & PMD_HUGE_ACCESSED)
+ pte |= _PAGE_ACCESSED_4V;
+ if (pmd_val(entry) & PMD_HUGE_DIRTY)
+ pte |= _PAGE_MODIFIED_4V;
+ pte |= _PAGE_CP_4V|_PAGE_CV_4V;
+ } else {
+ if (pmd_val(entry) & PMD_HUGE_PRESENT)
+ pte |= _PAGE_PRESENT_4U;
+ if (pmd_val(entry) & PMD_HUGE_EXEC)
+ pte |= _PAGE_EXEC_4U;
+ if (pmd_val(entry) & PMD_HUGE_WRITE)
+ pte |= _PAGE_W_4U;
+ if (pmd_val(entry) & PMD_HUGE_ACCESSED)
+ pte |= _PAGE_ACCESSED_4U;
+ if (pmd_val(entry) & PMD_HUGE_DIRTY)
+ pte |= _PAGE_MODIFIED_4U;
+ pte |= _PAGE_CP_4U|_PAGE_CV_4U;
+ }
+
+ return __pgprot(pte);
+}
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd)
+{
+ unsigned long pte, flags;
+ struct mm_struct *mm;
+ pmd_t entry = *pmd;
+ pgprot_t prot;
+
+ if (!pmd_large(entry) || !pmd_young(entry))
+ return;
+
+ pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS);
+ pte <<= PMD_PADDR_SHIFT;
+ pte |= _PAGE_VALID;
+
+ prot = pmd_pgprot(entry);
+
+ if (tlb_type == hypervisor)
+ pgprot_val(prot) |= _PAGE_SZHUGE_4V;
+ else
+ pgprot_val(prot) |= _PAGE_SZHUGE_4U;
+
+ pte |= pgprot_val(prot);
+
+ mm = vma->vm_mm;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+ if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
+ __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+ addr, pte);
+
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static void context_reload(void *__data)
+{
+ struct mm_struct *mm = __data;
+
+ if (mm == current->mm)
+ load_secondary_context(mm);
+}
+
+void hugetlb_setup(struct mm_struct *mm)
+{
+ struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
+
+ if (likely(tp->tsb != NULL))
+ return;
+
+ tsb_grow(mm, MM_TSB_HUGE, 0);
+ tsb_context_switch(mm);
+ smp_tsb_sync(mm);
+
+ /* On UltraSPARC-III+ and later, configure the second half of
+ * the Data-TLB for huge pages.
+ */
+ if (tlb_type == cheetah_plus) {
+ unsigned long ctx;
+
+ spin_lock(&ctx_alloc_lock);
+ ctx = mm->context.sparc64_ctx_val;
+ ctx &= ~CTX_PGSZ_MASK;
+ ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+ ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
+
+ if (ctx != mm->context.sparc64_ctx_val) {
+ /* When changing the page size fields, we
+ * must perform a context flush so that no
+ * stale entries match. This flush must
+ * occur with the original context register
+ * settings.
+ */
+ do_flush_tlb_mm(mm);
+
+ /* Reload the context register of all processors
+ * also executing in this address space.
+ */
+ mm->context.sparc64_ctx_val = ctx;
+ on_each_cpu(context_reload, mm, 0);
+ }
+ spin_unlock(&ctx_alloc_lock);
+ }
+}
+#endif
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b1f279cd00bf..3e8fec391fe0 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -43,16 +43,37 @@ void flush_tlb_pending(void)
put_cpu_var(tlb_batch);
}
-void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
- pte_t *ptep, pte_t orig, int fullmm)
+static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+ bool exec)
{
struct tlb_batch *tb = &get_cpu_var(tlb_batch);
unsigned long nr;
vaddr &= PAGE_MASK;
- if (pte_exec(orig))
+ if (exec)
vaddr |= 0x1UL;
+ nr = tb->tlb_nr;
+
+ if (unlikely(nr != 0 && mm != tb->mm)) {
+ flush_tlb_pending();
+ nr = 0;
+ }
+
+ if (nr == 0)
+ tb->mm = mm;
+
+ tb->vaddrs[nr] = vaddr;
+ tb->tlb_nr = ++nr;
+ if (nr >= TLB_BATCH_NR)
+ flush_tlb_pending();
+
+ put_cpu_var(tlb_batch);
+}
+
+void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+ pte_t *ptep, pte_t orig, int fullmm)
+{
if (tlb_type != hypervisor &&
pte_dirty(orig)) {
unsigned long paddr, pfn = pte_pfn(orig);
@@ -77,26 +98,91 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
}
no_cache_flush:
+ if (!fullmm)
+ tlb_batch_add_one(mm, vaddr, pte_exec(orig));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
+ pmd_t pmd, bool exec)
+{
+ unsigned long end;
+ pte_t *pte;
+
+ pte = pte_offset_map(&pmd, vaddr);
+ end = vaddr + HPAGE_SIZE;
+ while (vaddr < end) {
+ if (pte_val(*pte) & _PAGE_VALID)
+ tlb_batch_add_one(mm, vaddr, exec);
+ pte++;
+ vaddr += PAGE_SIZE;
+ }
+ pte_unmap(pte);
+}
- if (fullmm) {
- put_cpu_var(tlb_batch);
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ pmd_t orig = *pmdp;
+
+ *pmdp = pmd;
+
+ if (mm == &init_mm)
return;
+
+ if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) {
+ if (pmd_val(pmd) & PMD_ISHUGE)
+ mm->context.huge_pte_count++;
+ else
+ mm->context.huge_pte_count--;
+ if (mm->context.huge_pte_count == 1)
+ hugetlb_setup(mm);
}
- nr = tb->tlb_nr;
+ if (!pmd_none(orig)) {
+ bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
- if (unlikely(nr != 0 && mm != tb->mm)) {
- flush_tlb_pending();
- nr = 0;
+ addr &= HPAGE_MASK;
+ if (pmd_val(orig) & PMD_ISHUGE)
+ tlb_batch_add_one(mm, addr, exec);
+ else
+ tlb_batch_pmd_scan(mm, addr, orig, exec);
}
+}
- if (nr == 0)
- tb->mm = mm;
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
- tb->vaddrs[nr] = vaddr;
- tb->tlb_nr = ++nr;
- if (nr >= TLB_BATCH_NR)
- flush_tlb_pending();
+ assert_spin_locked(&mm->page_table_lock);
- put_cpu_var(tlb_batch);
+ /* FIFO */
+ if (!mm->pmd_huge_pte)
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) mm->pmd_huge_pte);
+ mm->pmd_huge_pte = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+{
+ struct list_head *lh;
+ pgtable_t pgtable;
+
+ assert_spin_locked(&mm->page_table_lock);
+
+ /* FIFO */
+ pgtable = mm->pmd_huge_pte;
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ mm->pmd_huge_pte = NULL;
+ else {
+ mm->pmd_huge_pte = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ pte_val(pgtable[0]) = 0;
+ pte_val(pgtable[1]) = 0;
+
+ return pgtable;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index c52add79b83d..7f6474347491 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -78,7 +78,7 @@ void flush_tsb_user(struct tlb_batch *tb)
base = __pa(base);
__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
@@ -90,29 +90,12 @@ void flush_tsb_user(struct tlb_batch *tb)
spin_unlock_irqrestore(&mm->context.lock, flags);
}
-#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB)
#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K
-#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB)
-#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K
-#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K
-#else
-#error Broken base page size setting...
-#endif
-#ifdef CONFIG_HUGETLB_PAGE
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K
-#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K
-#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB
-#else
-#error Broken huge page size setting...
-#endif
#endif
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
@@ -207,7 +190,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
case MM_TSB_BASE:
hp->pgsz_idx = HV_PGSZ_IDX_BASE;
break;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
case MM_TSB_HUGE:
hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
break;
@@ -222,7 +205,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
case MM_TSB_BASE:
hp->pgsz_mask = HV_PGSZ_MASK_BASE;
break;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
case MM_TSB_HUGE:
hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
break;
@@ -444,7 +427,7 @@ retry_tsb_alloc:
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
unsigned long huge_pte_count;
#endif
unsigned int i;
@@ -453,7 +436,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
mm->context.sparc64_ctx_val = 0UL;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
/* We reset it to zero because the fork() page copying
* will re-increment the counters as the parent PTEs are
* copied into the child address space.
@@ -462,6 +445,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
mm->context.huge_pte_count = 0;
#endif
+ mm->context.pgtable_page = NULL;
+
/* copy_mm() copies over the parent's mm_struct before calling
* us, so we need to zero out the TSB pointer or else tsb_grow()
* will be confused and think there is an older TSB to free up.
@@ -474,7 +459,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
*/
tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (unlikely(huge_pte_count))
tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif
@@ -500,10 +485,17 @@ static void tsb_destroy_one(struct tsb_config *tp)
void destroy_context(struct mm_struct *mm)
{
unsigned long flags, i;
+ struct page *page;
for (i = 0; i < MM_NUM_TSBS; i++)
tsb_destroy_one(&mm->context.tsb_block[i]);
+ page = mm->context.pgtable_page;
+ if (page && put_page_testzero(page)) {
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+
spin_lock_irqsave(&ctx_alloc_lock, flags);
if (CTX_VALID(mm->context)) {
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index c9a3c1fe7297..dc46490adca0 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -7,12 +7,15 @@ config TILE
select HAVE_DMA_API_DEBUG
select HAVE_KVM if !TILEGX
select GENERIC_FIND_FIRST_BIT
+ select SYSCTL_EXCEPTION_TRACE
select USE_GENERIC_SMP_HELPERS
select CC_OPTIMIZE_FOR_SIZE
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
select GENERIC_IRQ_SHOW
+ select HAVE_DEBUG_BUGVERBOSE
select HAVE_SYSCALL_WRAPPERS if TILEGX
select SYS_HYPERVISOR
select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index b2042380a5aa..0f885af2b621 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -106,6 +106,10 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
#ifdef CONFIG_HUGETLB_SUPER_PAGES
static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
struct page *page, int writable)
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 758b6038c2b7..3cfa98bf9125 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -36,19 +36,14 @@ static void sim_notify_exec(const char *binary_name)
} while (c);
}
-static int notify_exec(void)
+static int notify_exec(struct mm_struct *mm)
{
int retval = 0; /* failure */
- struct vm_area_struct *vma = current->mm->mmap;
- while (vma) {
- if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
- break;
- vma = vma->vm_next;
- }
- if (vma) {
+
+ if (mm->exe_file) {
char *buf = (char *) __get_free_page(GFP_KERNEL);
if (buf) {
- char *path = d_path(&vma->vm_file->f_path,
+ char *path = d_path(&mm->exe_file->f_path,
buf, PAGE_SIZE);
if (!IS_ERR(path)) {
sim_notify_exec(path);
@@ -106,16 +101,16 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
unsigned long vdso_base;
int retval = 0;
+ down_write(&mm->mmap_sem);
+
/*
* Notify the simulator that an exec just occurred.
* If we can't find the filename of the mapping, just use
* whatever was passed as the linux_binprm filename.
*/
- if (!notify_exec())
+ if (!notify_exec(mm))
sim_notify_exec(bprm->filename);
- down_write(&mm->mmap_sem);
-
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*/
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 84ce7abbf5af..fe811fa5f1b9 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -454,6 +454,7 @@ good_area:
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index cb837c223922..648121b037d5 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
bool
default y
select HAVE_GENERIC_HARDIRQS
+ select HAVE_UID16
select GENERIC_IRQ_SHOW
select GENERIC_CPU_DEVICES
select GENERIC_IO
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0353b98ae35a..0f00e9c82080 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -89,6 +89,7 @@ good_area:
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index b6f0458c3143..b008586dad75 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -380,7 +380,7 @@ int vectors_user_mapping(void)
return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYEXEC |
- VM_RESERVED,
+ VM_DONTEXPAND | VM_DONTDUMP,
NULL);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b72777ff32a9..1ae94bcae5d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -10,6 +10,7 @@ config X86_32
def_bool y
depends on !64BIT
select CLKSRC_I8253
+ select HAVE_UID16
config X86_64
def_bool y
@@ -46,6 +47,7 @@ config X86
select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_SYSCALL_TRACEPOINTS
+ select SYSCTL_EXCEPTION_TRACE
select HAVE_KVM
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
@@ -65,6 +67,7 @@ config X86
select HAVE_PERF_EVENTS_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_DEBUG_KMEMLEAK
select ANON_INODES
select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
select HAVE_CMPXCHG_LOCAL if !M386
@@ -85,6 +88,7 @@ config X86
select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_BPF_JIT if X86_64
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
@@ -2168,6 +2172,7 @@ config IA32_EMULATION
bool "IA32 Emulation"
depends on X86_64
select COMPAT_BINFMT_ELF
+ select HAVE_UID16
---help---
Include code to run legacy 32-bit programs under a
64-bit kernel. You should likely turn this on, unless you're
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 250b8774c158..b6c3b821acf6 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -240,30 +240,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
return c;
}
-
-/*
- * atomic_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline int atomic_dec_if_positive(atomic_t *v)
-{
- int c, old, dec;
- c = atomic_read(v);
- for (;;) {
- dec = c - 1;
- if (unlikely(dec < 0))
- break;
- old = atomic_cmpxchg((v), c, dec);
- if (likely(old == c))
- break;
- c = old;
- }
- return dec;
-}
-
/**
* atomic_inc_short - increment of a short integer
* @v: pointer to type int
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 439a9acc132d..bdd35dbd0605 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -90,4 +90,8 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
#endif /* _ASM_X86_HUGETLB_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index fc9948465293..a1f780d45f76 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
static inline int pmd_large(pmd_t pte)
{
- return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- (_PAGE_PSE | _PAGE_PRESENT);
+ return pmd_flags(pte) & _PAGE_PSE;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte)
static inline int pmd_present(pmd_t pmd)
{
- return pmd_flags(pmd) & _PAGE_PRESENT;
+ /*
+ * Checking for _PAGE_PSE is needed too because
+ * split_huge_page will temporarily clear the present bit (but
+ * the _PAGE_PSE flag will remain set at all times while the
+ * _PAGE_PRESENT bit is clear).
+ */
+ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
}
static inline int pmd_none(pmd_t pmd)
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 0c92113c4cb6..8faa215a503e 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -71,6 +71,7 @@ do { \
* tables contain all the necessary information.
*/
#define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 8251be02301e..47356f9df82e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -143,6 +143,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_unmap(pte) ((void)(pte))/* NOP */
#define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a530b230e7d7..8e13ecb41bee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1220,6 +1220,7 @@ good_area:
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index b91e48512425..937bff5cdaa7 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
- struct prio_tree_iter iter;
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
@@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
- vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+ vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
continue;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3d68ef6d2266..0eb572eda406 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -664,20 +664,20 @@ static void free_pfn_range(u64 paddr, unsigned long size)
}
/*
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
*
* If the vma has a linear pfn mapping for the entire range, we get the prot
* from pte and reserve the entire vma range with single reserve_pfn_range call.
*/
-int track_pfn_vma_copy(struct vm_area_struct *vma)
+int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
- if (is_linear_pfn_mapping(vma)) {
+ if (vma->vm_flags & VM_PAT) {
/*
* reserve the whole chunk covered by vma. We need the
* starting address and protection from pte.
@@ -694,31 +694,59 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
}
/*
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
- *
* prot is passed in as a parameter for the new mapping. If the vma has a
* linear pfn mapping for the entire range reserve the entire vma range with
* single reserve_pfn_range call.
*/
-int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long size)
+int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn, unsigned long addr, unsigned long size)
{
+ resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
unsigned long flags;
- resource_size_t paddr;
- unsigned long vma_size = vma->vm_end - vma->vm_start;
- if (is_linear_pfn_mapping(vma)) {
- /* reserve the whole chunk starting from vm_pgoff */
- paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
- return reserve_pfn_range(paddr, vma_size, prot, 0);
+ /* reserve the whole chunk starting from paddr */
+ if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+ int ret;
+
+ ret = reserve_pfn_range(paddr, size, prot, 0);
+ if (!ret)
+ vma->vm_flags |= VM_PAT;
+ return ret;
}
if (!pat_enabled)
return 0;
- /* for vm_insert_pfn and friends, we set prot based on lookup */
- flags = lookup_memtype(pfn << PAGE_SHIFT);
+ /*
+ * For anything smaller than the vma size we set prot based on the
+ * lookup.
+ */
+ flags = lookup_memtype(paddr);
+
+ /* Check memtype for the remaining pages */
+ while (size > PAGE_SIZE) {
+ size -= PAGE_SIZE;
+ paddr += PAGE_SIZE;
+ if (flags != lookup_memtype(paddr))
+ return -EINVAL;
+ }
+
+ *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ flags);
+
+ return 0;
+}
+
+int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+ unsigned long pfn)
+{
+ unsigned long flags;
+
+ if (!pat_enabled)
+ return 0;
+
+ /* Set prot based on lookup */
+ flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
flags);
@@ -726,22 +754,31 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
}
/*
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
+ * can be for the entire vma (in which case pfn, size are zero).
*/
-void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
- unsigned long size)
+void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+ unsigned long size)
{
resource_size_t paddr;
- unsigned long vma_size = vma->vm_end - vma->vm_start;
+ unsigned long prot;
- if (is_linear_pfn_mapping(vma)) {
- /* free the whole chunk starting from vm_pgoff */
- paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
- free_pfn_range(paddr, vma_size);
+ if (!(vma->vm_flags & VM_PAT))
return;
+
+ /* free the chunk starting from pfn or the whole chunk */
+ paddr = (resource_size_t)pfn << PAGE_SHIFT;
+ if (!paddr && !size) {
+ if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ size = vma->vm_end - vma->vm_start;
}
+ free_pfn_range(paddr, size);
+ vma->vm_flags &= ~VM_PAT;
}
pgprot_t pgprot_writecombine(pgprot_t prot)
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 8acaddd0fb21..415f6c4ced36 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -12,7 +12,7 @@
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
#include <linux/sched.h>
#include <linux/gfp.h>
@@ -54,29 +54,24 @@ static u64 get_subtree_max_end(struct rb_node *node)
return ret;
}
-/* Update 'subtree_max_end' for a node, based on node and its children */
-static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
+static u64 compute_subtree_max_end(struct memtype *data)
{
- struct memtype *data;
- u64 max_end, child_max_end;
-
- if (!node)
- return;
+ u64 max_end = data->end, child_max_end;
- data = container_of(node, struct memtype, rb);
- max_end = data->end;
-
- child_max_end = get_subtree_max_end(node->rb_right);
+ child_max_end = get_subtree_max_end(data->rb.rb_right);
if (child_max_end > max_end)
max_end = child_max_end;
- child_max_end = get_subtree_max_end(node->rb_left);
+ child_max_end = get_subtree_max_end(data->rb.rb_left);
if (child_max_end > max_end)
max_end = child_max_end;
- data->subtree_max_end = max_end;
+ return max_end;
}
+RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
+ u64, subtree_max_end, compute_subtree_max_end)
+
/* Find the first (lowest start addr) overlapping range from rb tree */
static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
u64 start, u64 end)
@@ -179,15 +174,17 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
struct memtype *data = container_of(*node, struct memtype, rb);
parent = *node;
+ if (data->subtree_max_end < newdata->end)
+ data->subtree_max_end = newdata->end;
if (newdata->start <= data->start)
node = &((*node)->rb_left);
else if (newdata->start > data->start)
node = &((*node)->rb_right);
}
+ newdata->subtree_max_end = newdata->end;
rb_link_node(&newdata->rb, parent, node);
- rb_insert_color(&newdata->rb, root);
- rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
+ rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
}
int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
@@ -209,16 +206,13 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
struct memtype *rbt_memtype_erase(u64 start, u64 end)
{
- struct rb_node *deepest;
struct memtype *data;
data = memtype_rb_exact_match(&memtype_rbroot, start, end);
if (!data)
goto out;
- deepest = rb_augment_erase_begin(&data->rb);
- rb_erase(&data->rb, &memtype_rbroot);
- rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
+ rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
out:
return data;
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5a16824cc2b3..fd28d86fe3d2 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2451,8 +2451,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
- BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
- (VM_PFNMAP | VM_RESERVED | VM_IO)));
+ BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
rmd.mfn = mfn;
rmd.prot = prot;
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 5a74c53bc69c..2c2f710ed1dc 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -126,6 +126,7 @@ good_area:
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry