summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/kvm/Kconfig5
-rw-r--r--arch/arm/kvm/Makefile1
-rw-r--r--arch/arm/mach-pxa/cm-x255.c19
-rw-r--r--arch/arm64/kvm/Kconfig3
-rw-r--r--arch/arm64/kvm/Makefile1
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h6
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c4
-rw-r--r--arch/powerpc/lib/code-patching.c6
-rw-r--r--arch/powerpc/mm/slice.c34
-rw-r--r--arch/powerpc/perf/imc-pmu.c27
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c22
-rw-r--r--arch/powerpc/platforms/powernv/vas.c1
-rw-r--r--arch/sparc/Kbuild1
-rw-r--r--arch/sparc/Kconfig2
-rw-r--r--arch/sparc/Makefile4
-rw-r--r--arch/sparc/include/asm/bitops_64.h5
-rw-r--r--arch/sparc/include/asm/clocksource.h17
-rw-r--r--arch/sparc/include/asm/cmpxchg_32.h3
-rw-r--r--arch/sparc/include/asm/elf_64.h14
-rw-r--r--arch/sparc/include/asm/mmu_64.h1
-rw-r--r--arch/sparc/include/asm/mmu_context_64.h2
-rw-r--r--arch/sparc/include/asm/processor_64.h8
-rw-r--r--arch/sparc/include/asm/tsb.h2
-rw-r--r--arch/sparc/include/asm/vdso.h24
-rw-r--r--arch/sparc/include/asm/vvar.h74
-rw-r--r--arch/sparc/include/uapi/asm/auxvec.h4
-rw-r--r--arch/sparc/kernel/Makefile1
-rw-r--r--arch/sparc/kernel/head_64.S2
-rw-r--r--arch/sparc/kernel/mdesc.c17
-rw-r--r--arch/sparc/kernel/time_64.c12
-rw-r--r--arch/sparc/kernel/vdso.c70
-rw-r--r--arch/sparc/kernel/viohs.c6
-rw-r--r--arch/sparc/lib/Makefile3
-rw-r--r--arch/sparc/lib/NG4fls.S30
-rw-r--r--arch/sparc/lib/NG4patch.S9
-rw-r--r--arch/sparc/lib/atomic32.c14
-rw-r--r--arch/sparc/lib/fls.S67
-rw-r--r--arch/sparc/lib/fls64.S61
-rw-r--r--arch/sparc/vdso/.gitignore3
-rw-r--r--arch/sparc/vdso/Makefile149
-rw-r--r--arch/sparc/vdso/vclock_gettime.c264
-rw-r--r--arch/sparc/vdso/vdso-layout.lds.S104
-rw-r--r--arch/sparc/vdso/vdso-note.S12
-rw-r--r--arch/sparc/vdso/vdso.lds.S25
-rw-r--r--arch/sparc/vdso/vdso2c.c234
-rw-r--r--arch/sparc/vdso/vdso2c.h143
-rw-r--r--arch/sparc/vdso/vdso32/.gitignore1
-rw-r--r--arch/sparc/vdso/vdso32/vclock_gettime.c26
-rw-r--r--arch/sparc/vdso/vdso32/vdso-note.S12
-rw-r--r--arch/sparc/vdso/vdso32/vdso32.lds.S24
-rw-r--r--arch/sparc/vdso/vma.c268
-rw-r--r--arch/um/Kconfig.common1
-rw-r--r--arch/x86/kvm/svm.c7
-rw-r--r--arch/x86/kvm/vmx.c161
54 files changed, 1914 insertions, 102 deletions
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index f24628db5409..e2bd35b6780c 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -4,6 +4,7 @@
#
source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -23,6 +24,8 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select ARM_GIC
+ select ARM_GIC_V3
+ select ARM_GIC_V3_ITS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
@@ -36,6 +39,8 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
+ select IRQ_BYPASS_MANAGER
+ select HAVE_KVM_IRQ_BYPASS
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
Support hosting virtualized guest machines.
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f550abd64a25..48de846f2246 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o
obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
obj-y += $(KVM)/arm/vgic/vgic-v2.o
obj-y += $(KVM)/arm/vgic/vgic-v3.o
+obj-y += $(KVM)/arm/vgic/vgic-v4.o
obj-y += $(KVM)/arm/vgic/vgic-mmio.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
diff --git a/arch/arm/mach-pxa/cm-x255.c b/arch/arm/mach-pxa/cm-x255.c
index b592f79a1742..fa8e7dd4d898 100644
--- a/arch/arm/mach-pxa/cm-x255.c
+++ b/arch/arm/mach-pxa/cm-x255.c
@@ -14,7 +14,7 @@
#include <linux/mtd/partitions.h>
#include <linux/mtd/physmap.h>
#include <linux/mtd/nand-gpio.h>
-
+#include <linux/gpio/machine.h>
#include <linux/spi/spi.h>
#include <linux/spi/pxa2xx_spi.h>
@@ -176,6 +176,17 @@ static inline void cmx255_init_nor(void) {}
#endif
#if defined(CONFIG_MTD_NAND_GPIO) || defined(CONFIG_MTD_NAND_GPIO_MODULE)
+
+static struct gpiod_lookup_table cmx255_nand_gpiod_table = {
+ .dev_id = "gpio-nand",
+ .table = {
+ GPIO_LOOKUP("gpio-pxa", GPIO_NAND_CS, "nce", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NAND_CLE, "cle", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NAND_ALE, "ale", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-pxa", GPIO_NAND_RB, "rdy", GPIO_ACTIVE_HIGH),
+ },
+};
+
static struct resource cmx255_nand_resource[] = {
[0] = {
.start = PXA_CS1_PHYS,
@@ -198,11 +209,6 @@ static struct mtd_partition cmx255_nand_parts[] = {
};
static struct gpio_nand_platdata cmx255_nand_platdata = {
- .gpio_nce = GPIO_NAND_CS,
- .gpio_cle = GPIO_NAND_CLE,
- .gpio_ale = GPIO_NAND_ALE,
- .gpio_rdy = GPIO_NAND_RB,
- .gpio_nwp = -1,
.parts = cmx255_nand_parts,
.num_parts = ARRAY_SIZE(cmx255_nand_parts),
.chip_delay = 25,
@@ -220,6 +226,7 @@ static struct platform_device cmx255_nand = {
static void __init cmx255_init_nand(void)
{
+ gpiod_add_lookup_table(&cmx255_nand_gpiod_table);
platform_device_register(&cmx255_nand);
}
#else
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 13f81f971390..2257dfcc44cc 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -4,6 +4,7 @@
#
source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -36,6 +37,8 @@ config KVM
select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
+ select IRQ_BYPASS_MANAGER
+ select HAVE_KVM_IRQ_BYPASS
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 861acbbac385..87c4f7ae24de 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 7f74c282710f..fad0e6ff460f 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -21,11 +21,6 @@
#include <asm/opal.h>
/*
- * For static allocation of some of the structures.
- */
-#define IMC_MAX_PMUS 32
-
-/*
* Compatibility macros for IMC devices
*/
#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
@@ -125,4 +120,5 @@ enum {
extern int init_imc_pmu(struct device_node *parent,
struct imc_pmu *pmu_ptr, int pmu_id);
extern void thread_imc_disable(void);
+extern int get_max_nest_dev(void);
#endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 602e0fde19b4..8bdc2f96c5d6 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -735,8 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
*/
if ((version & 0xffffff00) == 0x004e0100)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
- else if ((version & 0xffffefff) == 0x004e0200)
- cur_cpu_spec->cpu_features &= ~CPU_FTR_POWER9_DD2_1;
+ else if ((version & 0xffffefff) == 0x004e0201)
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
}
static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c9de03e0c1f1..d469224c4ada 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -21,6 +21,7 @@
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/code-patching.h>
+#include <asm/setup.h>
static int __patch_instruction(unsigned int *addr, unsigned int instr)
{
@@ -146,11 +147,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
* During early early boot patch_instruction is called
* when text_poke_area is not ready, but we still need
* to allow patching. We just do the plain old patching
- * We use slab_is_available and per cpu read * via this_cpu_read
- * of text_poke_area. Per-CPU areas might not be up early
- * this can create problems with just using this_cpu_read()
*/
- if (!slab_is_available() || !this_cpu_read(text_poke_area))
+ if (!this_cpu_read(*PTRRELOC(&text_poke_area)))
return __patch_instruction(addr, instr);
local_irq_save(flags);
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 564fff06f5c1..23ec2c5e3b78 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -122,7 +122,8 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
return !slice_area_is_free(mm, start, end - start);
}
-static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
+ unsigned long high_limit)
{
unsigned long i;
@@ -133,15 +134,16 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
if (!slice_low_has_vma(mm, i))
ret->low_slices |= 1u << i;
- if (mm->context.slb_addr_limit <= SLICE_LOW_TOP)
+ if (high_limit <= SLICE_LOW_TOP)
return;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++)
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++)
if (!slice_high_has_vma(mm, i))
__set_bit(i, ret->high_slices);
}
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret)
+static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
+ unsigned long high_limit)
{
unsigned char *hpsizes;
int index, mask_index;
@@ -156,8 +158,11 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
if (((lpsizes >> (i * 4)) & 0xf) == psize)
ret->low_slices |= 1u << i;
+ if (high_limit <= SLICE_LOW_TOP)
+ return;
+
hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
mask_index = i & 0x1;
index = i >> 1;
if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
@@ -169,6 +174,10 @@ static int slice_check_fit(struct mm_struct *mm,
struct slice_mask mask, struct slice_mask available)
{
DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+ /*
+ * Make sure we just do bit compare only to the max
+ * addr limit and not the full bit map size.
+ */
unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
bitmap_and(result, mask.high_slices,
@@ -472,7 +481,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* First make up a "good" mask of slices that have the right size
* already
*/
- slice_mask_for_size(mm, psize, &good_mask);
+ slice_mask_for_size(mm, psize, &good_mask, high_limit);
slice_print_mask(" good_mask", good_mask);
/*
@@ -497,7 +506,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
#ifdef CONFIG_PPC_64K_PAGES
/* If we support combo pages, we can allow 64k pages in 4k slices */
if (psize == MMU_PAGE_64K) {
- slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+ slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
if (fixed)
slice_or_mask(&good_mask, &compat_mask);
}
@@ -530,11 +539,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
return newaddr;
}
}
-
- /* We don't fit in the good mask, check what other slices are
+ /*
+ * We don't fit in the good mask, check what other slices are
* empty and thus can be converted
*/
- slice_mask_for_free(mm, &potential_mask);
+ slice_mask_for_free(mm, &potential_mask, high_limit);
slice_or_mask(&potential_mask, &good_mask);
slice_print_mask(" potential", potential_mask);
@@ -744,17 +753,18 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
{
struct slice_mask mask, available;
unsigned int psize = mm->context.user_psize;
+ unsigned long high_limit = mm->context.slb_addr_limit;
if (radix_enabled())
return 0;
slice_range_to_mask(addr, len, &mask);
- slice_mask_for_size(mm, psize, &available);
+ slice_mask_for_size(mm, psize, &available, high_limit);
#ifdef CONFIG_PPC_64K_PAGES
/* We need to account for 4k slices too */
if (psize == MMU_PAGE_64K) {
struct slice_mask compat_mask;
- slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+ slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
slice_or_mask(&available, &compat_mask);
}
#endif
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 36344117c680..0ead3cd73caa 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -26,7 +26,7 @@
*/
static DEFINE_MUTEX(nest_init_lock);
static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
-static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static struct imc_pmu **per_nest_pmu_arr;
static cpumask_t nest_imc_cpumask;
struct imc_pmu_ref *nest_imc_refc;
static int nest_pmus;
@@ -286,13 +286,14 @@ static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
static void nest_change_cpu_context(int old_cpu, int new_cpu)
{
struct imc_pmu **pn = per_nest_pmu_arr;
- int i;
if (old_cpu < 0 || new_cpu < 0)
return;
- for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
+ while (*pn) {
perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
+ pn++;
+ }
}
static int ppc_nest_imc_cpu_offline(unsigned int cpu)
@@ -467,7 +468,7 @@ static int nest_imc_event_init(struct perf_event *event)
* Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
* Get the base memory addresss for this cpu.
*/
- chip_id = topology_physical_package_id(event->cpu);
+ chip_id = cpu_to_chip_id(event->cpu);
pcni = pmu->mem_info;
do {
if (pcni->id == chip_id) {
@@ -524,19 +525,19 @@ static int nest_imc_event_init(struct perf_event *event)
*/
static int core_imc_mem_init(int cpu, int size)
{
- int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+ int nid, rc = 0, core_id = (cpu / threads_per_core);
struct imc_mem_info *mem_info;
/*
* alloc_pages_node() will allocate memory for core in the
* local node only.
*/
- phys_id = topology_physical_package_id(cpu);
+ nid = cpu_to_node(cpu);
mem_info = &core_imc_pmu->mem_info[core_id];
mem_info->id = core_id;
/* We need only vbase for core counters */
- mem_info->vbase = page_address(alloc_pages_node(phys_id,
+ mem_info->vbase = page_address(alloc_pages_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
__GFP_NOWARN, get_order(size)));
if (!mem_info->vbase)
@@ -797,14 +798,14 @@ static int core_imc_event_init(struct perf_event *event)
static int thread_imc_mem_alloc(int cpu_id, int size)
{
u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
- int phys_id = topology_physical_package_id(cpu_id);
+ int nid = cpu_to_node(cpu_id);
if (!local_mem) {
/*
* This case could happen only once at start, since we dont
* free the memory in cpu offline path.
*/
- local_mem = page_address(alloc_pages_node(phys_id,
+ local_mem = page_address(alloc_pages_node(nid,
GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
__GFP_NOWARN, get_order(size)));
if (!local_mem)
@@ -1194,6 +1195,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
kfree(pmu_ptr);
+ kfree(per_nest_pmu_arr);
return;
}
@@ -1218,6 +1220,13 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
return -ENOMEM;
/* Needed for hotplug/migration */
+ if (!per_nest_pmu_arr) {
+ per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
+ sizeof(struct imc_pmu *),
+ GFP_KERNEL);
+ if (!per_nest_pmu_arr)
+ return -ENOMEM;
+ }
per_nest_pmu_arr[pmu_index] = pmu_ptr;
break;
case IMC_DOMAIN_CORE:
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531fae20..465ea105b771 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -153,6 +153,22 @@ static void disable_core_pmu_counters(void)
put_online_cpus();
}
+int get_max_nest_dev(void)
+{
+ struct device_node *node;
+ u32 pmu_units = 0, type;
+
+ for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) {
+ if (of_property_read_u32(node, "type", &type))
+ continue;
+
+ if (type == IMC_TYPE_CHIP)
+ pmu_units++;
+ }
+
+ return pmu_units;
+}
+
static int opal_imc_counters_probe(struct platform_device *pdev)
{
struct device_node *imc_dev = pdev->dev.of_node;
@@ -191,8 +207,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
break;
}
- if (!imc_pmu_create(imc_dev, pmu_count, domain))
- pmu_count++;
+ if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
+ if (domain == IMC_DOMAIN_NEST)
+ pmu_count++;
+ }
}
return 0;
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index c488621dbec3..aebbe95c9230 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -135,6 +135,7 @@ int chip_to_vas_id(int chipid)
}
return -1;
}
+EXPORT_SYMBOL(chip_to_vas_id);
static int vas_probe(struct platform_device *pdev)
{
diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild
index 675afa285ddb..b4d0f570cc00 100644
--- a/arch/sparc/Kbuild
+++ b/arch/sparc/Kbuild
@@ -7,3 +7,4 @@ obj-y += mm/
obj-y += math-emu/
obj-y += net/
obj-y += crypto/
+obj-$(CONFIG_SPARC64) += vdso/
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 987a57502909..6bf594ace663 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -84,6 +84,8 @@ config SPARC64
select HAVE_REGS_AND_STACK_ACCESS_API
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
+ select GENERIC_TIME_VSYSCALL
+ select ARCH_CLOCKSOURCE_DATA
config ARCH_DEFCONFIG
string
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index dbc448923f48..edac927e4952 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -81,6 +81,10 @@ install:
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
+PHONY += vdso_install
+vdso_install:
+ $(Q)$(MAKE) $(build)=arch/sparc/vdso $@
+
# This is the image used for packaging
KBUILD_IMAGE := $(boot)/zImage
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index a90eea24b286..ca7ea5913494 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,11 @@ void set_bit(unsigned long nr, volatile unsigned long *addr);
void clear_bit(unsigned long nr, volatile unsigned long *addr);
void change_bit(unsigned long nr, volatile unsigned long *addr);
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
#include <asm-generic/bitops/non-atomic.h>
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
#ifdef __KERNEL__
diff --git a/arch/sparc/include/asm/clocksource.h b/arch/sparc/include/asm/clocksource.h
new file mode 100644
index 000000000000..d63ef224befe
--- /dev/null
+++ b/arch/sparc/include/asm/clocksource.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ASM_SPARC_CLOCKSOURCE_H
+#define _ASM_SPARC_CLOCKSOURCE_H
+
+/* VDSO clocksources */
+#define VCLOCK_NONE 0 /* Nothing userspace can do. */
+#define VCLOCK_TICK 1 /* Use %tick. */
+#define VCLOCK_STICK 2 /* Use %stick. */
+
+struct arch_clocksource_data {
+ int vclock_mode;
+};
+
+#endif /* _ASM_SPARC_CLOCKSOURCE_H */
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index 3e3823db303e..c73b5a3ab7b9 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -63,6 +63,9 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
(unsigned long)_n_, sizeof(*(ptr))); \
})
+u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new);
+#define cmpxchg64(ptr, old, new) __cmpxchg_u64(ptr, old, new)
+
#include <asm-generic/cmpxchg-local.h>
/*
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 5894389f5ed5..25340df3570c 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -211,4 +211,18 @@ do { if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \
(current->personality & (~PER_MASK))); \
} while (0)
+extern unsigned int vdso_enabled;
+
+#define ARCH_DLINFO \
+do { \
+ if (vdso_enabled) \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
+ (unsigned long)current->mm->context.vdso); \
+} while (0)
+
+struct linux_binprm;
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp);
#endif /* !(__ASM_SPARC64_ELF_H) */
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index 5fe64a57b4ba..ad4fb93508ba 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -97,6 +97,7 @@ typedef struct {
unsigned long thp_pte_count;
struct tsb_config tsb_block[MM_NUM_TSBS];
struct hv_tsb_descr tsb_descr[MM_NUM_TSBS];
+ void *vdso;
} mm_context_t;
#endif /* !__ASSEMBLY__ */
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index e25d25b0a34b..b361702ef52a 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -8,9 +8,11 @@
#include <linux/spinlock.h>
#include <linux/mm_types.h>
+#include <linux/smp.h>
#include <asm/spitfire.h>
#include <asm-generic/mm_hooks.h>
+#include <asm/percpu.h>
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index c7c79fe8d265..aac23d4a4ddd 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -200,6 +200,13 @@ unsigned long get_wchan(struct task_struct *task);
* To make a long story short, we are trying to yield the current cpu
* strand during busy loops.
*/
+#ifdef BUILD_VDSO
+#define cpu_relax() asm volatile("\n99:\n\t" \
+ "rd %%ccr, %%g0\n\t" \
+ "rd %%ccr, %%g0\n\t" \
+ "rd %%ccr, %%g0\n\t" \
+ ::: "memory")
+#else /* ! BUILD_VDSO */
#define cpu_relax() asm volatile("\n99:\n\t" \
"rd %%ccr, %%g0\n\t" \
"rd %%ccr, %%g0\n\t" \
@@ -211,6 +218,7 @@ unsigned long get_wchan(struct task_struct *task);
"nop\n\t" \
".previous" \
::: "memory")
+#endif
/* Prefetch support. This is tuned for UltraSPARC-III and later.
* UltraSPARC-I will treat these as nops, and UltraSPARC-II has
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index 25b6abdb3908..522a677e050d 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -217,7 +217,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
sllx REG2, 32, REG2; \
andcc REG1, REG2, %g0; \
be,pt %xcc, 700f; \
- sethi %hi(0x1ffc0000), REG2; \
+ sethi %hi(0xffe00000), REG2; \
sllx REG2, 1, REG2; \
brgez,pn REG1, FAIL_LABEL; \
andn REG1, REG2, REG1; \
diff --git a/arch/sparc/include/asm/vdso.h b/arch/sparc/include/asm/vdso.h
new file mode 100644
index 000000000000..93b628731a5e
--- /dev/null
+++ b/arch/sparc/include/asm/vdso.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ASM_SPARC_VDSO_H
+#define _ASM_SPARC_VDSO_H
+
+struct vdso_image {
+ void *data;
+ unsigned long size; /* Always a multiple of PAGE_SIZE */
+ long sym_vvar_start; /* Negative offset to the vvar area */
+ long sym_vread_tick; /* Start of vread_tick section */
+ long sym_vread_tick_patch_start; /* Start of tick read */
+ long sym_vread_tick_patch_end; /* End of tick read */
+};
+
+#ifdef CONFIG_SPARC64
+extern const struct vdso_image vdso_image_64_builtin;
+#endif
+#ifdef CONFIG_COMPAT
+extern const struct vdso_image vdso_image_32_builtin;
+#endif
+
+#endif /* _ASM_SPARC_VDSO_H */
diff --git a/arch/sparc/include/asm/vvar.h b/arch/sparc/include/asm/vvar.h
new file mode 100644
index 000000000000..0289503d1cb0
--- /dev/null
+++ b/arch/sparc/include/asm/vvar.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ASM_SPARC_VVAR_DATA_H
+#define _ASM_SPARC_VVAR_DATA_H
+
+#include <asm/clocksource.h>
+#include <linux/seqlock.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+struct vvar_data {
+ unsigned int seq;
+
+ int vclock_mode;
+ struct { /* extract of a clocksource struct */
+ u64 cycle_last;
+ u64 mask;
+ int mult;
+ int shift;
+ } clock;
+ /* open coded 'struct timespec' */
+ u64 wall_time_sec;
+ u64 wall_time_snsec;
+ u64 monotonic_time_snsec;
+ u64 monotonic_time_sec;
+ u64 monotonic_time_coarse_sec;
+ u64 monotonic_time_coarse_nsec;
+ u64 wall_time_coarse_sec;
+ u64 wall_time_coarse_nsec;
+
+ int tz_minuteswest;
+ int tz_dsttime;
+};
+
+extern struct vvar_data *vvar_data;
+extern int vdso_fix_stick;
+
+static inline unsigned int vvar_read_begin(const struct vvar_data *s)
+{
+ unsigned int ret;
+
+repeat:
+ ret = READ_ONCE(s->seq);
+ if (unlikely(ret & 1)) {
+ cpu_relax();
+ goto repeat;
+ }
+ smp_rmb(); /* Finish all reads before we return seq */
+ return ret;
+}
+
+static inline int vvar_read_retry(const struct vvar_data *s,
+ unsigned int start)
+{
+ smp_rmb(); /* Finish all reads before checking the value of seq */
+ return unlikely(s->seq != start);
+}
+
+static inline void vvar_write_begin(struct vvar_data *s)
+{
+ ++s->seq;
+ smp_wmb(); /* Makes sure that increment of seq is reflected */
+}
+
+static inline void vvar_write_end(struct vvar_data *s)
+{
+ smp_wmb(); /* Makes the value of seq current before we increment */
+ ++s->seq;
+}
+
+
+#endif /* _ASM_SPARC_VVAR_DATA_H */
diff --git a/arch/sparc/include/uapi/asm/auxvec.h b/arch/sparc/include/uapi/asm/auxvec.h
index ad6f360261f6..5f80a70cc901 100644
--- a/arch/sparc/include/uapi/asm/auxvec.h
+++ b/arch/sparc/include/uapi/asm/auxvec.h
@@ -1,4 +1,8 @@
#ifndef __ASMSPARC_AUXVEC_H
#define __ASMSPARC_AUXVEC_H
+#define AT_SYSINFO_EHDR 33
+
+#define AT_VECTOR_SIZE_ARCH 1
+
#endif /* !(__ASMSPARC_AUXVEC_H) */
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 8de9617589a5..cc97545737f0 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_SPARC32) += systbls_32.o
obj-y += time_$(BITS).o
obj-$(CONFIG_SPARC32) += windows.o
obj-y += cpu.o
+obj-$(CONFIG_SPARC64) += vdso.o
obj-$(CONFIG_SPARC32) += devices.o
obj-y += ptrace_$(BITS).o
obj-y += unaligned_$(BITS).o
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 9e293de12052..a41e6e16eb36 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -641,6 +641,8 @@ niagara4_patch:
nop
call niagara4_patch_pageops
nop
+ call niagara4_patch_fls
+ nop
ba,a,pt %xcc, 80f
nop
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 1ef6156b1530..418592a09b41 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -13,6 +13,7 @@
#include <linux/miscdevice.h>
#include <linux/bootmem.h>
#include <linux/export.h>
+#include <linux/refcount.h>
#include <asm/cpudata.h>
#include <asm/hypervisor.h>
@@ -71,7 +72,7 @@ struct mdesc_handle {
struct list_head list;
struct mdesc_mem_ops *mops;
void *self_base;
- atomic_t refcnt;
+ refcount_t refcnt;
unsigned int handle_size;
struct mdesc_hdr mdesc;
};
@@ -153,7 +154,7 @@ static void mdesc_handle_init(struct mdesc_handle *hp,
memset(hp, 0, handle_size);
INIT_LIST_HEAD(&hp->list);
hp->self_base = base;
- atomic_set(&hp->refcnt, 1);
+ refcount_set(&hp->refcnt, 1);
hp->handle_size = handle_size;
}
@@ -183,7 +184,7 @@ static void __init mdesc_memblock_free(struct mdesc_handle *hp)
unsigned int alloc_size;
unsigned long start;
- BUG_ON(atomic_read(&hp->refcnt) != 0);
+ BUG_ON(refcount_read(&hp->refcnt) != 0);
BUG_ON(!list_empty(&hp->list));
alloc_size = PAGE_ALIGN(hp->handle_size);
@@ -221,7 +222,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
static void mdesc_kfree(struct mdesc_handle *hp)
{
- BUG_ON(atomic_read(&hp->refcnt) != 0);
+ BUG_ON(refcount_read(&hp->refcnt) != 0);
BUG_ON(!list_empty(&hp->list));
kfree(hp->self_base);
@@ -260,7 +261,7 @@ struct mdesc_handle *mdesc_grab(void)
spin_lock_irqsave(&mdesc_lock, flags);
hp = cur_mdesc;
if (hp)
- atomic_inc(&hp->refcnt);
+ refcount_inc(&hp->refcnt);
spin_unlock_irqrestore(&mdesc_lock, flags);
return hp;
@@ -272,7 +273,7 @@ void mdesc_release(struct mdesc_handle *hp)
unsigned long flags;
spin_lock_irqsave(&mdesc_lock, flags);
- if (atomic_dec_and_test(&hp->refcnt)) {
+ if (refcount_dec_and_test(&hp->refcnt)) {
list_del_init(&hp->list);
hp->mops->free(hp);
}
@@ -514,7 +515,7 @@ void mdesc_update(void)
if (status != HV_EOK || real_len > len) {
printk(KERN_ERR "MD: mdesc reread fails with %lu\n",
status);
- atomic_dec(&hp->refcnt);
+ refcount_dec(&hp->refcnt);
mdesc_free(hp);
goto out;
}
@@ -527,7 +528,7 @@ void mdesc_update(void)
mdesc_notify_clients(orig_hp, hp);
spin_lock_irqsave(&mdesc_lock, flags);
- if (atomic_dec_and_test(&orig_hp->refcnt))
+ if (refcount_dec_and_test(&orig_hp->refcnt))
mdesc_free(orig_hp);
else
list_add(&orig_hp->list, &mdesc_zombie_list);
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 3b397081047a..2ef8cfa9677e 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -28,7 +28,6 @@
#include <linux/jiffies.h>
#include <linux/cpufreq.h>
#include <linux/percpu.h>
-#include <linux/miscdevice.h>
#include <linux/rtc/m48t59.h>
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
@@ -54,6 +53,8 @@
DEFINE_SPINLOCK(rtc_lock);
+unsigned int __read_mostly vdso_fix_stick;
+
#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -831,12 +832,17 @@ static void init_tick_ops(struct sparc64_tick_ops *ops)
void __init time_init_early(void)
{
if (tlb_type == spitfire) {
- if (is_hummingbird())
+ if (is_hummingbird()) {
init_tick_ops(&hbtick_operations);
- else
+ clocksource_tick.archdata.vclock_mode = VCLOCK_NONE;
+ } else {
init_tick_ops(&tick_operations);
+ clocksource_tick.archdata.vclock_mode = VCLOCK_TICK;
+ vdso_fix_stick = 1;
+ }
} else {
init_tick_ops(&stick_operations);
+ clocksource_tick.archdata.vclock_mode = VCLOCK_STICK;
}
}
diff --git a/arch/sparc/kernel/vdso.c b/arch/sparc/kernel/vdso.c
new file mode 100644
index 000000000000..58880662b271
--- /dev/null
+++ b/arch/sparc/kernel/vdso.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ *
+ * Thanks to hpa@transmeta.com for some useful hint.
+ * Special thanks to Ingo Molnar for his early experience with
+ * a different vsyscall implementation for Linux/IA32 and for the name.
+ */
+
+#include <linux/seqlock.h>
+#include <linux/time.h>
+#include <linux/timekeeper_internal.h>
+
+#include <asm/vvar.h>
+
+void update_vsyscall_tz(void)
+{
+ if (unlikely(vvar_data == NULL))
+ return;
+
+ vvar_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vvar_data->tz_dsttime = sys_tz.tz_dsttime;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ struct vvar_data *vdata = vvar_data;
+
+ if (unlikely(vdata == NULL))
+ return;
+
+ vvar_write_begin(vdata);
+ vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
+ vdata->clock.mask = tk->tkr_mono.mask;
+ vdata->clock.mult = tk->tkr_mono.mult;
+ vdata->clock.shift = tk->tkr_mono.shift;
+
+ vdata->wall_time_sec = tk->xtime_sec;
+ vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
+
+ vdata->monotonic_time_sec = tk->xtime_sec +
+ tk->wall_to_monotonic.tv_sec;
+ vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec +
+ (tk->wall_to_monotonic.tv_nsec <<
+ tk->tkr_mono.shift);
+
+ while (vdata->monotonic_time_snsec >=
+ (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ vdata->monotonic_time_snsec -=
+ ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
+ vdata->monotonic_time_sec++;
+ }
+
+ vdata->wall_time_coarse_sec = tk->xtime_sec;
+ vdata->wall_time_coarse_nsec =
+ (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
+
+ vdata->monotonic_time_coarse_sec =
+ vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
+ vdata->monotonic_time_coarse_nsec =
+ vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+
+ while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
+ vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
+ vdata->monotonic_time_coarse_sec++;
+ }
+
+ vvar_write_end(vdata);
+}
diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c
index c858f5f3ce2c..635d67ffc9a3 100644
--- a/arch/sparc/kernel/viohs.c
+++ b/arch/sparc/kernel/viohs.c
@@ -798,9 +798,9 @@ void vio_port_up(struct vio_driver_state *vio)
}
EXPORT_SYMBOL(vio_port_up);
-static void vio_port_timer(unsigned long _arg)
+static void vio_port_timer(struct timer_list *t)
{
- struct vio_driver_state *vio = (struct vio_driver_state *) _arg;
+ struct vio_driver_state *vio = from_timer(vio, t, timer);
vio_port_up(vio);
}
@@ -849,7 +849,7 @@ int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
vio->ops = ops;
- setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio);
+ timer_setup(&vio->timer, vio_port_timer, 0);
return 0;
}
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 44829a8dc458..0f0f76b4f6cd 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,9 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000000000000..2d0991e5b034
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,30 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2 \
+ .word 0x85b002e8
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+
+ENTRY(NG4fls)
+ LZCNT_O0_G2 !lzcnt %o0, %g2
+ mov 64, %g3
+ retl
+ sub %g3, %g2, %o0
+ENDPROC(NG4fls)
+
+ENTRY(__NG4fls)
+ brz,pn %o0, 1f
+ LZCNT_O0_G2 !lzcnt %o0, %g2
+ mov 63, %g3
+ sub %g3, %g2, %o0
+1:
+ retl
+ nop
+ENDPROC(__NG4fls)
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index aa58ab39f9a6..37866175c921 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -4,6 +4,8 @@
* Copyright (C) 2012 David S. Miller <davem@davemloft.net>
*/
+#include <linux/linkage.h>
+
#define BRANCH_ALWAYS 0x10680000
#define NOP 0x01000000
#define NG_DO_PATCH(OLD, NEW) \
@@ -53,3 +55,10 @@ niagara4_patch_pageops:
retl
nop
.size niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls)
+ NG_DO_PATCH(fls, NG4fls)
+ NG_DO_PATCH(__fls, __NG4fls)
+ retl
+ nop
+ENDPROC(niagara4_patch_fls)
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 5010df497387..465a901a0ada 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -173,6 +173,20 @@ unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new)
}
EXPORT_SYMBOL(__cmpxchg_u32);
+u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new)
+{
+ unsigned long flags;
+ u64 prev;
+
+ spin_lock_irqsave(ATOMIC_HASH(ptr), flags);
+ if ((prev = *ptr) == old)
+ *ptr = new;
+ spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags);
+
+ return prev;
+}
+EXPORT_SYMBOL(__cmpxchg_u64);
+
unsigned long __xchg_u32(volatile u32 *ptr, u32 new)
{
unsigned long flags;
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000000000000..06b8d300bcae
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+ENTRY(fls)
+ brz,pn %o0, 6f
+ mov 0, %o1
+ sethi %hi(0xffff0000), %g3
+ mov %o0, %g2
+ andcc %o0, %g3, %g0
+ be,pt %icc, 8f
+ mov 32, %o1
+ sethi %hi(0xff000000), %g3
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 3f
+ sethi %hi(0xf0000000), %g3
+ sll %o0, 8, %o0
+1:
+ add %o1, -8, %o1
+ sra %o0, 0, %o0
+ mov %o0, %g2
+2:
+ sethi %hi(0xf0000000), %g3
+3:
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 4f
+ sethi %hi(0xc0000000), %g3
+ sll %o0, 4, %o0
+ add %o1, -4, %o1
+ sra %o0, 0, %o0
+ mov %o0, %g2
+4:
+ andcc %g2, %g3, %g0
+ be,a,pt %icc, 7f
+ sll %o0, 2, %o0
+5:
+ xnor %g0, %o0, %o0
+ srl %o0, 31, %o0
+ sub %o1, %o0, %o1
+6:
+ jmp %o7 + 8
+ sra %o1, 0, %o0
+7:
+ add %o1, -2, %o1
+ ba,pt %xcc, 5b
+ sra %o0, 0, %o0
+8:
+ sll %o0, 16, %o0
+ sethi %hi(0xff000000), %g3
+ sra %o0, 0, %o0
+ mov %o0, %g2
+ andcc %g2, %g3, %g0
+ bne,pt %icc, 2b
+ mov 16, %o1
+ ba,pt %xcc, 1b
+ sll %o0, 8, %o0
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 000000000000..c83e22ae9586
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ .text
+ .register %g2, #scratch
+ .register %g3, #scratch
+ENTRY(__fls)
+ mov -1, %g2
+ sllx %g2, 32, %g2
+ and %o0, %g2, %g2
+ brnz,pt %g2, 1f
+ mov 63, %g1
+ sllx %o0, 32, %o0
+ mov 31, %g1
+1:
+ mov -1, %g2
+ sllx %g2, 48, %g2
+ and %o0, %g2, %g2
+ brnz,pt %g2, 2f
+ mov -1, %g2
+ sllx %o0, 16, %o0
+ add %g1, -16, %g1
+2:
+ mov -1, %g2
+ sllx %g2, 56, %g2
+ and %o0, %g2, %g2
+ brnz,pt %g2, 3f
+ mov -1, %g2
+ sllx %o0, 8, %o0
+ add %g1, -8, %g1
+3:
+ sllx %g2, 60, %g2
+ and %o0, %g2, %g2
+ brnz,pt %g2, 4f
+ mov -1, %g2
+ sllx %o0, 4, %o0
+ add %g1, -4, %g1
+4:
+ sllx %g2, 62, %g2
+ and %o0, %g2, %g2
+ brnz,pt %g2, 5f
+ mov -1, %g3
+ sllx %o0, 2, %o0
+ add %g1, -2, %g1
+5:
+ mov 0, %g2
+ sllx %g3, 63, %g3
+ and %o0, %g3, %o0
+ movre %o0, 1, %g2
+ sub %g1, %g2, %g1
+ jmp %o7+8
+ sra %g1, 0, %o0
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
diff --git a/arch/sparc/vdso/.gitignore b/arch/sparc/vdso/.gitignore
new file mode 100644
index 000000000000..ef925b998222
--- /dev/null
+++ b/arch/sparc/vdso/.gitignore
@@ -0,0 +1,3 @@
+vdso.lds
+vdso-image-*.c
+vdso2c
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
new file mode 100644
index 000000000000..a6615d8864f7
--- /dev/null
+++ b/arch/sparc/vdso/Makefile
@@ -0,0 +1,149 @@
+#
+# Building vDSO images for sparc.
+#
+
+KBUILD_CFLAGS += $(DISABLE_LTO)
+
+VDSO64-$(CONFIG_SPARC64) := y
+VDSOCOMPAT-$(CONFIG_COMPAT) := y
+
+# files to link into the vdso
+vobjs-y := vdso-note.o vclock_gettime.o
+
+# files to link into kernel
+obj-y += vma.o
+
+# vDSO images to build
+vdso_img-$(VDSO64-y) += 64
+vdso_img-$(VDSOCOMPAT-y) += 32
+
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+
+$(obj)/vdso.o: $(obj)/vdso.so
+
+targets += vdso.lds $(vobjs-y)
+
+# Build the vDSO image C files and link them in.
+vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
+vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
+vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
+obj-y += $(vdso_img_objs)
+targets += $(vdso_img_cfiles)
+targets += $(vdso_img_sodbg)
+.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
+ $(vdso_img-y:%=$(obj)/vdso%.so)
+
+export CPPFLAGS_vdso.lds += -P -C
+
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
+ -Wl,--no-undefined \
+ -Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \
+ $(DISABLE_LTO)
+
+$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+ $(call if_changed,vdso)
+
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+hostprogs-y += vdso2c
+
+quiet_cmd_vdso2c = VDSO2C $@
+define cmd_vdso2c
+ $(obj)/vdso2c $< $(<:%.dbg=%) $@
+endef
+
+$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
+ $(call if_changed,vdso2c)
+
+#
+# Don't omit frame pointers for ease of userspace debugging, but do
+# optimize sibling calls.
+#
+CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables \
+ -m64 -ffixed-g2 -ffixed-g3 -fcall-used-g4 -fcall-used-g5 -ffixed-g6 \
+ -ffixed-g7 $(filter -g%,$(KBUILD_CFLAGS)) \
+ $(call cc-option, -fno-stack-protector) -fno-omit-frame-pointer \
+ -foptimize-sibling-calls -DBUILD_VDSO
+
+$(vobjs): KBUILD_CFLAGS += $(CFL)
+
+#
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+#
+CFLAGS_REMOVE_vdso-note.o = -pg
+CFLAGS_REMOVE_vclock_gettime.o = -pg
+
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg
+ $(call if_changed,objcopy)
+
+CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf32_sparc,-soname=linux-gate.so.1
+
+#This makes sure the $(obj) subdirectory exists even though vdso32/
+#is not a kbuild sub-make subdirectory
+override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
+
+targets += vdso32/vdso32.lds
+targets += vdso32/vdso-note.o
+targets += vdso32/vclock_gettime.o
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
+$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(obj)/vdso32.so.dbg: asflags-$(CONFIG_SPARC64) += -m32
+
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic -mno-app-regs -ffixed-g7
+KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
+KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
+KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
+KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS_32 += -mv8plus
+$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
+$(obj)/vdso32.so.dbg: FORCE \
+ $(obj)/vdso32/vdso32.lds \
+ $(obj)/vdso32/vclock_gettime.o \
+ $(obj)/vdso32/vdso-note.o
+ $(call if_changed,vdso)
+
+#
+# The DSO images are built using a special linker script.
+#
+quiet_cmd_vdso = VDSO $@
+ cmd_vdso = $(CC) -nostdlib -o $@ \
+ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
+
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+ $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic
+GCOV_PROFILE := n
+
+#
+# Install the unstripped copies of vdso*.so. If our toolchain supports
+# build-id, install .build-id links as well.
+#
+quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
+define cmd_vdso_install
+ cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
+ if readelf -n $< |grep -q 'Build ID'; then \
+ buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
+ first=`echo $$buildid | cut -b-2`; \
+ last=`echo $$buildid | cut -b3-`; \
+ mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
+ ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
+ fi
+endef
+
+vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
+
+$(MODLIB)/vdso: FORCE
+ @mkdir -p $(MODLIB)/vdso
+
+$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
+ $(call cmd,vdso_install)
+
+PHONY += vdso_install $(vdso_img_insttargets)
+vdso_install: $(vdso_img_insttargets) FORCE
diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c
new file mode 100644
index 000000000000..3feb3d960ca5
--- /dev/null
+++ b/arch/sparc/vdso/vclock_gettime.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
+ *
+ * The code should have no internal unresolved relocations.
+ * Check with readelf after changing.
+ * Also alternative() doesn't work.
+ */
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+/* Disable profiling for userspace code: */
+#ifndef DISABLE_BRANCH_PROFILING
+#define DISABLE_BRANCH_PROFILING
+#endif
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+#include <asm/timex.h>
+#include <asm/clocksource.h>
+#include <asm/vvar.h>
+
+#undef TICK_PRIV_BIT
+#ifdef CONFIG_SPARC64
+#define TICK_PRIV_BIT (1UL << 63)
+#else
+#define TICK_PRIV_BIT (1ULL << 63)
+#endif
+
+#define SYSCALL_STRING \
+ "ta 0x6d;" \
+ "sub %%g0, %%o0, %%o0;" \
+
+#define SYSCALL_CLOBBERS \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
+ "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \
+ "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \
+ "cc", "memory"
+
+/*
+ * Compute the vvar page's address in the process address space, and return it
+ * as a pointer to the vvar_data.
+ */
+static notrace noinline struct vvar_data *
+get_vvar_data(void)
+{
+ unsigned long ret;
+
+ /*
+ * vdso data page is the first vDSO page so grab the return address
+ * and move up a page to get to the data page.
+ */
+ ret = (unsigned long)__builtin_return_address(0);
+ ret &= ~(8192 - 1);
+ ret -= 8192;
+
+ return (struct vvar_data *) ret;
+}
+
+static notrace long
+vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+ register long num __asm__("g1") = __NR_clock_gettime;
+ register long o0 __asm__("o0") = clock;
+ register long o1 __asm__("o1") = (long) ts;
+
+ __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
+ "0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
+ return o0;
+}
+
+static notrace __always_inline long
+vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ register long num __asm__("g1") = __NR_gettimeofday;
+ register long o0 __asm__("o0") = (long) tv;
+ register long o1 __asm__("o1") = (long) tz;
+
+ __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
+ "0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
+ return o0;
+}
+
+#ifdef CONFIG_SPARC64
+static notrace noinline u64
+vread_tick(void) {
+ u64 ret;
+
+ __asm__ __volatile__("rd %%asr24, %0 \n"
+ ".section .vread_tick_patch, \"ax\" \n"
+ "rd %%tick, %0 \n"
+ ".previous \n"
+ : "=&r" (ret));
+ return ret & ~TICK_PRIV_BIT;
+}
+#else
+static notrace noinline u64
+vread_tick(void)
+{
+ unsigned int lo, hi;
+
+ __asm__ __volatile__("rd %%asr24, %%g1\n\t"
+ "srlx %%g1, 32, %1\n\t"
+ "srl %%g1, 0, %0\n"
+ ".section .vread_tick_patch, \"ax\" \n"
+ "rd %%tick, %%g1\n"
+ ".previous \n"
+ : "=&r" (lo), "=&r" (hi)
+ :
+ : "g1");
+ return lo | ((u64)hi << 32);
+}
+#endif
+
+static notrace inline u64
+vgetsns(struct vvar_data *vvar)
+{
+ u64 v;
+ u64 cycles;
+
+ cycles = vread_tick();
+ v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask;
+ return v * vvar->clock.mult;
+}
+
+static notrace noinline int
+do_realtime(struct vvar_data *vvar, struct timespec *ts)
+{
+ unsigned long seq;
+ u64 ns;
+
+ ts->tv_nsec = 0;
+ do {
+ seq = vvar_read_begin(vvar);
+ ts->tv_sec = vvar->wall_time_sec;
+ ns = vvar->wall_time_snsec;
+ ns += vgetsns(vvar);
+ ns >>= vvar->clock.shift;
+ } while (unlikely(vvar_read_retry(vvar, seq)));
+
+ timespec_add_ns(ts, ns);
+
+ return 0;
+}
+
+static notrace noinline int
+do_monotonic(struct vvar_data *vvar, struct timespec *ts)
+{
+ unsigned long seq;
+ u64 ns;
+
+ ts->tv_nsec = 0;
+ do {
+ seq = vvar_read_begin(vvar);
+ ts->tv_sec = vvar->monotonic_time_sec;
+ ns = vvar->monotonic_time_snsec;
+ ns += vgetsns(vvar);
+ ns >>= vvar->clock.shift;
+ } while (unlikely(vvar_read_retry(vvar, seq)));
+
+ timespec_add_ns(ts, ns);
+
+ return 0;
+}
+
+static notrace noinline int
+do_realtime_coarse(struct vvar_data *vvar, struct timespec *ts)
+{
+ unsigned long seq;
+
+ do {
+ seq = vvar_read_begin(vvar);
+ ts->tv_sec = vvar->wall_time_coarse_sec;
+ ts->tv_nsec = vvar->wall_time_coarse_nsec;
+ } while (unlikely(vvar_read_retry(vvar, seq)));
+ return 0;
+}
+
+static notrace noinline int
+do_monotonic_coarse(struct vvar_data *vvar, struct timespec *ts)
+{
+ unsigned long seq;
+
+ do {
+ seq = vvar_read_begin(vvar);
+ ts->tv_sec = vvar->monotonic_time_coarse_sec;
+ ts->tv_nsec = vvar->monotonic_time_coarse_nsec;
+ } while (unlikely(vvar_read_retry(vvar, seq)));
+
+ return 0;
+}
+
+notrace int
+__vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+{
+ struct vvar_data *vvd = get_vvar_data();
+
+ switch (clock) {
+ case CLOCK_REALTIME:
+ if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
+ break;
+ return do_realtime(vvd, ts);
+ case CLOCK_MONOTONIC:
+ if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
+ break;
+ return do_monotonic(vvd, ts);
+ case CLOCK_REALTIME_COARSE:
+ return do_realtime_coarse(vvd, ts);
+ case CLOCK_MONOTONIC_COARSE:
+ return do_monotonic_coarse(vvd, ts);
+ }
+ /*
+ * Unknown clock ID ? Fall back to the syscall.
+ */
+ return vdso_fallback_gettime(clock, ts);
+}
+int
+clock_gettime(clockid_t, struct timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime")));
+
+notrace int
+__vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ struct vvar_data *vvd = get_vvar_data();
+
+ if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
+ if (likely(tv != NULL)) {
+ union tstv_t {
+ struct timespec ts;
+ struct timeval tv;
+ } *tstv = (union tstv_t *) tv;
+ do_realtime(vvd, &tstv->ts);
+ /*
+ * Assign before dividing to ensure that the division is
+ * done in the type of tv_usec, not tv_nsec.
+ *
+ * There cannot be > 1 billion usec in a second:
+ * do_realtime() has already distributed such overflow
+ * into tv_sec. So we can assign it to an int safely.
+ */
+ tstv->tv.tv_usec = tstv->ts.tv_nsec;
+ tstv->tv.tv_usec /= 1000;
+ }
+ if (unlikely(tz != NULL)) {
+ /* Avoid memcpy. Some old compilers fail to inline it */
+ tz->tz_minuteswest = vvd->tz_minuteswest;
+ tz->tz_dsttime = vvd->tz_dsttime;
+ }
+ return 0;
+ }
+ return vdso_fallback_gettimeofday(tv, tz);
+}
+int
+gettimeofday(struct timeval *, struct timezone *)
+ __attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/sparc/vdso/vdso-layout.lds.S b/arch/sparc/vdso/vdso-layout.lds.S
new file mode 100644
index 000000000000..f2c83abaca12
--- /dev/null
+++ b/arch/sparc/vdso/vdso-layout.lds.S
@@ -0,0 +1,104 @@
+/*
+ * Linker script for vDSO. This is an ELF shared object prelinked to
+ * its virtual address, and with only one read-only segment.
+ * This script controls its layout.
+ */
+
+#if defined(BUILD_VDSO64)
+# define SHDR_SIZE 64
+#elif defined(BUILD_VDSO32)
+# define SHDR_SIZE 40
+#else
+# error unknown VDSO target
+#endif
+
+#define NUM_FAKE_SHDRS 7
+
+SECTIONS
+{
+ /*
+ * User/kernel shared data is before the vDSO. This may be a little
+ * uglier than putting it after the vDSO, but it avoids issues with
+ * non-allocatable things that dangle past the end of the PT_LOAD
+ * segment. Page size is 8192 for both 64-bit and 32-bit vdso binaries
+ */
+
+ vvar_start = . -8192;
+ vvar_data = vvar_start;
+
+ . = SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .rodata : {
+ *(.rodata*)
+ *(.data*)
+ *(.sdata*)
+ *(.got.plt) *(.got)
+ *(.gnu.linkonce.d.*)
+ *(.bss*)
+ *(.dynbss*)
+ *(.gnu.linkonce.b.*)
+
+ /*
+ * Ideally this would live in a C file: kept in here for
+ * compatibility with x86-64.
+ */
+ VDSO_FAKE_SECTION_TABLE_START = .;
+ . = . + NUM_FAKE_SHDRS * SHDR_SIZE;
+ VDSO_FAKE_SECTION_TABLE_END = .;
+ } :text
+
+ .fake_shstrtab : { *(.fake_shstrtab) } :text
+
+
+ .note : { *(.note.*) } :text :note
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+
+
+ /*
+ * Text is well-separated from actual data: there's plenty of
+ * stuff that isn't used at runtime in between.
+ */
+
+ .text : { *(.text*) } :text =0x90909090,
+
+ .vread_tick_patch : {
+ vread_tick_patch_start = .;
+ *(.vread_tick_patch)
+ vread_tick_patch_end = .;
+ }
+
+ /DISCARD/ : {
+ *(.discard)
+ *(.discard.*)
+ *(__bug_table)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
diff --git a/arch/sparc/vdso/vdso-note.S b/arch/sparc/vdso/vdso-note.S
new file mode 100644
index 000000000000..79a071e4357e
--- /dev/null
+++ b/arch/sparc/vdso/vdso-note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/sparc/vdso/vdso.lds.S b/arch/sparc/vdso/vdso.lds.S
new file mode 100644
index 000000000000..f3caa29a331c
--- /dev/null
+++ b/arch/sparc/vdso/vdso.lds.S
@@ -0,0 +1,25 @@
+/*
+ * Linker script for 64-bit vDSO.
+ * We #include the file to define the layout details.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.
+ */
+
+#define BUILD_VDSO64
+
+#include "vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+ LINUX_2.6 {
+ global:
+ clock_gettime;
+ __vdso_clock_gettime;
+ gettimeofday;
+ __vdso_gettimeofday;
+ local: *;
+ };
+}
diff --git a/arch/sparc/vdso/vdso2c.c b/arch/sparc/vdso/vdso2c.c
new file mode 100644
index 000000000000..9f5b1cd6d51d
--- /dev/null
+++ b/arch/sparc/vdso/vdso2c.c
@@ -0,0 +1,234 @@
+/*
+ * vdso2c - A vdso image preparation tool
+ * Copyright (c) 2014 Andy Lutomirski and others
+ * Licensed under the GPL v2
+ *
+ * vdso2c requires stripped and unstripped input. It would be trivial
+ * to fully strip the input in here, but, for reasons described below,
+ * we need to write a section table. Doing this is more or less
+ * equivalent to dropping all non-allocatable sections, but it's
+ * easier to let objcopy handle that instead of doing it ourselves.
+ * If we ever need to do something fancier than what objcopy provides,
+ * it would be straightforward to add here.
+ *
+ * We keep a section table for a few reasons:
+ *
+ * Binutils has issues debugging the vDSO: it reads the section table to
+ * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
+ * would break build-id if we removed the section table. Binutils
+ * also requires that shstrndx != 0. See:
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
+ *
+ * elfutils might not look for PT_NOTE if there is a section table at
+ * all. I don't know whether this matters for any practical purpose.
+ *
+ * For simplicity, rather than hacking up a partial section table, we
+ * just write a mostly complete one. We omit non-dynamic symbols,
+ * though, since they're rather large.
+ *
+ * Once binutils gets fixed, we might be able to drop this for all but
+ * the 64-bit vdso, since build-id only works in kernel RPMs, and
+ * systems that update to new enough kernel RPMs will likely update
+ * binutils in sync. build-id has never worked for home-built kernel
+ * RPMs without manual symlinking, and I suspect that no one ever does
+ * that.
+ */
+
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <err.h>
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <tools/be_byteshift.h>
+
+#include <linux/elf.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+const char *outfilename;
+
+/* Symbols that we need in vdso2c. */
+enum {
+ sym_vvar_start,
+ sym_VDSO_FAKE_SECTION_TABLE_START,
+ sym_VDSO_FAKE_SECTION_TABLE_END,
+ sym_vread_tick,
+ sym_vread_tick_patch_start,
+ sym_vread_tick_patch_end
+};
+
+struct vdso_sym {
+ const char *name;
+ int export;
+};
+
+struct vdso_sym required_syms[] = {
+ [sym_vvar_start] = {"vvar_start", 1},
+ [sym_VDSO_FAKE_SECTION_TABLE_START] = {
+ "VDSO_FAKE_SECTION_TABLE_START", 0
+ },
+ [sym_VDSO_FAKE_SECTION_TABLE_END] = {
+ "VDSO_FAKE_SECTION_TABLE_END", 0
+ },
+ [sym_vread_tick] = {"vread_tick", 1},
+ [sym_vread_tick_patch_start] = {"vread_tick_patch_start", 1},
+ [sym_vread_tick_patch_end] = {"vread_tick_patch_end", 1}
+};
+
+__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
+static void fail(const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ fprintf(stderr, "Error: ");
+ vfprintf(stderr, format, ap);
+ if (outfilename)
+ unlink(outfilename);
+ exit(1);
+ va_end(ap);
+}
+
+/*
+ * Evil macros for big-endian reads and writes
+ */
+#define GBE(x, bits, ifnot) \
+ __builtin_choose_expr( \
+ (sizeof(*(x)) == bits/8), \
+ (__typeof__(*(x)))get_unaligned_be##bits(x), ifnot)
+
+#define LAST_GBE(x) \
+ __builtin_choose_expr(sizeof(*(x)) == 1, *(x), (void)(0))
+
+#define GET_BE(x) \
+ GBE(x, 64, GBE(x, 32, GBE(x, 16, LAST_GBE(x))))
+
+#define PBE(x, val, bits, ifnot) \
+ __builtin_choose_expr( \
+ (sizeof(*(x)) == bits/8), \
+ put_unaligned_be##bits((val), (x)), ifnot)
+
+#define LAST_PBE(x, val) \
+ __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), (void)(0))
+
+#define PUT_BE(x, val) \
+ PBE(x, val, 64, PBE(x, val, 32, PBE(x, val, 16, LAST_PBE(x, val))))
+
+#define NSYMS ARRAY_SIZE(required_syms)
+
+#define BITSFUNC3(name, bits, suffix) name##bits##suffix
+#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
+#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
+
+#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+#define ELF_BITS 64
+#include "vdso2c.h"
+#undef ELF_BITS
+
+#define ELF_BITS 32
+#include "vdso2c.h"
+#undef ELF_BITS
+
+static void go(void *raw_addr, size_t raw_len,
+ void *stripped_addr, size_t stripped_len,
+ FILE *outfile, const char *name)
+{
+ Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
+
+ if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
+ go64(raw_addr, raw_len, stripped_addr, stripped_len,
+ outfile, name);
+ } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
+ go32(raw_addr, raw_len, stripped_addr, stripped_len,
+ outfile, name);
+ } else {
+ fail("unknown ELF class\n");
+ }
+}
+
+static void map_input(const char *name, void **addr, size_t *len, int prot)
+{
+ off_t tmp_len;
+
+ int fd = open(name, O_RDONLY);
+
+ if (fd == -1)
+ err(1, "%s", name);
+
+ tmp_len = lseek(fd, 0, SEEK_END);
+ if (tmp_len == (off_t)-1)
+ err(1, "lseek");
+ *len = (size_t)tmp_len;
+
+ *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
+ if (*addr == MAP_FAILED)
+ err(1, "mmap");
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ size_t raw_len, stripped_len;
+ void *raw_addr, *stripped_addr;
+ FILE *outfile;
+ char *name, *tmp;
+ int namelen;
+
+ if (argc != 4) {
+ printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
+ return 1;
+ }
+
+ /*
+ * Figure out the struct name. If we're writing to a .so file,
+ * generate raw output insted.
+ */
+ name = strdup(argv[3]);
+ namelen = strlen(name);
+ if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
+ name = NULL;
+ } else {
+ tmp = strrchr(name, '/');
+ if (tmp)
+ name = tmp + 1;
+ tmp = strchr(name, '.');
+ if (tmp)
+ *tmp = '\0';
+ for (tmp = name; *tmp; tmp++)
+ if (*tmp == '-')
+ *tmp = '_';
+ }
+
+ map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
+ map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
+
+ outfilename = argv[3];
+ outfile = fopen(outfilename, "w");
+ if (!outfile)
+ err(1, "%s", argv[2]);
+
+ go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
+
+ munmap(raw_addr, raw_len);
+ munmap(stripped_addr, stripped_len);
+ fclose(outfile);
+
+ return 0;
+}
diff --git a/arch/sparc/vdso/vdso2c.h b/arch/sparc/vdso/vdso2c.h
new file mode 100644
index 000000000000..808decb0f7be
--- /dev/null
+++ b/arch/sparc/vdso/vdso2c.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This file is included up to twice from vdso2c.c. It generates code for
+ * 32-bit and 64-bit vDSOs. We will eventually need both for 64-bit builds,
+ * since 32-bit vDSOs will then be built for 32-bit userspace.
+ */
+
+static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
+ void *stripped_addr, size_t stripped_len,
+ FILE *outfile, const char *name)
+{
+ int found_load = 0;
+ unsigned long load_size = -1; /* Work around bogus warning */
+ unsigned long mapping_size;
+ int i;
+ unsigned long j;
+
+ ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr;
+ ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
+ ELF(Dyn) *dyn = 0, *dyn_end = 0;
+ INT_BITS syms[NSYMS] = {};
+
+ ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff));
+
+ /* Walk the segment table. */
+ for (i = 0; i < GET_BE(&hdr->e_phnum); i++) {
+ if (GET_BE(&pt[i].p_type) == PT_LOAD) {
+ if (found_load)
+ fail("multiple PT_LOAD segs\n");
+
+ if (GET_BE(&pt[i].p_offset) != 0 ||
+ GET_BE(&pt[i].p_vaddr) != 0)
+ fail("PT_LOAD in wrong place\n");
+
+ if (GET_BE(&pt[i].p_memsz) != GET_BE(&pt[i].p_filesz))
+ fail("cannot handle memsz != filesz\n");
+
+ load_size = GET_BE(&pt[i].p_memsz);
+ found_load = 1;
+ } else if (GET_BE(&pt[i].p_type) == PT_DYNAMIC) {
+ dyn = raw_addr + GET_BE(&pt[i].p_offset);
+ dyn_end = raw_addr + GET_BE(&pt[i].p_offset) +
+ GET_BE(&pt[i].p_memsz);
+ }
+ }
+ if (!found_load)
+ fail("no PT_LOAD seg\n");
+
+ if (stripped_len < load_size)
+ fail("stripped input is too short\n");
+
+ /* Walk the dynamic table */
+ for (i = 0; dyn + i < dyn_end &&
+ GET_BE(&dyn[i].d_tag) != DT_NULL; i++) {
+ typeof(dyn[i].d_tag) tag = GET_BE(&dyn[i].d_tag);
+ typeof(dyn[i].d_un.d_val) val = GET_BE(&dyn[i].d_un.d_val);
+
+ if ((tag == DT_RELSZ || tag == DT_RELASZ) && (val != 0))
+ fail("vdso image contains dynamic relocations\n");
+ }
+
+ /* Walk the section table */
+ for (i = 0; i < GET_BE(&hdr->e_shnum); i++) {
+ ELF(Shdr) *sh = raw_addr + GET_BE(&hdr->e_shoff) +
+ GET_BE(&hdr->e_shentsize) * i;
+ if (GET_BE(&sh->sh_type) == SHT_SYMTAB)
+ symtab_hdr = sh;
+ }
+
+ if (!symtab_hdr)
+ fail("no symbol table\n");
+
+ strtab_hdr = raw_addr + GET_BE(&hdr->e_shoff) +
+ GET_BE(&hdr->e_shentsize) * GET_BE(&symtab_hdr->sh_link);
+
+ /* Walk the symbol table */
+ for (i = 0;
+ i < GET_BE(&symtab_hdr->sh_size) / GET_BE(&symtab_hdr->sh_entsize);
+ i++) {
+ int k;
+
+ ELF(Sym) *sym = raw_addr + GET_BE(&symtab_hdr->sh_offset) +
+ GET_BE(&symtab_hdr->sh_entsize) * i;
+ const char *name = raw_addr + GET_BE(&strtab_hdr->sh_offset) +
+ GET_BE(&sym->st_name);
+
+ for (k = 0; k < NSYMS; k++) {
+ if (!strcmp(name, required_syms[k].name)) {
+ if (syms[k]) {
+ fail("duplicate symbol %s\n",
+ required_syms[k].name);
+ }
+
+ /*
+ * Careful: we use negative addresses, but
+ * st_value is unsigned, so we rely
+ * on syms[k] being a signed type of the
+ * correct width.
+ */
+ syms[k] = GET_BE(&sym->st_value);
+ }
+ }
+ }
+
+ /* Validate mapping addresses. */
+ if (syms[sym_vvar_start] % 8192)
+ fail("vvar_begin must be a multiple of 8192\n");
+
+ if (!name) {
+ fwrite(stripped_addr, stripped_len, 1, outfile);
+ return;
+ }
+
+ mapping_size = (stripped_len + 8191) / 8192 * 8192;
+
+ fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
+ fprintf(outfile, "#include <linux/cache.h>\n");
+ fprintf(outfile, "#include <asm/vdso.h>\n");
+ fprintf(outfile, "\n");
+ fprintf(outfile,
+ "static unsigned char raw_data[%lu] __ro_after_init __aligned(8192)= {",
+ mapping_size);
+ for (j = 0; j < stripped_len; j++) {
+ if (j % 10 == 0)
+ fprintf(outfile, "\n\t");
+ fprintf(outfile, "0x%02X, ",
+ (int)((unsigned char *)stripped_addr)[j]);
+ }
+ fprintf(outfile, "\n};\n\n");
+
+ fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name);
+ fprintf(outfile, "\t.data = raw_data,\n");
+ fprintf(outfile, "\t.size = %lu,\n", mapping_size);
+ for (i = 0; i < NSYMS; i++) {
+ if (required_syms[i].export && syms[i])
+ fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
+ required_syms[i].name, (int64_t)syms[i]);
+ }
+ fprintf(outfile, "};\n");
+}
diff --git a/arch/sparc/vdso/vdso32/.gitignore b/arch/sparc/vdso/vdso32/.gitignore
new file mode 100644
index 000000000000..e45fba9d0ced
--- /dev/null
+++ b/arch/sparc/vdso/vdso32/.gitignore
@@ -0,0 +1 @@
+vdso32.lds
diff --git a/arch/sparc/vdso/vdso32/vclock_gettime.c b/arch/sparc/vdso/vdso32/vclock_gettime.c
new file mode 100644
index 000000000000..026abb3b826c
--- /dev/null
+++ b/arch/sparc/vdso/vdso32/vclock_gettime.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define BUILD_VDSO32
+
+#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
+#undef CONFIG_OPTIMIZE_INLINING
+#endif
+
+#ifdef CONFIG_SPARC64
+
+/*
+ * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
+ * configuration
+ */
+#undef CONFIG_64BIT
+#undef CONFIG_SPARC64
+#define BUILD_VDSO32_64
+#define CONFIG_32BIT
+#undef CONFIG_QUEUED_RWLOCKS
+#undef CONFIG_QUEUED_SPINLOCKS
+
+#endif
+
+#include "../vclock_gettime.c"
diff --git a/arch/sparc/vdso/vdso32/vdso-note.S b/arch/sparc/vdso/vdso32/vdso-note.S
new file mode 100644
index 000000000000..e234983cf0d8
--- /dev/null
+++ b/arch/sparc/vdso/vdso32/vdso-note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO
+ * text. Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/sparc/vdso/vdso32/vdso32.lds.S b/arch/sparc/vdso/vdso32/vdso32.lds.S
new file mode 100644
index 000000000000..53575ee154c4
--- /dev/null
+++ b/arch/sparc/vdso/vdso32/vdso32.lds.S
@@ -0,0 +1,24 @@
+/*
+ * Linker script for sparc32 vDSO
+ * We #include the file to define the layout details.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.
+ */
+
+#define BUILD_VDSO32
+#include "../vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+ LINUX_2.6 {
+ global:
+ clock_gettime;
+ __vdso_clock_gettime;
+ gettimeofday;
+ __vdso_gettimeofday;
+ local: *;
+ };
+}
diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c
new file mode 100644
index 000000000000..0a6f50098e23
--- /dev/null
+++ b/arch/sparc/vdso/vma.c
@@ -0,0 +1,268 @@
+/*
+ * Set up the VMAs to tell the VM about the vDSO.
+ * Copyright 2007 Andi Kleen, SUSE Labs.
+ * Subject to the GPL, v.2
+ */
+
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/random.h>
+#include <linux/elf.h>
+#include <asm/vdso.h>
+#include <asm/vvar.h>
+#include <asm/page.h>
+
+unsigned int __read_mostly vdso_enabled = 1;
+
+static struct vm_special_mapping vvar_mapping = {
+ .name = "[vvar]"
+};
+
+#ifdef CONFIG_SPARC64
+static struct vm_special_mapping vdso_mapping64 = {
+ .name = "[vdso]"
+};
+#endif
+
+#ifdef CONFIG_COMPAT
+static struct vm_special_mapping vdso_mapping32 = {
+ .name = "[vdso]"
+};
+#endif
+
+struct vvar_data *vvar_data;
+
+#define SAVE_INSTR_SIZE 4
+
+/*
+ * Allocate pages for the vdso and vvar, and copy in the vdso text from the
+ * kernel image.
+ */
+int __init init_vdso_image(const struct vdso_image *image,
+ struct vm_special_mapping *vdso_mapping)
+{
+ int i;
+ struct page *dp, **dpp = NULL;
+ int dnpages = 0;
+ struct page *cp, **cpp = NULL;
+ int cnpages = (image->size) / PAGE_SIZE;
+
+ /*
+ * First, the vdso text. This is initialied data, an integral number of
+ * pages long.
+ */
+ if (WARN_ON(image->size % PAGE_SIZE != 0))
+ goto oom;
+
+ cpp = kcalloc(cnpages, sizeof(struct page *), GFP_KERNEL);
+ vdso_mapping->pages = cpp;
+
+ if (!cpp)
+ goto oom;
+
+ if (vdso_fix_stick) {
+ /*
+ * If the system uses %tick instead of %stick, patch the VDSO
+ * with instruction reading %tick instead of %stick.
+ */
+ unsigned int j, k = SAVE_INSTR_SIZE;
+ unsigned char *data = image->data;
+
+ for (j = image->sym_vread_tick_patch_start;
+ j < image->sym_vread_tick_patch_end; j++) {
+
+ data[image->sym_vread_tick + k] = data[j];
+ k++;
+ }
+ }
+
+ for (i = 0; i < cnpages; i++) {
+ cp = alloc_page(GFP_KERNEL);
+ if (!cp)
+ goto oom;
+ cpp[i] = cp;
+ copy_page(page_address(cp), image->data + i * PAGE_SIZE);
+ }
+
+ /*
+ * Now the vvar page. This is uninitialized data.
+ */
+
+ if (vvar_data == NULL) {
+ dnpages = (sizeof(struct vvar_data) / PAGE_SIZE) + 1;
+ if (WARN_ON(dnpages != 1))
+ goto oom;
+ dpp = kcalloc(dnpages, sizeof(struct page *), GFP_KERNEL);
+ vvar_mapping.pages = dpp;
+
+ if (!dpp)
+ goto oom;
+
+ dp = alloc_page(GFP_KERNEL);
+ if (!dp)
+ goto oom;
+
+ dpp[0] = dp;
+ vvar_data = page_address(dp);
+ memset(vvar_data, 0, PAGE_SIZE);
+
+ vvar_data->seq = 0;
+ }
+
+ return 0;
+ oom:
+ if (cpp != NULL) {
+ for (i = 0; i < cnpages; i++) {
+ if (cpp[i] != NULL)
+ __free_page(cpp[i]);
+ }
+ kfree(cpp);
+ vdso_mapping->pages = NULL;
+ }
+
+ if (dpp != NULL) {
+ for (i = 0; i < dnpages; i++) {
+ if (dpp[i] != NULL)
+ __free_page(dpp[i]);
+ }
+ kfree(dpp);
+ vvar_mapping.pages = NULL;
+ }
+
+ pr_warn("Cannot allocate vdso\n");
+ vdso_enabled = 0;
+ return -ENOMEM;
+}
+
+static int __init init_vdso(void)
+{
+ int err = 0;
+#ifdef CONFIG_SPARC64
+ err = init_vdso_image(&vdso_image_64_builtin, &vdso_mapping64);
+ if (err)
+ return err;
+#endif
+
+#ifdef CONFIG_COMPAT
+ err = init_vdso_image(&vdso_image_32_builtin, &vdso_mapping32);
+#endif
+ return err;
+
+}
+subsys_initcall(init_vdso);
+
+struct linux_binprm;
+
+/* Shuffle the vdso up a bit, randomly. */
+static unsigned long vdso_addr(unsigned long start, unsigned int len)
+{
+ unsigned int offset;
+
+ /* This loses some more bits than a modulo, but is cheaper */
+ offset = get_random_int() & (PTRS_PER_PTE - 1);
+ return start + (offset << PAGE_SHIFT);
+}
+
+static int map_vdso(const struct vdso_image *image,
+ struct vm_special_mapping *vdso_mapping)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long text_start, addr = 0;
+ int ret = 0;
+
+ down_write(&mm->mmap_sem);
+
+ /*
+ * First, get an unmapped region: then randomize it, and make sure that
+ * region is free.
+ */
+ if (current->flags & PF_RANDOMIZE) {
+ addr = get_unmapped_area(NULL, 0,
+ image->size - image->sym_vvar_start,
+ 0, 0);
+ if (IS_ERR_VALUE(addr)) {
+ ret = addr;
+ goto up_fail;
+ }
+ addr = vdso_addr(addr, image->size - image->sym_vvar_start);
+ }
+ addr = get_unmapped_area(NULL, addr,
+ image->size - image->sym_vvar_start, 0, 0);
+ if (IS_ERR_VALUE(addr)) {
+ ret = addr;
+ goto up_fail;
+ }
+
+ text_start = addr - image->sym_vvar_start;
+ current->mm->context.vdso = (void __user *)text_start;
+
+ /*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ */
+ vma = _install_special_mapping(mm,
+ text_start,
+ image->size,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_mapping);
+
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto up_fail;
+ }
+
+ vma = _install_special_mapping(mm,
+ addr,
+ -image->sym_vvar_start,
+ VM_READ|VM_MAYREAD,
+ &vvar_mapping);
+
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ do_munmap(mm, text_start, image->size, NULL);
+ }
+
+up_fail:
+ if (ret)
+ current->mm->context.vdso = NULL;
+
+ up_write(&mm->mmap_sem);
+ return ret;
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+
+ if (!vdso_enabled)
+ return 0;
+
+#if defined CONFIG_COMPAT
+ if (!(is_32bit_task()))
+ return map_vdso(&vdso_image_64_builtin, &vdso_mapping64);
+ else
+ return map_vdso(&vdso_image_32_builtin, &vdso_mapping32);
+#else
+ return map_vdso(&vdso_image_64_builtin, &vdso_mapping64);
+#endif
+
+}
+
+static __init int vdso_setup(char *s)
+{
+ int err;
+ unsigned long val;
+
+ err = kstrtoul(s, 10, &val);
+ vdso_enabled = val;
+ return err;
+}
+__setup("vdso=", vdso_setup);
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index d9280482a2f8..c68add8df3ae 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -10,7 +10,6 @@ config UML
select HAVE_DEBUG_KMEMLEAK
select GENERIC_IRQ_SHOW
select GENERIC_CPU_DEVICES
- select GENERIC_IO
select GENERIC_CLOCKEVENTS
select HAVE_GCC_PLUGINS
select TTY # Needed for line.c
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b71daed3cca2..59e13a79c2e3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
+ case MSR_IA32_CR_PAT:
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
+ vcpu->arch.pat = data;
+ svm->vmcb->save.g_pat = data;
+ mark_dirty(svm->vmcb, VMCB_NPT);
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c3522a989d0..714a0673ec3c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
+static bool __read_mostly enable_vnmi = 1;
+module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
+
static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
@@ -202,6 +205,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
+ /* Support for vnmi-less CPUs */
+ int soft_vnmi_blocked;
+ ktime_t entry_time;
+ s64 vnmi_blocked_time;
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
+static inline bool cpu_has_virtual_nmis(void)
+{
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit);
}
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
-{
- return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
-}
-
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control &
@@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
- PIN_BASED_VIRTUAL_NMIS;
- opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+ PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+
+ if (!enable_vnmi)
+ pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
+
/* Enable the preemption timer dynamically */
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
@@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ if (!enable_vnmi ||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (!enable_vnmi) {
+ /*
+ * Tracking the NMI-blocked state in software is built upon
+ * finding the next open IRQ window. This, in turn, depends on
+ * well-behaving guests: They have to keep IRQs disabled at
+ * least as long as the NMI handler runs. Otherwise we may
+ * cause NMI nesting, maybe breaking the guest. But as this is
+ * highly unlikely, we can live with the residual risk.
+ */
+ vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+
++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false;
@@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked;
+ if (!enable_vnmi)
+ return vmx->loaded_vmcs->soft_vnmi_blocked;
if (vmx->loaded_vmcs->nmi_known_unmasked)
return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
- if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ if (!enable_vnmi) {
+ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+ } else {
+ vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ }
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
+ if (!enable_vnmi &&
+ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+ return 0;
+
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
+ WARN_ON_ONCE(!enable_vnmi);
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
@@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_flexpriority())
flexpriority_enabled = 0;
+ if (!cpu_has_virtual_nmis())
+ enable_vnmi = 0;
+
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
@@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
}
/* Create a new VMCS */
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
@@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked)) {
+ if (vmx_interrupt_allowed(vcpu)) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+ vcpu->arch.nmi_pending) {
+ /*
+ * This CPU don't support us in finding the end of an
+ * NMI-blocked window if the guest runs with IRQs
+ * disabled. So we pull the trigger after 1 s of
+ * futile waiting, but inform the user about this.
+ */
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+ "state on VCPU %d after 1 s timeout\n",
+ __func__, vcpu->vcpu_id);
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ }
+ }
+
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
- if (vmx->loaded_vmcs->nmi_known_unmasked)
- return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- /*
- * SDM 3: 27.7.1.2 (September 2008)
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
- * a guest IRET fault.
- * SDM 3: 23.2.2 (September 2008)
- * Bit 12 is undefined in any of the following cases:
- * If the VM exit sets the valid bit in the IDT-vectoring
- * information field.
- * If the VM exit is due to a double fault.
- */
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmx->loaded_vmcs->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- & GUEST_INTR_STATE_NMI);
+ if (enable_vnmi) {
+ if (vmx->loaded_vmcs->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ /*
+ * SDM 3: 27.7.1.2 (September 2008)
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ * a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
+ */
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmx->loaded_vmcs->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
+ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->vnmi_blocked_time +=
+ ktime_to_ns(ktime_sub(ktime_get(),
+ vmx->loaded_vmcs->entry_time));
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4;
+ /* Record the guest's net vcpu time for enforced NMI injections. */
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->entry_time = ktime_get();
+
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)