From 4fbaf649ebbc625189e349fd6d07a4fe22a863f8 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Fri, 27 Oct 2017 15:23:44 -0700
Subject: ARC: [plat-axs10x] auto-select AXS101 or AXS103 given the ISA config

The AXS10x platform has two flavours:
 - AXS101 (ARC700 cpu, ARCompact ISA)
 - AXS103 (ARC HS38x cpu, ARCv2 ISA)

So depending on the ISA support configured, pre-select the AXS10x variant.

Signed-off-by: Vineet Gupta
---
 arch/arc/plat-axs10x/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig
index c54d1ae57fe0..4e0df7b7a248 100644
--- a/arch/arc/plat-axs10x/Kconfig
+++ b/arch/arc/plat-axs10x/Kconfig
@@ -14,6 +14,8 @@ menuconfig ARC_PLAT_AXS10X
 	select MIGHT_HAVE_PCI
 	select GENERIC_IRQ_CHIP
 	select GPIOLIB
+	select AXS101 if ISA_ARCOMPACT
+	select AXS103 if ISA_ARCV2
 	help
 	  Support for the ARC AXS10x Software Development Platforms.
-- cgit

From 92d44128241f8aa129cf2f3672786a1b52053510 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Mon, 6 Nov 2017 10:55:51 -0800
Subject: ARCv2: Accommodate HS48 MMUv5 by relaxing MMU ver checking

HS48 cpus will have a new MMUv5, although Linux currently does not explicitly
support the newer features (so support remains at V4). The existing
software/hardware version check is very tight and causes a boot abort.

Given that the MMUv5 hardware is backwards compatible, relax the boot check to
allow the current kernel support level to work with the new hardware.

Also, while at it, move the ancient MMU-related code under ARCompact builds,
since the baseline MMU for HS cpus is v4.

Signed-off-by: Vineet Gupta
---
 arch/arc/mm/tlb.c | 57 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 8ceefbf72fb0..4097764fea23 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -762,21 +762,23 @@
 	tmp = read_aux_reg(ARC_REG_MMU_BCR);
 	mmu->ver = (tmp >> 24);
 
-	if (mmu->ver <= 2) {
-		mmu2 = (struct bcr_mmu_1_2 *)&tmp;
-		mmu->pg_sz_k = TO_KB(0x2000);
-		mmu->sets = 1 << mmu2->sets;
-		mmu->ways = 1 << mmu2->ways;
-		mmu->u_dtlb = mmu2->u_dtlb;
-		mmu->u_itlb = mmu2->u_itlb;
-	} else if (mmu->ver == 3) {
-		mmu3 = (struct bcr_mmu_3 *)&tmp;
-		mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
-		mmu->sets = 1 << mmu3->sets;
-		mmu->ways = 1 << mmu3->ways;
-		mmu->u_dtlb = mmu3->u_dtlb;
-		mmu->u_itlb = mmu3->u_itlb;
-		mmu->sasid = mmu3->sasid;
+	if (is_isa_arcompact()) {
+		if (mmu->ver <= 2) {
+			mmu2 = (struct bcr_mmu_1_2 *)&tmp;
+			mmu->pg_sz_k = TO_KB(0x2000);
+			mmu->sets = 1 << mmu2->sets;
+			mmu->ways = 1 << mmu2->ways;
+			mmu->u_dtlb = mmu2->u_dtlb;
+			mmu->u_itlb = mmu2->u_itlb;
+		} else {
+			mmu3 = (struct bcr_mmu_3 *)&tmp;
+			mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
+			mmu->sets = 1 << mmu3->sets;
+			mmu->ways = 1 << mmu3->ways;
+			mmu->u_dtlb = mmu3->u_dtlb;
+			mmu->u_itlb = mmu3->u_itlb;
+			mmu->sasid = mmu3->sasid;
+		}
 	} else {
 		mmu4 = (struct bcr_mmu_4 *)&tmp;
 		mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
@@ -818,8 +820,9 @@ int pae40_exist_but_not_enab(void)
 
 void arc_mmu_init(void)
 {
-	char str[256];
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	char str[256];
+	int compat = 0;
 
 	pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
 
@@ -834,15 +837,21 @@ void arc_mmu_init(void)
 	 */
 	BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE));
 
-	/* For efficiency sake, kernel is compile time built for a MMU ver
-	 * This must match the hardware it is running on.
-	 * Linux built for MMU V2, if run on MMU V1 will break down because V1
-	 * hardware doesn't understand cmds such as WriteNI, or IVUTLB
-	 * On the other hand, Linux built for V1 if run on MMU V2 will do
-	 * un-needed workarounds to prevent memcpy thrashing.
-	 * Similarly MMU V3 has new features which won't work on older MMU
+	/*
+	 * Ensure that MMU features assumed by kernel exist in hardware.
+	 * For older ARC700 cpus, it has to be exact match, since the MMU
+	 * revisions were not backwards compatible (MMUv3 TLB layout changed
+	 * so even if kernel for v2 didn't use any new cmds of v3, it would
+	 * still not work.
+	 * For HS cpus, MMUv4 was baseline and v5 is backwards compatible
+	 * (will run older software).
 	 */
-	if (mmu->ver != CONFIG_ARC_MMU_VER) {
+	if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER)
+		compat = 1;
+	else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER)
+		compat = 1;
+
+	if (!compat) {
 		panic("MMU ver %d doesn't match kernel built for %d...\n",
 		      mmu->ver, CONFIG_ARC_MMU_VER);
 	}
-- cgit

From f3156851616b4f3cb1d6c567fe46adcfd43ad8f8 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Fri, 10 Nov 2017 12:40:00 -0800
Subject: ARCv2: boot log: updates for HS48: dual-issue, ECC, Loop Buffer

Print the hardware support for ECC and the Loop Buffer, as well as their
runtime enabled status.

Note that unlike the existing boot printing, this info is not read from the
pre-decoded hardware capability info cached in the cpuinfo[] struct. Instead
we read the AUX regs on the spot and print them, without bothering to save
them anywhere. There is no point in caching static hardware capabilities in
memory when their use is very sporadic and non-performance critical, mainly
for /proc/cpuinfo. This gets worse in SMP, given that cpuinfo[] is per-cpu
yet pretty much exactly the same across all cpus. So only info needed at
runtime (e.g. TLB geometry) needs to be cached in cpuinfo[], and going
forward we will start converting code to this paradigm.

Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/arcregs.h | 33 +++++++++++++++++++++++++++++++-
 arch/arc/kernel/setup.c        | 43 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index b1c56d35f2a9..49bfbd879caa 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -11,12 +11,14 @@
 
 /* Build Configuration Registers */
 #define ARC_REG_AUX_DCCM	0x18	/* DCCM Base Addr ARCv2 */
+#define ARC_REG_ERP_CTRL	0x3F	/* ARCv2 Error protection control */
 #define ARC_REG_DCCM_BASE_BUILD	0x61	/* DCCM Base Addr ARCompact */
 #define ARC_REG_CRC_BCR		0x62
 #define ARC_REG_VECBASE_BCR	0x68
 #define ARC_REG_PERIBASE_BCR	0x69
 #define ARC_REG_FP_BCR		0x6B	/* ARCompact: Single-Precision FPU */
 #define ARC_REG_DPFP_BCR	0x6C	/* ARCompact: Dbl Precision FPU */
+#define ARC_REG_ERP_BUILD	0xc7	/* ARCv2 Error protection Build: ECC/Parity */
 #define ARC_REG_FP_V2_BCR	0xc8	/* ARCv2 FPU */
 #define ARC_REG_SLC_BCR		0xce
 #define ARC_REG_DCCM_BUILD	0x74	/* DCCM size (common) */
@@ -32,11 +34,14 @@
 #define ARC_REG_D_UNCACH_BCR	0x6A
 #define ARC_REG_BPU_BCR		0xc0
 #define ARC_REG_ISA_CFG_BCR	0xc1
+#define ARC_REG_LPB_BUILD	0xE9	/* ARCv2 Loop Buffer Build */
 #define ARC_REG_RTT_BCR		0xF2
 #define ARC_REG_IRQ_BCR		0xF3
+#define ARC_REG_MICRO_ARCH_BCR	0xF9	/* ARCv2 Product revision */
 #define ARC_REG_SMART_BCR	0xFF
 #define ARC_REG_CLUSTER_BCR	0xcf
 #define ARC_REG_AUX_ICCM	0x208	/* ICCM Base Addr (ARCv2) */
+#define ARC_REG_LPB_CTRL	0x488	/* ARCv2 Loop Buffer control */
 
 /* Common for ARCompact and ARCv2 status register */
 #define ARC_REG_STATUS32	0x0A
@@ -229,6 +234,32 @@ struct bcr_bpu_arcv2 {
 #endif
 };
 
+/* Error Protection Build: ECC/Parity */
+struct bcr_erp {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad3:5, mmu:3, pad2:4, ic:3, dc:3, pad1:6, ver:8;
+#else
+	unsigned int ver:8, pad1:6, dc:3, ic:3, pad2:4, mmu:3, pad3:5;
+#endif
+};
+
+/* Error Protection Control */
+struct ctl_erp {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad2:27, mpd:1, pad1:2, dpd:1, dpi:1;
+#else
+	unsigned int dpi:1, dpd:1, pad1:2, mpd:1, pad2:27;
+#endif
+};
+
+struct bcr_lpb {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad:16, entries:8, ver:8;
+#else
+	unsigned int ver:8, entries:8, pad:16;
+#endif
+};
+
 struct bcr_generic {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int info:24, ver:8;
@@ -270,7 +301,7 @@ struct cpuinfo_arc {
 	struct cpuinfo_arc_ccm iccm, dccm;
 	struct {
 		unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
-			     fpu_sp:1, fpu_dp:1, dual_iss_enb:1, dual_iss_exist:1, pad2:4,
+			     fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
			     debug:1, ap:1, smart:1, rtt:1, pad3:4,
			     timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
 	} extn;
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index fb83844daeea..7ef7d9a8ff89 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -199,8 +199,10 @@
 		unsigned int exec_ctrl;
 
 		READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-		cpu->extn.dual_iss_exist = 1;
-		cpu->extn.dual_iss_enb = exec_ctrl & 1;
+		cpu->extn.dual_enb = exec_ctrl & 1;
+
+		/* dual issue always present for this core */
+		cpu->extn.dual = 1;
 	}
 }
 
@@ -253,7 +255,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 		       cpu_id, cpu->name, cpu->details,
 		       is_isa_arcompact() ? "ARCompact" : "ARCv2",
 		       IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
-		       IS_AVAIL3(cpu->extn.dual_iss_exist, cpu->extn.dual_iss_enb, " Dual-Issue"));
+		       IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
 
 	n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
 		       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
@@ -293,11 +295,26 @@
 
 	if (cpu->bpu.ver)
 		n += scnprintf(buf + n, len - n,
-			      "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+			      "BPU\t\t: %s%s match, cache:%d, Predict Table:%d",
 			      IS_AVAIL1(cpu->bpu.full, "full"),
 			      IS_AVAIL1(!cpu->bpu.full, "partial"),
 			      cpu->bpu.num_cache, cpu->bpu.num_pred);
 
+	if (is_isa_arcv2()) {
+		struct bcr_lpb lpb;
+
+		READ_BCR(ARC_REG_LPB_BUILD, lpb);
+		if (lpb.ver) {
+			unsigned int ctl;
+			ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+			n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
+				       lpb.entries,
+				       IS_DISABLED_RUN(!ctl));
+		}
+	}
+
+	n += scnprintf(buf + n, len - n, "\n");
 	return buf;
 }
 
@@ -326,6 +343,24 @@
 			       cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
 			       cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
 
+	if (is_isa_arcv2()) {
+
+		/* Error Protection: ECC/Parity */
+		struct bcr_erp erp;
+		READ_BCR(ARC_REG_ERP_BUILD, erp);
+
+		if (erp.ver) {
+			struct ctl_erp ctl;
+			READ_BCR(ARC_REG_ERP_CTRL, ctl);
+
+			/* inverted bits: 0 means enabled */
+			n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+				       IS_AVAIL3(erp.ic, !ctl.dpi, "IC "),
+				       IS_AVAIL3(erp.dc, !ctl.dpd, "DC "),
+				       IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+		}
+	}
+
 	n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n",
 			EF_ARC_OSABI_CURRENT >> 8,
 			EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ?
-- cgit

From ff64d695f92123f7d341473921a46add51a44a87 Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev
Date: Tue, 14 Nov 2017 17:32:13 +0300
Subject: ARC: [plat-axs10x] DTS: Add reset controller node to manage ethernet reset

The DW ethernet controller on axs10x sometimes hangs after a SW reset. Invoke
the newly added driver (reset-axs10x.c) by adding the DT bits.

With this in place, we don't need the open-coded quirk in platform code, so
get rid of it as well!

Signed-off-by: Eugeniy Paltsev
Signed-off-by: Vineet Gupta
---
 arch/arc/boot/dts/axs10x_mb.dtsi | 8 ++++++++
 arch/arc/plat-axs10x/axs10x.c    | 7 -------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index e114000a84f5..74d070cd3c13 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -16,6 +16,12 @@
 		ranges = <0x00000000 0x0 0xe0000000 0x10000000>;
 		interrupt-parent = <&mb_intc>;
 
+		creg_rst: reset-controller@11220 {
+			compatible = "snps,axs10x-reset";
+			#reset-cells = <1>;
+			reg = <0x11220 0x4>;
+		};
+
 		i2sclk: i2sclk@100a0 {
 			compatible = "snps,axs10x-i2s-pll-clock";
 			reg = <0x100a0 0x10>;
@@ -73,6 +79,8 @@
 			clocks = <&apbclk>;
 			clock-names = "stmmaceth";
 			max-speed = <100>;
+			resets = <&creg_rst 5>;
+			reset-names = "stmmaceth";
 		};
 
 		ehci@0x40000 {
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index cf14ebc36916..f1ac6790da5f 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -111,13 +111,6 @@ static void __init axs10x_early_init(void)
 
 	axs10x_enable_gpio_intc_wire();
 
-	/*
-	 * Reset ethernet IP core.
-	 * TODO: get rid of this quirk after axs10x reset driver (or simple
-	 * reset driver) will be available in upstream.
-	 */
-	iowrite32((1 << 5), (void __iomem *) CREG_MB_SW_RESET);
-
 	scnprintf(mb, 32, "MainBoard v%d", mb_rev);
 	axs10x_print_board_ver(CREG_MB_VER, mb);
 }
-- cgit

From 4d431290402c8d867af7ba45ee75407d68748c4a Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Sat, 9 May 2015 18:27:30 +0530
Subject: ARCv2: perf: tweak overflow interrupt

The current perf ISR loops through all 32 counters, checking for each one
whether it caused the interrupt. Instead, loop only over the counters which
actually interrupted (typically 1).

Acked-by: Peter Zijlstra (Intel)
Signed-off-by: Vineet Gupta
---
 arch/arc/kernel/perf_event.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 2ce24e74f879..0eaa132a2c90 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -377,21 +377,22 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 	struct perf_sample_data data;
 	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
 	struct pt_regs *regs;
-	int active_ints;
+	unsigned int active_ints;
 	int idx;
 
 	arc_pmu_disable(&arc_pmu->pmu);
 
 	active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT);
+	if (!active_ints)
+		goto done;
 
 	regs = get_irq_regs();
 
-	for (idx = 0; idx < arc_pmu->n_counters; idx++) {
-		struct perf_event *event = pmu_cpu->act_counter[idx];
+	do {
+		struct perf_event *event;
 		struct hw_perf_event *hwc;
 
-		if (!(active_ints & (1 << idx)))
-			continue;
+		idx = __ffs(active_ints);
 
 		/* Reset interrupt flag by writing of 1 */
 		write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
@@ -404,19 +405,22 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 		write_aux_reg(ARC_REG_PCT_INT_CTRL,
 			read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
 
+		event = pmu_cpu->act_counter[idx];
 		hwc = &event->hw;
 
 		WARN_ON_ONCE(hwc->idx != idx);
 
 		arc_perf_event_update(event, &event->hw, event->hw.idx);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!arc_pmu_event_set_period(event))
-			continue;
+		if (arc_pmu_event_set_period(event)) {
+			if (perf_event_overflow(event, &data, regs))
+				arc_pmu_stop(event, 0);
+		}
 
-		if (perf_event_overflow(event, &data, regs))
-			arc_pmu_stop(event, 0);
-	}
+		active_ints &= ~(1U << idx);
+	} while (active_ints);
 
+done:
 	arc_pmu_enable(&arc_pmu->pmu);
 	return IRQ_HANDLED;
-- cgit

From 5b9027d6d044d4917992119d184ab0bb616489cc Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Thu, 8 Oct 2015 22:17:48 +0530
Subject: ARCv2: perf: optimize given that num counters <= 32

Use the ffz primitive, which maps to an ARCv2 instruction, vs. the
non-atomic __test_and_set_bit.

It is unlikely that we will ever have more than 32 counters, but still add a
BUILD_BUG to catch that.

Acked-by: Peter Zijlstra (Intel)
Signed-off-by: Vineet Gupta
---
 arch/arc/kernel/perf_event.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 0eaa132a2c90..8aec462d90fb 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -336,15 +336,12 @@ static int arc_pmu_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	if (__test_and_set_bit(idx, pmu_cpu->used_mask)) {
-		idx = find_first_zero_bit(pmu_cpu->used_mask,
-					  arc_pmu->n_counters);
-		if (idx == arc_pmu->n_counters)
-			return -EAGAIN;
-
-		__set_bit(idx, pmu_cpu->used_mask);
-		hwc->idx = idx;
-	}
+	idx = ffz(pmu_cpu->used_mask[0]);
+	if (idx == arc_pmu->n_counters)
+		return -EAGAIN;
+
+	__set_bit(idx, pmu_cpu->used_mask);
+	hwc->idx = idx;
 
 	write_aux_reg(ARC_REG_PCT_INDEX, idx);
 
@@ -465,6 +462,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 		pr_err("This core does not have performance counters!\n");
 		return -ENODEV;
 	}
+	BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
 	BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);
 
 	READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
-- cgit

From 82385732b1c9d6a22942b5fe6e48a99891cb806f Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Wed, 28 Sep 2016 11:53:17 -0700
Subject: ARC: perf: avoid vmalloc backed mmap

For a non-aliasing Dcache, vmalloc is not needed. vmalloc triggers additional
D-TLB misses in the perf interrupt code path, making it slightly inefficient,
as evident from the hackbench runs below.

| [ARCLinux]# perf stat -e dTLB-load-misses --repeat 5 hackbench
| Running with 10*40 (== 400) tasks.
| Time: 35.060
| ...
| Performance counter stats for 'hackbench' (5 runs):

Before: 399235 dTLB-load-misses ( +- 2.08% )
After : 397676 dTLB-load-misses ( +- 2.27% )

Acked-by: Peter Zijlstra (Intel)
Signed-off-by: Vineet Gupta
---
 arch/arc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c84e67fdea09..f3cad98eeb8f 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -39,7 +39,7 @@ config ARC
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
-	select PERF_USE_VMALLOC
+	select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
-- cgit
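
Editor's footnote: the two perf patches above both rely on the same bit-scan idiom,
namely dispatching only over the set bits of a status word (find-first-set loop in
arc_pmu_intr) and grabbing a free slot via find-first-zero (ffz in arc_pmu_add).
The snippet below is a minimal, hypothetical user-space sketch of that idiom, not
kernel code; it assumes the GCC-style __builtin_ctz() builtin as a stand-in for the
kernel's arch-optimized __ffs()/ffz() helpers.

#include <stdio.h>

/* Stand-ins for the kernel's __ffs()/ffz() on a 32-bit word (assumption:
 * GCC-style builtins; callers must never pass a word with no matching bit). */
static unsigned int first_set(unsigned int w)  { return (unsigned int)__builtin_ctz(w); }
static unsigned int first_zero(unsigned int w) { return (unsigned int)__builtin_ctz(~w); }

int main(void)
{
	unsigned int active_ints = 0x812;	/* pretend counters 1, 4 and 11 raised the interrupt */
	unsigned int used_mask   = 0x00f;	/* pretend counters 0..3 are already allocated */

	/* Overflow-ISR pattern (cf. arc_pmu_intr above): visit only the set bits */
	while (active_ints) {
		unsigned int idx = first_set(active_ints);

		printf("servicing counter %u\n", idx);
		active_ints &= ~(1U << idx);	/* clear the bit just handled */
	}

	/* Allocation pattern (cf. arc_pmu_add above): take the first free slot */
	printf("next free counter: %u\n", first_zero(used_mask));	/* prints 4 */
	return 0;
}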