Diffstat (limited to 'arch/x86/events/intel/pt.c')
-rw-r--r--  arch/x86/events/intel/pt.c  269
1 file changed, 201 insertions(+), 68 deletions(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 915847655c06..44524a387c58 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -13,14 +13,19 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/types.h>
+#include <linux/bits.h>
+#include <linux/limits.h>
#include <linux/slab.h>
#include <linux/device.h>
+#include <linux/kvm_types.h>
+#include <asm/cpuid/api.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/intel_pt.h>
-#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "../perf_event.h"
#include "pt.h"
@@ -57,12 +62,14 @@ static struct pt_cap_desc {
PT_CAP(mtc, 0, CPUID_EBX, BIT(3)),
PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)),
PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)),
+ PT_CAP(event_trace, 0, CPUID_EBX, BIT(7)),
+ PT_CAP(tnt_disable, 0, CPUID_EBX, BIT(8)),
PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)),
PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
- PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
+ PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7),
PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
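Note: the two new capability bits and the widened num_address_ranges mask come straight from CPUID leaf 0x14. As a hedged userspace illustration (not part of the patch; GCC/Clang <cpuid.h> on x86), the same bits can be probed like this:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0x14, sub-leaf 0: Intel PT capability enumeration */
	if (!__get_cpuid_count(0x14, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("event_trace: %u\n", (ebx >> 7) & 1);	/* PT_CAP_event_trace */
	printf("tnt_disable: %u\n", (ebx >> 8) & 1);	/* PT_CAP_tnt_disable */

	if (eax >= 1) {		/* sub-leaf 1 is enumerated */
		__get_cpuid_count(0x14, 1, &eax, &ebx, &ecx, &edx);
		/* mask widened from 0x3 to 0x7 above: EAX[2:0] */
		printf("num_address_ranges: %u\n", eax & 0x7);
	}
	return 0;
}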
@@ -76,13 +83,13 @@ u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability)
return (c & cd->mask) >> shift;
}
-EXPORT_SYMBOL_GPL(intel_pt_validate_cap);
+EXPORT_SYMBOL_FOR_KVM(intel_pt_validate_cap);
u32 intel_pt_validate_hw_cap(enum pt_capabilities cap)
{
return intel_pt_validate_cap(pt_pmu.caps, cap);
}
-EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap);
+EXPORT_SYMBOL_FOR_KVM(intel_pt_validate_hw_cap);
static ssize_t pt_cap_show(struct device *cdev,
struct device_attribute *attr,
@@ -108,6 +115,8 @@ PMU_FORMAT_ATTR(tsc, "config:10" );
PMU_FORMAT_ATTR(noretcomp, "config:11" );
PMU_FORMAT_ATTR(ptw, "config:12" );
PMU_FORMAT_ATTR(branch, "config:13" );
+PMU_FORMAT_ATTR(event, "config:31" );
+PMU_FORMAT_ATTR(notnt, "config:55" );
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
@@ -116,6 +125,8 @@ static struct attribute *pt_formats_attr[] = {
&format_attr_pt.attr,
&format_attr_cyc.attr,
&format_attr_pwr_evt.attr,
+ &format_attr_event.attr,
+ &format_attr_notnt.attr,
&format_attr_fup_on_ptw.attr,
&format_attr_mtc.attr,
&format_attr_tsc.attr,
@@ -185,7 +196,7 @@ static int __init pt_pmu_hw_init(void)
int ret;
long i;
- rdmsrl(MSR_PLATFORM_INFO, reg);
+ rdmsrq(MSR_PLATFORM_INFO, reg);
pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
/*
@@ -193,21 +204,21 @@ static int __init pt_pmu_hw_init(void)
* otherwise, zero for numerator stands for "not enumerated"
* as per SDM
*/
- if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
+ if (boot_cpu_data.cpuid_level >= CPUID_LEAF_TSC) {
u32 eax, ebx, ecx, edx;
- cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
+ cpuid(CPUID_LEAF_TSC, &eax, &ebx, &ecx, &edx);
pt_pmu.tsc_art_num = ebx;
pt_pmu.tsc_art_den = eax;
}
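Note: leaf 0x15 (CPUID_LEAF_TSC) enumerates the ART-to-TSC ratio as EBX/EAX, with a zero numerator meaning "not enumerated" per the SDM. A minimal sketch of the conversion a trace decoder would perform with these values (art_to_tsc is a hypothetical helper, not from this file):

#include <stdint.h>
#include <stdio.h>

/* TSC = ART * num / den; split the division to reduce overflow risk */
static uint64_t art_to_tsc(uint64_t art, uint32_t num, uint32_t den)
{
	if (!num || !den)
		return 0;	/* ratio not enumerated */
	return art / den * num + art % den * num / den;
}

int main(void)
{
	/* hypothetical values for illustration only */
	printf("%llu\n", (unsigned long long)art_to_tsc(1000000, 2, 1));
	return 0;
}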
/* model-specific quirks */
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_BROADWELL:
- case INTEL_FAM6_BROADWELL_D:
- case INTEL_FAM6_BROADWELL_G:
- case INTEL_FAM6_BROADWELL_X:
+ switch (boot_cpu_data.x86_vfm) {
+ case INTEL_BROADWELL:
+ case INTEL_BROADWELL_D:
+ case INTEL_BROADWELL_G:
+ case INTEL_BROADWELL_X:
/* not setting BRANCH_EN will #GP, erratum BDM106 */
pt_pmu.branch_en_always_on = true;
break;
@@ -221,7 +232,7 @@ static int __init pt_pmu_hw_init(void)
* "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
* post-VMXON.
*/
- rdmsrl(MSR_IA32_VMX_MISC, reg);
+ rdmsrq(MSR_IA32_VMX_MISC, reg);
if (reg & BIT(14))
pt_pmu.vmx = true;
}
@@ -296,6 +307,8 @@ fail:
RTIT_CTL_CYC_PSB | \
RTIT_CTL_MTC | \
RTIT_CTL_PWR_EVT_EN | \
+ RTIT_CTL_EVENT_EN | \
+ RTIT_CTL_NOTNT | \
RTIT_CTL_FUP_ON_PTW | \
RTIT_CTL_PTW_EN)
@@ -350,6 +363,14 @@ static bool pt_event_valid(struct perf_event *event)
!intel_pt_validate_hw_cap(PT_CAP_power_event_trace))
return false;
+ if (config & RTIT_CTL_EVENT_EN &&
+ !intel_pt_validate_hw_cap(PT_CAP_event_trace))
+ return false;
+
+ if (config & RTIT_CTL_NOTNT &&
+ !intel_pt_validate_hw_cap(PT_CAP_tnt_disable))
+ return false;
+
if (config & RTIT_CTL_PTW) {
if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite))
return false;
@@ -398,15 +419,18 @@ static bool pt_event_valid(struct perf_event *event)
static void pt_config_start(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
- u64 ctl = event->hw.config;
+ u64 ctl = event->hw.aux_config;
+
+ if (READ_ONCE(event->hw.aux_paused))
+ return;
ctl |= RTIT_CTL_TRACEEN;
if (READ_ONCE(pt->vmx_on))
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
else
- wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ wrmsrq(MSR_IA32_RTIT_CTL, ctl);
- WRITE_ONCE(event->hw.config, ctl);
+ WRITE_ONCE(event->hw.aux_config, ctl);
}
/* Address ranges and their corresponding msr configuration registers */
@@ -463,16 +487,16 @@ static u64 pt_config_filters(struct perf_event *event)
/* avoid redundant msr writes */
if (pt->filters.filter[range].msr_a != filter->msr_a) {
- wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+ wrmsrq(pt_address_ranges[range].msr_a, filter->msr_a);
pt->filters.filter[range].msr_a = filter->msr_a;
}
if (pt->filters.filter[range].msr_b != filter->msr_b) {
- wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+ wrmsrq(pt_address_ranges[range].msr_b, filter->msr_b);
pt->filters.filter[range].msr_b = filter->msr_b;
}
- rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+ rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off;
}
return rtit_ctl;
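Note: the (u64) cast above is load-bearing. filter->config is an unsigned long, and the ADDRn_CFG fields start at bit 32 of RTIT_CTL, so on a 32-bit kernel the unwidened shift count equals the operand width, which is undefined behaviour in C. A standalone sketch of the pitfall (illustrative only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned long cfg = 1;		/* 32 bits wide on an ILP32 build */
	unsigned int reg_off = 32;	/* ADDR0_CFG lives at RTIT_CTL bit 32 */

	/* uint64_t bad = cfg << reg_off;   undefined on ILP32 */
	uint64_t good = (uint64_t)cfg << reg_off;	/* widen first */

	printf("%#llx\n", (unsigned long long)good);
	return 0;
}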
@@ -485,9 +509,9 @@ static void pt_config(struct perf_event *event)
u64 reg;
/* First round: clear STATUS, in particular the PSB byte counter. */
- if (!event->hw.config) {
+ if (!event->hw.aux_config) {
perf_event_itrace_started(event);
- wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+ wrmsrq(MSR_IA32_RTIT_STATUS, 0);
}
reg = pt_config_filters(event);
@@ -515,14 +539,31 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
- event->hw.config = reg;
+ event->hw.aux_config = reg;
+
+ /*
+ * Allow resume before starting so as not to overwrite a value set by a
+ * PMI.
+ */
+ barrier();
+ WRITE_ONCE(pt->resume_allowed, 1);
+ /* Configuration is complete, it is now OK to handle an NMI */
+ barrier();
+ WRITE_ONCE(pt->handle_nmi, 1);
+ barrier();
pt_config_start(event);
+ barrier();
+ /*
+ * Allow pause after starting so its pt_config_stop() doesn't race with
+ * pt_config_start().
+ */
+ WRITE_ONCE(pt->pause_allowed, 1);
}
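Note: the barrier()/WRITE_ONCE() sequence in pt_config() orders flag publication against a PMI on the same CPU: resume becomes legal before TraceEn is set, the NMI is enabled only once configuration is complete, and pause only after the start, so an NMI landing anywhere in between sees a consistent state. Compiler barriers suffice because the observer is a local NMI, not another CPU. A minimal userspace model of the pattern, with a signal handler standing in for the PMI (names are illustrative):

#include <signal.h>
#include <stdio.h>

#define barrier() __asm__ __volatile__("" ::: "memory")

static volatile sig_atomic_t resume_allowed, handle_nmi, pause_allowed;

static void pmi(int sig)	/* stands in for intel_pt_interrupt() */
{
	(void)sig;
	if (handle_nmi)		/* only act once configuration is published */
		printf("PMI: handled\n");
}

int main(void)
{
	signal(SIGUSR1, pmi);

	resume_allowed = 1;	/* 1: resume may restart tracing */
	barrier();
	handle_nmi = 1;		/* 2: config complete, PMI may proceed */
	barrier();
	/* pt_config_start() would set TraceEn here */
	barrier();
	pause_allowed = 1;	/* 3: pause may now stop tracing */

	raise(SIGUSR1);
	return 0;
}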
static void pt_config_stop(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
- u64 ctl = READ_ONCE(event->hw.config);
+ u64 ctl = READ_ONCE(event->hw.aux_config);
/* may be already stopped by a PMI */
if (!(ctl & RTIT_CTL_TRACEEN))
@@ -530,9 +571,9 @@ static void pt_config_stop(struct perf_event *event)
ctl &= ~RTIT_CTL_TRACEEN;
if (!READ_ONCE(pt->vmx_on))
- wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+ wrmsrq(MSR_IA32_RTIT_CTL, ctl);
- WRITE_ONCE(event->hw.config, ctl);
+ WRITE_ONCE(event->hw.aux_config, ctl);
/*
* A wrmsr that disables trace generation serializes other PT
@@ -619,13 +660,13 @@ static void pt_config_buffer(struct pt_buffer *buf)
reg = virt_to_phys(base);
if (pt->output_base != reg) {
pt->output_base = reg;
- wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+ wrmsrq(MSR_IA32_RTIT_OUTPUT_BASE, reg);
}
reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
if (pt->output_mask != reg) {
pt->output_mask = reg;
- wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+ wrmsrq(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
}
@@ -718,6 +759,7 @@ static bool topa_table_full(struct topa *topa)
/**
* topa_insert_pages() - create a list of ToPA tables
* @buf: PT buffer being initialized.
+ * @cpu: CPU on which to allocate.
* @gfp: Allocation flags.
*
* This initializes a list of ToPA tables with entries from
@@ -809,11 +851,13 @@ static void pt_buffer_advance(struct pt_buffer *buf)
buf->cur_idx++;
if (buf->cur_idx == buf->cur->last) {
- if (buf->cur == buf->last)
+ if (buf->cur == buf->last) {
buf->cur = buf->first;
- else
+ buf->wrapped = true;
+ } else {
buf->cur = list_entry(buf->cur->list.next, struct topa,
list);
+ }
buf->cur_idx = 0;
}
}
@@ -827,8 +871,11 @@ static void pt_buffer_advance(struct pt_buffer *buf)
static void pt_update_head(struct pt *pt)
{
struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ bool wrapped = buf->wrapped;
u64 topa_idx, base, old;
+ buf->wrapped = false;
+
if (buf->single) {
local_set(&buf->data_size, buf->output_off);
return;
@@ -846,7 +893,7 @@ static void pt_update_head(struct pt *pt)
} else {
old = (local64_xchg(&buf->head, base) &
((buf->nr_pages << PAGE_SHIFT) - 1));
- if (base < old)
+ if (base < old || (base == old && wrapped))
base += buf->nr_pages << PAGE_SHIFT;
local_add(base - old, &buf->data_size);
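Note: without the wrapped flag, a writer that makes exactly one full lap lands back on base == old and the modular difference collapses to zero, under-reporting a full buffer of data. A self-contained sketch of the corrected arithmetic, assuming a power-of-two buffer size:

#include <stdint.h>
#include <stdio.h>

/* bytes produced since 'old' in a circular buffer of 'size' bytes */
static uint64_t aux_progress(uint64_t base, uint64_t old, uint64_t size,
			     int wrapped)
{
	if (base < old || (base == old && wrapped))
		base += size;	/* the writer lapped the old position */
	return base - old;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)aux_progress(0, 0, 4096, 1)); /* 4096 */
	printf("%llu\n", (unsigned long long)aux_progress(0, 0, 4096, 0)); /* 0 */
	return 0;
}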
@@ -859,7 +906,7 @@ static void pt_update_head(struct pt *pt)
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
- return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
+ return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
}
/**
@@ -881,7 +928,7 @@ static void pt_handle_status(struct pt *pt)
int advance = 0;
u64 status;
- rdmsrl(MSR_IA32_RTIT_STATUS, status);
+ rdmsrq(MSR_IA32_RTIT_STATUS, status);
if (status & RTIT_STATUS_ERROR) {
pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
@@ -897,8 +944,9 @@ static void pt_handle_status(struct pt *pt)
* means we are already losing data; need to let the decoder
* know.
*/
- if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
- buf->output_off == pt_buffer_region_size(buf)) {
+ if (!buf->single &&
+ (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
+ buf->output_off == pt_buffer_region_size(buf))) {
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
@@ -924,7 +972,7 @@ static void pt_handle_status(struct pt *pt)
if (advance)
pt_buffer_advance(buf);
- wrmsrl(MSR_IA32_RTIT_STATUS, status);
+ wrmsrq(MSR_IA32_RTIT_STATUS, status);
}
/**
@@ -939,12 +987,12 @@ static void pt_read_offset(struct pt_buffer *buf)
struct topa_page *tp;
if (!buf->single) {
- rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+ rdmsrq(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
tp = phys_to_virt(pt->output_base);
buf->cur = &tp->topa;
}
- rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
+ rdmsrq(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
/* offset within current output region */
buf->output_off = pt->output_mask >> 32;
/* index of current output region within this table */
@@ -970,7 +1018,7 @@ pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
* order allocations, there shouldn't be many of these.
*/
list_for_each_entry(topa, &buf->tables, list) {
- if (topa->offset + topa->size > pg << PAGE_SHIFT)
+ if (topa->offset + topa->size > (unsigned long)pg << PAGE_SHIFT)
goto found;
}
@@ -1188,8 +1236,11 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
/**
* pt_buffer_init_topa() - initialize ToPA table for pt buffer
* @buf: PT buffer.
- * @size: Total size of all regions within this ToPA.
+ * @cpu: CPU on which to allocate.
+ * @nr_pages: No. of pages to allocate.
* @gfp: Allocation flags.
+ *
+ * Return: 0 on success or error code.
*/
static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
unsigned long nr_pages, gfp_t gfp)
@@ -1244,6 +1295,15 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
if (1 << order != nr_pages)
goto out;
+ /*
+ * Some processors cannot always support single range for more than
+ * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might
+ * also be affected, so for now rather than trying to keep track of
+ * which ones, just disable it for all.
+ */
+ if (nr_pages > 1)
+ goto out;
+
buf->single = true;
buf->nr_pages = nr_pages;
ret = 0;
@@ -1253,7 +1313,7 @@ out:
/**
* pt_buffer_setup_aux() - set up topa tables for a PT buffer
- * @cpu: Cpu on which to allocate, -1 means current.
+ * @event: Performance event
* @pages: Array of pointers to buffer pages passed from perf core.
* @nr_pages: Number of pages in the buffer.
* @snapshot: If this is a snapshot/overwrite counter.
@@ -1347,10 +1407,26 @@ static void pt_addr_filters_fini(struct perf_event *event)
event->hw.addr_filters = NULL;
}
-static inline bool valid_kernel_ip(unsigned long ip)
+#ifdef CONFIG_X86_64
+/* Clamp to a canonical address greater-than-or-equal-to the address given */
+static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits)
+{
+ return __is_canonical_address(vaddr, vaddr_bits) ?
+ vaddr :
+ -BIT_ULL(vaddr_bits - 1);
+}
+
+/* Clamp to a canonical address less-than-or-equal-to the address given */
+static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits)
{
- return virt_addr_valid(ip) && kernel_ip(ip);
+ return __is_canonical_address(vaddr, vaddr_bits) ?
+ vaddr :
+ BIT_ULL(vaddr_bits - 1) - 1;
}
+#else
+#define clamp_to_ge_canonical_addr(x, y) (x)
+#define clamp_to_le_canonical_addr(x, y) (x)
+#endif
static int pt_event_addr_filters_validate(struct list_head *filters)
{
@@ -1366,14 +1442,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
filter->action == PERF_ADDR_FILTER_ACTION_START)
return -EOPNOTSUPP;
- if (!filter->path.dentry) {
- if (!valid_kernel_ip(filter->offset))
- return -EINVAL;
-
- if (!valid_kernel_ip(filter->offset + filter->size))
- return -EINVAL;
- }
-
if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
return -EOPNOTSUPP;
}
@@ -1397,9 +1465,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
if (filter->path.dentry && !fr[range].start) {
msr_a = msr_b = 0;
} else {
- /* apply the offset */
- msr_a = fr[range].start;
- msr_b = msr_a + fr[range].size - 1;
+ unsigned long n = fr[range].size - 1;
+ unsigned long a = fr[range].start;
+ unsigned long b;
+
+ if (a > ULONG_MAX - n)
+ b = ULONG_MAX;
+ else
+ b = a + n;
+ /*
+ * Apply the offset. 64-bit addresses written to the
+ * MSRs must be canonical, but the range can encompass
+ * non-canonical addresses. Since software cannot
+ * execute at non-canonical addresses, adjusting to
+ * canonical addresses does not affect the result of the
+ * address filter.
+ */
+ msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits);
+ msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits);
+ if (msr_b < msr_a)
+ msr_a = msr_b = 0;
}
filters->filter[range].msr_a = msr_a;
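Note: pulling the hunk together: the range end is computed with a saturating add so start + size - 1 cannot wrap past ULONG_MAX, both ends are then clamped to canonical addresses, and the pair is zeroed when clamping makes them cross (the whole range sat inside the non-canonical hole). A hedged continuation of the clamp sketch above (filter_to_msrs is a hypothetical helper):

/* slots into the previous sketch, reusing clamp_ge()/clamp_le() */
static void filter_to_msrs(uint64_t a, uint64_t n, unsigned int bits,
			   uint64_t *msr_a, uint64_t *msr_b)
{
	uint64_t b;

	if (a > UINT64_MAX - n)		/* n == size - 1 */
		b = UINT64_MAX;		/* saturate instead of wrapping */
	else
		b = a + n;

	*msr_a = clamp_ge(a, bits);
	*msr_b = clamp_le(b, bits);
	if (*msr_b < *msr_a)		/* range entirely non-canonical */
		*msr_a = *msr_b = 0;
}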
@@ -1454,6 +1539,7 @@ void intel_pt_interrupt(void)
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf) {
event->hw.state = PERF_HES_STOPPED;
+ WRITE_ONCE(pt->resume_allowed, 0);
return;
}
@@ -1462,6 +1548,7 @@ void intel_pt_interrupt(void)
ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret) {
perf_aux_output_end(&pt->handle, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
return;
}
@@ -1500,11 +1587,11 @@ void intel_pt_handle_vmx(int on)
/* Turn PTs back on */
if (!on && event)
- wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config);
+ wrmsrq(MSR_IA32_RTIT_CTL, event->hw.aux_config);
local_irq_restore(flags);
}
-EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+EXPORT_SYMBOL_FOR_KVM(intel_pt_handle_vmx);
/*
* PMU callbacks
@@ -1516,6 +1603,26 @@ static void pt_event_start(struct perf_event *event, int mode)
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
+ if (mode & PERF_EF_RESUME) {
+ if (READ_ONCE(pt->resume_allowed)) {
+ u64 status;
+
+ /*
+ * Only if the trace is not active and the error and
+ * stopped bits are clear, is it safe to start, but a
+ * PMI might have just cleared these, so resume_allowed
+ * must be checked again also.
+ */
+ rdmsrq(MSR_IA32_RTIT_STATUS, status);
+ if (!(status & (RTIT_STATUS_TRIGGEREN |
+ RTIT_STATUS_ERROR |
+ RTIT_STATUS_STOPPED)) &&
+ READ_ONCE(pt->resume_allowed))
+ pt_config_start(event);
+ }
+ return;
+ }
+
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
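Note: the resume path above re-checks resume_allowed after reading RTIT_STATUS because a PMI can clear the flag between the two reads, and a just-stopped trace can present clear TriggerEn/Error/Stopped bits. The shape is: check flag, read hardware state, re-check flag, then act. A minimal model (status_busy() is a stub standing in for the RTIT_STATUS test):

#include <signal.h>

static volatile sig_atomic_t resume_allowed = 1;

static int status_busy(void)
{
	return 0;	/* stub: real code tests TriggerEn/Error/Stopped */
}

static void try_resume(void)
{
	if (!resume_allowed)
		return;
	if (status_busy())
		return;			/* hardware not safely idle */
	if (!resume_allowed)		/* a PMI may have raced the read */
		return;
	/* only now is it safe to set TraceEn */
}

int main(void)
{
	try_resume();
	return 0;
}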
@@ -1526,7 +1633,6 @@ static void pt_event_start(struct perf_event *event, int mode)
goto fail_end_stop;
}
- WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
pt_config_buffer(buf);
@@ -1544,11 +1650,27 @@ static void pt_event_stop(struct perf_event *event, int mode)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
+ if (mode & PERF_EF_PAUSE) {
+ if (READ_ONCE(pt->pause_allowed))
+ pt_config_stop(event);
+ return;
+ }
+
/*
* Protect against the PMI racing with disabling wrmsr,
* see comment in intel_pt_interrupt().
*/
WRITE_ONCE(pt->handle_nmi, 0);
+ barrier();
+
+ /*
+ * Prevent a resume from attempting to restart tracing, or a pause
+ * during a subsequent start. Do this after clearing handle_nmi so that
+ * pt_event_snapshot_aux() will not re-allow them.
+ */
+ WRITE_ONCE(pt->pause_allowed, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
+ barrier();
pt_config_stop(event);
@@ -1599,12 +1721,15 @@ static long pt_event_snapshot_aux(struct perf_event *event,
if (WARN_ON_ONCE(!buf->snapshot))
return 0;
+ /* Prevent pause/resume from attempting to start/stop tracing */
+ WRITE_ONCE(pt->pause_allowed, 0);
+ WRITE_ONCE(pt->resume_allowed, 0);
+ barrier();
/*
- * Here, handle_nmi tells us if the tracing is on
+ * There is no PT interrupt in this mode, so stop the trace and it will
+ * remain stopped while the buffer is copied.
*/
- if (READ_ONCE(pt->handle_nmi))
- pt_config_stop(event);
-
+ pt_config_stop(event);
pt_read_offset(buf);
pt_update_head(pt);
@@ -1616,12 +1741,16 @@ static long pt_event_snapshot_aux(struct perf_event *event,
ret = perf_output_copy_aux(&pt->handle, handle, from, to);
/*
- * If the tracing was on when we turned up, restart it.
- * Compiler barrier not needed as we couldn't have been
- * preempted by anything that touches pt->handle_nmi.
+ * Here, handle_nmi tells us if the tracing was on.
+ * If the tracing was on, restart it.
*/
- if (pt->handle_nmi)
+ if (READ_ONCE(pt->handle_nmi)) {
+ WRITE_ONCE(pt->resume_allowed, 1);
+ barrier();
pt_config_start(event);
+ barrier();
+ WRITE_ONCE(pt->pause_allowed, 1);
+ }
return ret;
}
@@ -1708,15 +1837,15 @@ static __init int pt_init(void)
if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
return -ENODEV;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu) {
u64 ctl;
- ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
+ ret = rdmsrq_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
if (!ret && (ctl & RTIT_CTL_TRACEEN))
prior_warn++;
}
- put_online_cpus();
+ cpus_read_unlock();
if (prior_warn) {
x86_add_exclusive(x86_lbr_exclusive_pt);
@@ -1736,8 +1865,12 @@ static __init int pt_init(void)
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
+ else
+ pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE;
- pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+ pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
+ PERF_PMU_CAP_ITRACE |
+ PERF_PMU_CAP_AUX_PAUSE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init;