diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-28 16:35:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-28 16:35:01 -0700 |
commit | 1a7c611546e552193180941ecf6b191e659db979 (patch) | |
tree | 2b93a6cf9d10cb83046f7b56325d4c23c2e199f4 /arch/x86/events/intel | |
parent | d637fce03462821127892d7c2bce9ec432ffe7aa (diff) | |
parent | 2c65477f14a359db9f1edee5dd8e683d3dae69e2 (diff) |
Merge tag 'perf-core-2023-08-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf event updates from Ingo Molnar:
- AMD IBS improvements
- Intel PMU driver updates
- Extend core perf facilities & the ARM PMU driver to better handle ARM big.LITTLE events
- Micro-optimize software events and the ring-buffer code
- Misc cleanups & fixes
* tag 'perf-core-2023-08-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/uncore: Remove unnecessary ?: operator around pcibios_err_to_errno() call
perf/x86/intel: Add Crestmont PMU
x86/cpu: Update Hybrids
x86/cpu: Fix Crestmont uarch
x86/cpu: Fix Gracemont uarch
perf: Remove unused extern declaration arch_perf_get_page_size()
perf: Remove unused PERF_PMU_CAP_HETEROGENEOUS_CPUS capability
arm_pmu: Remove unused PERF_PMU_CAP_HETEROGENEOUS_CPUS capability
perf/x86: Remove unused PERF_PMU_CAP_HETEROGENEOUS_CPUS capability
arm_pmu: Add PERF_PMU_CAP_EXTENDED_HW_TYPE capability
perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
perf/mem: Add PERF_MEM_LVLNUM_NA to PERF_MEM_NA
perf/mem: Introduce PERF_MEM_LVLNUM_UNC
perf/ring_buffer: Use local_try_cmpxchg in __perf_output_begin
locking/arch: Avoid variable shadowing in local_try_cmpxchg()
perf/core: Use local64_try_cmpxchg in perf_swevent_set_period
perf/x86: Use local64_try_cmpxchg
perf/amd: Prevent grouping of IBS events
Diffstat (limited to 'arch/x86/events/intel')
-rw-r--r-- | arch/x86/events/intel/core.c | 54 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 12 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 9 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c | 2 |
5 files changed, 66 insertions, 13 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2a284ba951b7..fa355d3658a6 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2129,6 +2129,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +EVENT_ATTR_STR(topdown-retiring, td_retiring_cmt, "event=0x72,umask=0x0"); +EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_cmt, "event=0x73,umask=0x0"); + +static struct attribute *cmt_events_attrs[] = { + EVENT_PTR(td_fe_bound_tnt), + EVENT_PTR(td_retiring_cmt), + EVENT_PTR(td_bad_spec_cmt), + EVENT_PTR(td_be_bound_tnt), + NULL +}; + static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0), @@ -4847,6 +4858,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15"); PMU_FORMAT_ATTR(frontend, "config1:0-23"); +PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63"); + static struct attribute *intel_arch3_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -4877,6 +4890,13 @@ static struct attribute *slm_format_attr[] = { NULL }; +static struct attribute *cmt_format_attr[] = { + &format_attr_offcore_rsp.attr, + &format_attr_ldlat.attr, + &format_attr_snoop_rsp.attr, + NULL +}; + static struct attribute *skl_format_attr[] = { &format_attr_frontend.attr, NULL, @@ -5656,7 +5676,6 @@ static struct attribute *adl_hybrid_extra_attr[] = { NULL }; -PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63"); FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small); static struct attribute *mtl_hybrid_extra_attr_rtm[] = { @@ -6174,7 +6193,7 @@ __init int intel_pmu_init(void) name = "Tremont"; break; - case INTEL_FAM6_ALDERLAKE_N: + case INTEL_FAM6_ATOM_GRACEMONT: x86_pmu.mid_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6204,6 +6223,37 @@ __init int intel_pmu_init(void) name = "gracemont"; break; + case INTEL_FAM6_ATOM_CRESTMONT: + case INTEL_FAM6_ATOM_CRESTMONT_X: + x86_pmu.mid_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + x86_pmu.extra_regs = intel_cmt_extra_regs; + + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + intel_pmu_pebs_data_source_cmt(); + x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.get_event_constraints = cmt_get_event_constraints; + x86_pmu.limit_period = spr_limit_period; + td_attr = cmt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = cmt_format_attr; + pr_cont("Crestmont events, "); + name = "crestmont"; + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 835862c548cc..96fffb2d521d 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -365,13 +365,11 @@ static void cstate_pmu_event_update(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 prev_raw_count, new_raw_count; -again: prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = cstate_pmu_read_counter(event); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; + do { + new_raw_count = cstate_pmu_read_counter(event); + } while (!local64_try_cmpxchg(&hwc->prev_count, + &prev_raw_count, new_raw_count)); local64_add(new_raw_count - prev_raw_count, &event->count); } @@ -671,6 +669,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), @@ -686,7 +685,6 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index df88576d6b2a..eb8dd8b8a1e8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void) __intel_pmu_pebs_data_source_grt(data_source); } -static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source) +static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source) { data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); @@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void) data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); - intel_pmu_pebs_data_source_cmt(data_source); + __intel_pmu_pebs_data_source_cmt(data_source); +} + +void __init intel_pmu_pebs_data_source_cmt(void) +{ + __intel_pmu_pebs_data_source_cmt(pebs_data_source); } static u64 precise_store_data(u64 status) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index bc226603ef3e..69043e02e8a7 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1858,7 +1858,6 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), @@ -1867,6 +1866,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index d49e90dc04a4..4d349986f76a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1502,7 +1502,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool pci_dev_put(ubox_dev); - return err ? pcibios_err_to_errno(err) : 0; + return pcibios_err_to_errno(err); } int snbep_uncore_pci_init(void) |