diff options
Diffstat (limited to 'arch/x86/events/perf_event.h')
| -rw-r--r-- | arch/x86/events/perf_event.h | 1042 |
1 files changed, 944 insertions, 98 deletions
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78d7b7031bfc..3161ec0a3416 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -14,7 +14,10 @@ #include <linux/perf_event.h> +#include <asm/fpu/xstate.h> #include <asm/intel_ds.h> +#include <asm/cpu.h> +#include <asm/msr.h> /* To enable MSR tracing please use the generic trace points. */ @@ -33,15 +36,17 @@ * per-core reg tables. */ enum extra_reg_type { - EXTRA_REG_NONE = -1, /* not used */ + EXTRA_REG_NONE = -1, /* not used */ - EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ - EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ - EXTRA_REG_LBR = 2, /* lbr_select */ - EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ - EXTRA_REG_FE = 4, /* fe_* */ + EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ + EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + EXTRA_REG_LBR = 2, /* lbr_select */ + EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ + EXTRA_REG_FE = 4, /* fe_* */ + EXTRA_REG_SNOOP_0 = 5, /* snoop response 0 */ + EXTRA_REG_SNOOP_1 = 6, /* snoop response 1 */ - EXTRA_REG_MAX /* number of entries needed */ + EXTRA_REG_MAX /* number of entries needed */ }; struct event_constraint { @@ -49,28 +54,84 @@ struct event_constraint { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; u64 idxmsk64; }; - u64 code; - u64 cmask; - int weight; - int overlap; - int flags; + u64 code; + u64 cmask; + int weight; + int overlap; + int flags; + unsigned int size; }; + +static inline bool constraint_match(struct event_constraint *c, u64 ecode) +{ + return ((ecode & c->cmask) - c->code) <= (u64)c->size; +} + +#define PERF_ARCH(name, val) \ + PERF_X86_EVENT_##name = val, + /* * struct hw_perf_event.flags flags */ -#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */ -#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */ -#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */ -#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */ -#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */ -#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */ -#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */ -#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */ -#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ -#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ -#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */ +enum { +#include "perf_event_flags.h" +}; + +#undef PERF_ARCH + +#define PERF_ARCH(name, val) \ + static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) == \ + PERF_X86_EVENT_##name); + +#include "perf_event_flags.h" + +#undef PERF_ARCH + +static inline bool is_topdown_count(struct perf_event *event) +{ + return event->hw.flags & PERF_X86_EVENT_TOPDOWN; +} + +static inline bool is_metric_event(struct perf_event *event) +{ + u64 config = event->attr.config; + + return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) && + ((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) && + ((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX); +} + +static inline bool is_slots_event(struct perf_event *event) +{ + return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS; +} +static inline bool is_topdown_event(struct perf_event *event) +{ + return is_metric_event(event) || is_slots_event(event); +} + +int is_x86_event(struct perf_event *event); + +static inline bool check_leader_group(struct perf_event *leader, int flags) +{ + return is_x86_event(leader) ? !!(leader->hw.flags & flags) : false; +} + +static inline bool is_branch_counters_group(struct perf_event *event) +{ + return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS); +} + +static inline bool is_pebs_counter_event_group(struct perf_event *event) +{ + return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR); +} + +static inline bool is_acr_event_group(struct perf_event *event) +{ + return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR); +} struct amd_nb { int nb_id; /* NorthBridge id */ @@ -80,6 +141,11 @@ struct amd_nb { }; #define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) +#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60) +#define PEBS_OUTPUT_OFFSET 61 +#define PEBS_OUTPUT_MASK (3ull << PEBS_OUTPUT_OFFSET) +#define PEBS_OUTPUT_PT (1ull << PEBS_OUTPUT_OFFSET) +#define PEBS_VIA_PT_MASK (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD) /* * Flags PEBS can handle without an PMI. @@ -94,27 +160,28 @@ struct amd_nb { PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ - PERF_SAMPLE_PERIOD) - -#define PEBS_REGS \ - (PERF_REG_X86_AX | \ - PERF_REG_X86_BX | \ - PERF_REG_X86_CX | \ - PERF_REG_X86_DX | \ - PERF_REG_X86_DI | \ - PERF_REG_X86_SI | \ - PERF_REG_X86_SP | \ - PERF_REG_X86_BP | \ - PERF_REG_X86_IP | \ - PERF_REG_X86_FLAGS | \ - PERF_REG_X86_R8 | \ - PERF_REG_X86_R9 | \ - PERF_REG_X86_R10 | \ - PERF_REG_X86_R11 | \ - PERF_REG_X86_R12 | \ - PERF_REG_X86_R13 | \ - PERF_REG_X86_R14 | \ - PERF_REG_X86_R15) + PERF_SAMPLE_PERIOD | PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_WEIGHT_TYPE) + +#define PEBS_GP_REGS \ + ((1ULL << PERF_REG_X86_AX) | \ + (1ULL << PERF_REG_X86_BX) | \ + (1ULL << PERF_REG_X86_CX) | \ + (1ULL << PERF_REG_X86_DX) | \ + (1ULL << PERF_REG_X86_DI) | \ + (1ULL << PERF_REG_X86_SI) | \ + (1ULL << PERF_REG_X86_SP) | \ + (1ULL << PERF_REG_X86_BP) | \ + (1ULL << PERF_REG_X86_IP) | \ + (1ULL << PERF_REG_X86_FLAGS) | \ + (1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) /* * Per register state. @@ -167,6 +234,18 @@ struct x86_perf_task_context; #define MAX_LBR_ENTRIES 32 enum { + LBR_FORMAT_32 = 0x00, + LBR_FORMAT_LIP = 0x01, + LBR_FORMAT_EIP = 0x02, + LBR_FORMAT_EIP_FLAGS = 0x03, + LBR_FORMAT_EIP_FLAGS2 = 0x04, + LBR_FORMAT_INFO = 0x05, + LBR_FORMAT_TIME = 0x06, + LBR_FORMAT_INFO2 = 0x07, + LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2, +}; + +enum { X86_PERF_KFREE_SHARED = 0, X86_PERF_KFREE_EXCL = 1, X86_PERF_KFREE_MAX @@ -178,7 +257,7 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; int n_events; /* the # of events in the below arrays */ @@ -186,6 +265,8 @@ struct cpu_hw_events { they've never been enabled yet */ int n_txn; /* the # last events in the below arrays; added in the current transaction */ + int n_txn_pair; + int n_txn_metric; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; @@ -193,6 +274,7 @@ struct cpu_hw_events { struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; int n_excl; /* the number of exclusive events */ + int n_late_setup; /* the num of events needs late setup */ unsigned int txn_flags; int is_fake; @@ -201,22 +283,47 @@ struct cpu_hw_events { * Intel DebugStore bits */ struct debug_store *ds; - void *ds_pebs_vaddr; void *ds_bts_vaddr; + /* DS based PEBS or arch-PEBS buffer address */ + void *pebs_vaddr; u64 pebs_enabled; int n_pebs; int n_large_pebs; + int n_pebs_via_pt; + int pebs_output; + + /* Current super set of events hardware configuration */ + u64 pebs_data_cfg; + u64 active_pebs_data_cfg; + int pebs_record_size; + + /* Intel Fixed counter configuration */ + u64 fixed_ctrl_val; + u64 active_fixed_ctrl_val; + + /* Intel ACR configuration */ + u64 acr_cfg_b[X86_PMC_IDX_MAX]; + u64 acr_cfg_c[X86_PMC_IDX_MAX]; + /* Cached CFG_C values */ + u64 cfg_c_val[X86_PMC_IDX_MAX]; /* * Intel LBR bits */ int lbr_users; + int lbr_pebs_users; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; - struct er_account *lbr_sel; + u64 lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */ + union { + struct er_account *lbr_sel; + struct er_account *lbr_ctl; + }; u64 br_sel; - struct x86_perf_task_context *last_task_ctx; + void *last_task_ctx; int last_log_id; + int lbr_select; + void *lbr_xsave; /* * Intel host/guest exclude bits @@ -243,27 +350,54 @@ struct cpu_hw_events { int excl_thread_id; /* 0 or 1 */ /* + * SKL TSX_FORCE_ABORT shadow + */ + u64 tfa_shadow; + + /* + * Perf Metrics + */ + /* number of accepted metrics events */ + int n_metric; + + /* * AMD specific bits */ struct amd_nb *amd_nb; + int brs_active; /* BRS is enabled */ + /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ u64 perf_ctr_virt_mask; + int n_pair; /* Large increment events */ void *kfree_on_online[X86_PERF_KFREE_MAX]; + + struct pmu *pmu; }; -#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\ +#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \ { .idxmsk64 = (n) }, \ .code = (c), \ + .size = (e) - (c), \ .cmask = (m), \ .weight = (w), \ .overlap = (o), \ .flags = f, \ } +#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \ + __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f) + #define EVENT_CONSTRAINT(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) +/* + * The constraint_match() function only works for 'simple' event codes + * and not for extended (AMD64_EVENTSEL_EVENT) events codes. + */ +#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \ + __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0) + #define INTEL_EXCLEVT_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\ 0, PERF_X86_EVENT_EXCL) @@ -299,6 +433,12 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) /* + * Constraint on a range of Event codes + */ +#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \ + EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT) + +/* * Constraint on the Event code + UMask + fixed-mask * * filter mask to validate fixed counter events. @@ -316,6 +456,19 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS) /* + * The special metric counters do not actually exist. They are calculated from + * the combination of the FxCtr3 + MSR_PERF_METRICS. + * + * The special metric counters are mapped to a dummy offset for the scheduler. + * The sharing between multiple users of the same metric without multiplexing + * is not allowed, even though the hardware supports that in principle. + */ + +#define METRIC_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)), \ + INTEL_ARCH_EVENT_MASK) + +/* * Constraint on the Event code + UMask */ #define INTEL_UEVENT_CONSTRAINT(c, n) \ @@ -337,13 +490,32 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) +#define INTEL_PSD_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT) + #define INTEL_PST_CONSTRAINT(c, n) \ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) +#define INTEL_HYBRID_LAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID) + +#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW) + +#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW) + /* Event constraint, but match on all event flags too. */ #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) + +#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \ + EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) /* Check only flags, but allow all event/umask */ #define INTEL_ALL_EVENT_CONSTRAINT(code, n) \ @@ -361,6 +533,11 @@ struct cpu_hw_events { ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) +#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \ + __EVENT_CONSTRAINT_RANGE(code, end, n, \ + ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW) + #define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \ __EVENT_CONSTRAINT(code, n, \ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \ @@ -468,6 +645,12 @@ union perf_capabilities { * values > 32bit. */ u64 full_width_write:1; + u64 pebs_baseline:1; + u64 perf_metrics:1; + u64 pebs_output_pt_available:1; + u64 pebs_timing_info:1; + u64 anythread_deprecated:1; + u64 rdpmc_metrics_clear:1; }; u64 capabilities; }; @@ -507,6 +690,120 @@ enum { x86_lbr_exclusive_max, }; +#define PERF_PEBS_DATA_SOURCE_MAX 0x100 +#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 +#define X86_HYBRID_PMU_TINY_IDX 2 + +enum hybrid_pmu_type { + not_hybrid, + hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX), + hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX), + hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX), + + /* The belows are only used for matching */ + hybrid_big_small = hybrid_big | hybrid_small, + hybrid_small_tiny = hybrid_small | hybrid_tiny, + hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny, +}; + +struct arch_pebs_cap { + u64 caps; + u64 counters; + u64 pdists; +}; + +struct x86_hybrid_pmu { + struct pmu pmu; + const char *name; + enum hybrid_pmu_type pmu_type; + cpumask_t supported_cpus; + union perf_capabilities intel_cap; + u64 intel_ctrl; + u64 pebs_events_mask; + u64 config_mask; + union { + u64 cntr_mask64; + unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 fixed_cntr_mask64; + unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + + union { + u64 acr_cntr_mask64; + unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cause_mask64; + unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + struct event_constraint unconstrained; + + u64 hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + u64 hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + struct event_constraint *event_constraints; + struct event_constraint *pebs_constraints; + struct extra_reg *extra_regs; + + unsigned int late_ack :1, + mid_ack :1, + enabled_ack :1; + + struct arch_pebs_cap arch_pebs_cap; + + u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX]; +}; + +static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu) +{ + return container_of(pmu, struct x86_hybrid_pmu, pmu); +} + +extern struct static_key_false perf_is_hybrid; +#define is_hybrid() static_branch_unlikely(&perf_is_hybrid) + +#define hybrid(_pmu, _field) \ +(*({ \ + typeof(&x86_pmu._field) __Fp = &x86_pmu._field; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = &hybrid_pmu(_pmu)->_field; \ + \ + __Fp; \ +})) + +#define hybrid_var(_pmu, _var) \ +(*({ \ + typeof(&_var) __Fp = &_var; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = &hybrid_pmu(_pmu)->_var; \ + \ + __Fp; \ +})) + +#define hybrid_bit(_pmu, _field) \ +({ \ + bool __Fp = x86_pmu._field; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = hybrid_pmu(_pmu)->_field; \ + \ + __Fp; \ +}) + /* * struct x86_pmu - generic x86 pmu */ @@ -521,19 +818,43 @@ struct x86_pmu { void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); + void (*assign)(struct perf_event *event, int idx); void (*add)(struct perf_event *); void (*del)(struct perf_event *); void (*read)(struct perf_event *event); + int (*set_period)(struct perf_event *event); + u64 (*update)(struct perf_event *event); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); + void (*late_setup)(void); + void (*pebs_enable)(struct perf_event *event); + void (*pebs_disable)(struct perf_event *event); + void (*pebs_enable_all)(void); + void (*pebs_disable_all)(void); unsigned eventsel; unsigned perfctr; + unsigned fixedctr; int (*addr_offset)(int index, bool eventsel); int (*rdpmc_index)(int index); u64 (*event_map)(int); int max_events; - int num_counters; - int num_counters_fixed; + u64 config_mask; + union { + u64 cntr_mask64; + unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 fixed_cntr_mask64; + unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cntr_mask64; + unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cause_mask64; + unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; int cntval_bits; u64 cntval_mask; union { @@ -559,26 +880,23 @@ struct x86_pmu { struct event_constraint *event_constraints; struct x86_pmu_quirk *quirks; - int perfctr_second_write; - u64 (*limit_period)(struct perf_event *event, u64 l); + void (*limit_period)(struct perf_event *event, s64 *l); /* PMI handler bits */ unsigned int late_ack :1, - counter_freezing :1; + mid_ack :1, + enabled_ack :1; /* * sysfs attrs */ int attr_rdpmc_broken; int attr_rdpmc; struct attribute **format_attrs; - struct attribute **event_attrs; - struct attribute **caps_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); - struct attribute **cpu_events; + const struct attribute_group **attr_update; unsigned long attr_freeze_on_smi; - struct attribute **attrs; /* * CPU Hotplug hooks @@ -589,8 +907,8 @@ struct x86_pmu { void (*cpu_dead)(int cpu); void (*check_microcode)(void); - void (*sched_task)(struct perf_event_context *ctx, - bool sched_in); + void (*sched_task)(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); /* * Intel Arch Perfmon v2+ @@ -599,42 +917,91 @@ struct x86_pmu { union perf_capabilities intel_cap; /* - * Intel DebugStore bits + * Intel DebugStore and PEBS bits */ - unsigned int bts :1, - bts_active :1, - pebs :1, - pebs_active :1, - pebs_broken :1, - pebs_prec_dist :1, - pebs_no_tlb :1; + unsigned int bts :1, + bts_active :1, + ds_pebs :1, + pebs_active :1, + pebs_broken :1, + pebs_prec_dist :1, + pebs_no_tlb :1, + pebs_no_isolation :1, + pebs_block :1, + pebs_ept :1, + arch_pebs :1; int pebs_record_size; int pebs_buffer_size; - void (*drain_pebs)(struct pt_regs *regs); + u64 pebs_events_mask; + void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); - int max_pebs_events; + u64 (*pebs_latency_data)(struct perf_event *event, u64 status); unsigned long large_pebs_flags; + u64 rtm_abort_event; + u64 pebs_capable; + + /* + * Intel Architectural PEBS + */ + struct arch_pebs_cap arch_pebs_cap; /* * Intel LBR */ - unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ - int lbr_nr; /* hardware stack size */ - u64 lbr_sel_mask; /* LBR_SELECT valid bits */ - const int *lbr_sel_map; /* lbr_select mappings */ + unsigned int lbr_tos, lbr_from, lbr_to, + lbr_info, lbr_nr; /* LBR base regs and size */ + union { + u64 lbr_sel_mask; /* LBR_SELECT valid bits */ + u64 lbr_ctl_mask; /* LBR_CTL valid bits */ + }; + union { + const int *lbr_sel_map; /* lbr_select mappings */ + int *lbr_ctl_map; /* LBR_CTL mappings */ + }; + u64 lbr_callstack_users; /* lbr callstack system wide users */ bool lbr_double_abort; /* duplicated lbr aborts */ bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ + unsigned int lbr_has_info:1; + unsigned int lbr_has_tsx:1; + unsigned int lbr_from_flags:1; + unsigned int lbr_to_cycles:1; + + /* + * Intel Architectural LBR CPUID Enumeration + */ + unsigned int lbr_depth_mask:8; + unsigned int lbr_deep_c_reset:1; + unsigned int lbr_lip:1; + unsigned int lbr_cpl:1; + unsigned int lbr_filter:1; + unsigned int lbr_call_stack:1; + unsigned int lbr_mispred:1; + unsigned int lbr_timed_lbr:1; + unsigned int lbr_br_type:1; + unsigned int lbr_counters:4; + + void (*lbr_reset)(void); + void (*lbr_read)(struct cpu_hw_events *cpuc); + void (*lbr_save)(void *ctx); + void (*lbr_restore)(void *ctx); + /* * Intel PT/LBR/BTS are exclusive */ atomic_t lbr_exclusive[x86_lbr_exclusive_max]; /* + * Intel perf metrics + */ + int num_topdown_events; + + /* * AMD bits */ unsigned int amd_nb_constraints : 1; + u64 perf_ctr_pair_en; /* * Extra registers for events @@ -645,20 +1012,69 @@ struct x86_pmu { /* * Intel host/guest support (KVM) */ - struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); + struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr, void *data); + + /* + * Check period value for PERF_EVENT_IOC_PERIOD ioctl. + */ + int (*check_period) (struct perf_event *event, u64 period); + + int (*aux_output_match) (struct perf_event *event); + + void (*filter)(struct pmu *pmu, int cpu, bool *ret); + /* + * Hybrid support + * + * Most PMU capabilities are the same among different hybrid PMUs. + * The global x86_pmu saves the architecture capabilities, which + * are available for all PMUs. The hybrid_pmu only includes the + * unique capabilities. + */ + int num_hybrid_pmus; + struct x86_hybrid_pmu *hybrid_pmu; + enum intel_cpu_type (*get_hybrid_cpu_type) (void); }; -struct x86_perf_task_context { - u64 lbr_from[MAX_LBR_ENTRIES]; - u64 lbr_to[MAX_LBR_ENTRIES]; - u64 lbr_info[MAX_LBR_ENTRIES]; - int tos; - int valid_lbrs; +struct x86_perf_task_context_opt { int lbr_callstack_users; int lbr_stack_state; int log_id; }; +struct x86_perf_task_context { + u64 lbr_sel; + int tos; + int valid_lbrs; + struct x86_perf_task_context_opt opt; + struct lbr_entry lbr[MAX_LBR_ENTRIES]; +}; + +struct x86_perf_task_context_arch_lbr { + struct x86_perf_task_context_opt opt; + struct lbr_entry entries[]; +}; + +/* + * Add padding to guarantee the 64-byte alignment of the state buffer. + * + * The structure is dynamically allocated. The size of the LBR state may vary + * based on the number of LBR registers. + * + * Do not put anything after the LBR state. + */ +struct x86_perf_task_context_arch_lbr_xsave { + struct x86_perf_task_context_opt opt; + + union { + struct xregs_state xsave; + struct { + struct fxregs_state i387; + struct xstate_header header; + struct arch_lbr_state lbr; + } __attribute__ ((packed, aligned (XSAVE_ALIGNMENT))); + }; +}; + #define x86_add_quirk(func_) \ do { \ static struct x86_pmu_quirk __quirk __initdata = { \ @@ -676,6 +1092,13 @@ do { \ #define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ +#define PMU_FL_TFA 0x20 /* deal with TSX force abort */ +#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ +#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */ +#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */ +#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */ +#define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */ +#define PMU_FL_DYN_CONSTRAINT 0x800 /* Needs dynamic constraint */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -702,8 +1125,42 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \ .event_str_ht = ht, \ } +#define EVENT_ATTR_STR_HYBRID(_name, v, str, _pmu) \ +static struct perf_pmu_events_hybrid_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_hybrid_sysfs_show, NULL),\ + .id = 0, \ + .event_str = str, \ + .pmu_type = _pmu, \ +} + +#define FORMAT_HYBRID_PTR(_id) (&format_attr_hybrid_##_id.attr.attr) + +#define FORMAT_ATTR_HYBRID(_name, _pmu) \ +static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\ + .attr = __ATTR_RO(_name), \ + .pmu_type = _pmu, \ +} + +struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; +DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period); +DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update); +DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); +DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup); +DECLARE_STATIC_CALL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable); +DECLARE_STATIC_CALL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable); +DECLARE_STATIC_CALL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all); +DECLARE_STATIC_CALL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all); + +static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) +{ + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) + return &((struct x86_perf_task_context_arch_lbr *)ctx)->opt; + + return &((struct x86_perf_task_context *)ctx)->opt; +} + static inline bool x86_pmu_has_lbr_callstack(void) { return x86_pmu.lbr_sel_map && @@ -711,6 +1168,7 @@ static inline bool x86_pmu_has_lbr_callstack(void) } DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); int x86_perf_event_set_period(struct perf_event *event); @@ -735,6 +1193,12 @@ extern u64 __read_mostly hw_cache_extra_regs u64 x86_perf_event_update(struct perf_event *event); +static inline u64 intel_pmu_topdown_event_update(struct perf_event *event, u64 *val) +{ + return x86_perf_event_update(event); +} +DECLARE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update); + static inline unsigned int x86_pmu_config_addr(int index) { return x86_pmu.eventsel + (x86_pmu.addr_offset ? @@ -747,11 +1211,20 @@ static inline unsigned int x86_pmu_event_addr(int index) x86_pmu.addr_offset(index, false) : index); } +static inline unsigned int x86_pmu_fixed_ctr_addr(int index) +{ + return x86_pmu.fixedctr + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, false) : index); +} + static inline int x86_pmu_rdpmc_index(int index) { return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; } +bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask, + unsigned long *fixed_cntr_mask); + int x86_add_exclusive(unsigned int what); void x86_del_exclusive(unsigned int what); @@ -760,7 +1233,7 @@ int x86_reserve_hardware(void); void x86_release_hardware(void); -int x86_pmu_max_precise(void); +int x86_pmu_max_precise(struct pmu *pmu); void hw_perf_lbr_event_destroy(struct perf_event *event); @@ -770,14 +1243,32 @@ int x86_pmu_hw_config(struct perf_event *event); void x86_pmu_disable_all(void); +static inline bool has_amd_brs(struct hw_perf_event *hwc) +{ + return hwc->flags & PERF_X86_EVENT_AMD_BRS; +} + +static inline bool is_counter_pair(struct hw_perf_event *hwc) +{ + return hwc->flags & PERF_X86_EVENT_PAIR; +} + static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); if (hwc->extra_reg.reg) - wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); - wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); + wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config); + + /* + * Add enabled Merge event on next counter + * if large increment event being enabled on this counter + */ + if (is_counter_pair(hwc)) + wrmsrq(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en); + + wrmsrq(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); } void x86_pmu_enable_all(int added); @@ -790,15 +1281,46 @@ void x86_pmu_stop(struct perf_event *event, int flags); static inline void x86_pmu_disable_event(struct perf_event *event) { + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config & ~disable_mask); + + if (is_counter_pair(hwc)) + wrmsrq(x86_pmu_config_addr(hwc->idx + 1), 0); } void x86_pmu_enable_event(struct perf_event *event); int x86_pmu_handle_irq(struct pt_regs *regs); +void x86_pmu_show_pmu_cap(struct pmu *pmu); + +static inline int x86_pmu_num_counters(struct pmu *pmu) +{ + return hweight64(hybrid(pmu, cntr_mask64)); +} + +static inline int x86_pmu_max_num_counters(struct pmu *pmu) +{ + return fls64(hybrid(pmu, cntr_mask64)); +} + +static inline int x86_pmu_num_counters_fixed(struct pmu *pmu) +{ + return hweight64(hybrid(pmu, fixed_cntr_mask64)); +} + +static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu) +{ + return fls64(hybrid(pmu, fixed_cntr_mask64)); +} + +static inline u64 x86_pmu_get_event_config(struct perf_event *event) +{ + return event->attr.config & hybrid(event->pmu, config_mask); +} + extern struct event_constraint emptyconstraint; extern struct event_constraint unconstrained; @@ -832,20 +1354,189 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } +/* + * x86control flow change classification + * x86control flow changes include branches, interrupts, traps, faults + */ +enum { + X86_BR_NONE = 0, /* unknown */ + + X86_BR_USER = 1 << 0, /* branch target is user */ + X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ + + X86_BR_CALL = 1 << 2, /* call */ + X86_BR_RET = 1 << 3, /* return */ + X86_BR_SYSCALL = 1 << 4, /* syscall */ + X86_BR_SYSRET = 1 << 5, /* syscall return */ + X86_BR_INT = 1 << 6, /* sw interrupt */ + X86_BR_IRET = 1 << 7, /* return from interrupt */ + X86_BR_JCC = 1 << 8, /* conditional */ + X86_BR_JMP = 1 << 9, /* jump */ + X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ + X86_BR_IND_CALL = 1 << 11,/* indirect calls */ + X86_BR_ABORT = 1 << 12,/* transaction abort */ + X86_BR_IN_TX = 1 << 13,/* in transaction */ + X86_BR_NO_TX = 1 << 14,/* not in transaction */ + X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ + X86_BR_CALL_STACK = 1 << 16,/* call stack */ + X86_BR_IND_JMP = 1 << 17,/* indirect jump */ + + X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */ + +}; + +#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) +#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX) + +#define X86_BR_ANY \ + (X86_BR_CALL |\ + X86_BR_RET |\ + X86_BR_SYSCALL |\ + X86_BR_SYSRET |\ + X86_BR_INT |\ + X86_BR_IRET |\ + X86_BR_JCC |\ + X86_BR_JMP |\ + X86_BR_IRQ |\ + X86_BR_ABORT |\ + X86_BR_IND_CALL |\ + X86_BR_IND_JMP |\ + X86_BR_ZERO_CALL) + +#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) + +#define X86_BR_ANY_CALL \ + (X86_BR_CALL |\ + X86_BR_IND_CALL |\ + X86_BR_ZERO_CALL |\ + X86_BR_SYSCALL |\ + X86_BR_IRQ |\ + X86_BR_INT) + +int common_branch_type(int type); +int branch_type(unsigned long from, unsigned long to, int abort); +int branch_type_fused(unsigned long from, unsigned long to, int abort, + int *offset); + ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); ssize_t intel_event_sysfs_show(char *page, u64 config); -struct attribute **merge_attr(struct attribute **a, struct attribute **b); - ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_hybrid_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page); + +static inline bool fixed_counter_disabled(int i, struct pmu *pmu) +{ + u64 intel_ctrl = hybrid(pmu, intel_ctrl); + + return !(intel_ctrl >> (i + INTEL_PMC_IDX_FIXED)); +} #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); +int amd_pmu_lbr_init(void); +void amd_pmu_lbr_reset(void); +void amd_pmu_lbr_read(void); +void amd_pmu_lbr_add(struct perf_event *event); +void amd_pmu_lbr_del(struct perf_event *event); +void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); +void amd_pmu_lbr_enable_all(void); +void amd_pmu_lbr_disable_all(void); +int amd_pmu_lbr_hw_config(struct perf_event *event); + +static __always_inline void __amd_pmu_lbr_disable(void) +{ + u64 dbg_ctl, dbg_extn_cfg; + + rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); + + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } +} + +#ifdef CONFIG_PERF_EVENTS_AMD_BRS + +#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */ + +int amd_brs_init(void); +void amd_brs_disable(void); +void amd_brs_enable(void); +void amd_brs_enable_all(void); +void amd_brs_disable_all(void); +void amd_brs_drain(void); +void amd_brs_lopwr_init(void); +int amd_brs_hw_config(struct perf_event *event); +void amd_brs_reset(void); + +static inline void amd_pmu_brs_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + perf_sched_cb_inc(event->pmu); + cpuc->lbr_users++; + /* + * No need to reset BRS because it is reset + * on brs_enable() and it is saturating + */ +} + +static inline void amd_pmu_brs_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + cpuc->lbr_users--; + WARN_ON_ONCE(cpuc->lbr_users < 0); + + perf_sched_cb_dec(event->pmu); +} + +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); +#else +static inline int amd_brs_init(void) +{ + return 0; +} +static inline void amd_brs_disable(void) {} +static inline void amd_brs_enable(void) {} +static inline void amd_brs_drain(void) {} +static inline void amd_brs_lopwr_init(void) {} +static inline void amd_brs_disable_all(void) {} +static inline int amd_brs_hw_config(struct perf_event *event) +{ + return 0; +} +static inline void amd_brs_reset(void) {} + +static inline void amd_pmu_brs_add(struct perf_event *event) +{ +} + +static inline void amd_pmu_brs_del(struct perf_event *event) +{ +} + +static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) +{ +} + +static inline void amd_brs_enable_all(void) +{ +} + +#endif + #else /* CONFIG_CPU_SUP_AMD */ static inline int amd_pmu_init(void) @@ -853,11 +1544,32 @@ static inline int amd_pmu_init(void) return 0; } +static inline int amd_brs_init(void) +{ + return -EOPNOTSUPP; +} + +static inline void amd_brs_drain(void) +{ +} + +static inline void amd_brs_enable_all(void) +{ +} + +static inline void amd_brs_disable_all(void) +{ +} #endif /* CONFIG_CPU_SUP_AMD */ +static inline int is_pebs_pt(struct perf_event *event) +{ + return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT); +} + #ifdef CONFIG_CPU_SUP_INTEL -static inline bool intel_pmu_has_bts(struct perf_event *event) +static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period) { struct hw_perf_event *hwc = &event->hw; unsigned int hw_event, bts_event; @@ -868,7 +1580,33 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return hw_event == bts_event && hwc->sample_period == 1; + return hw_event == bts_event && period == 1; +} + +static inline bool intel_pmu_has_bts(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + return intel_pmu_has_bts_period(event, hwc->sample_period); +} + +static __always_inline void __intel_pmu_pebs_disable_all(void) +{ + wrmsrq(MSR_IA32_PEBS_ENABLE, 0); +} + +static __always_inline void __intel_pmu_arch_lbr_disable(void) +{ + wrmsrq(MSR_ARCH_LBR_CTL, 0); +} + +static __always_inline void __intel_pmu_lbr_disable(void) +{ + u64 debugctl; + + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); } int intel_pmu_save_and_restart(struct perf_event *event); @@ -877,10 +1615,19 @@ struct event_constraint * x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event); -struct intel_shared_regs *allocate_shared_regs(int cpu); +extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu); +extern void intel_cpuc_finish(struct cpu_hw_events *cpuc); int intel_pmu_init(void); +int alloc_arch_pebs_buf_on_cpu(int cpu); + +void release_arch_pebs_buf_on_cpu(int cpu); + +void init_arch_pebs_on_cpu(int cpu); + +void fini_arch_pebs_on_cpu(int cpu); + void init_debug_store_on_cpu(int cpu); void fini_debug_store_on_cpu(int cpu); @@ -889,7 +1636,12 @@ void release_ds_buffers(void); void reserve_ds_buffers(void); +void release_lbr_buffers(void); + +void reserve_lbr_buffers(void); + extern struct event_constraint bts_constraint; +extern struct event_constraint vlbr_constraint; void intel_pmu_enable_bts(u64 config); @@ -897,6 +1649,16 @@ void intel_pmu_disable_bts(void); int intel_pmu_drain_bts_buffer(void); +void intel_pmu_late_setup(void); + +u64 grt_latency_data(struct perf_event *event, u64 status); + +u64 cmt_latency_data(struct perf_event *event, u64 status); + +u64 lnl_latency_data(struct perf_event *event, u64 status); + +u64 arl_h_latency_data(struct perf_event *event, u64 status); + extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; @@ -907,6 +1669,8 @@ extern struct event_constraint intel_glm_pebs_event_constraints[]; extern struct event_constraint intel_glp_pebs_event_constraints[]; +extern struct event_constraint intel_grt_pebs_event_constraints[]; + extern struct event_constraint intel_nehalem_pebs_event_constraints[]; extern struct event_constraint intel_westmere_pebs_event_constraints[]; @@ -921,6 +1685,12 @@ extern struct event_constraint intel_bdw_pebs_event_constraints[]; extern struct event_constraint intel_skl_pebs_event_constraints[]; +extern struct event_constraint intel_icl_pebs_event_constraints[]; + +extern struct event_constraint intel_glc_pebs_event_constraints[]; + +extern struct event_constraint intel_lnc_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event); @@ -935,18 +1705,31 @@ void intel_pmu_pebs_enable_all(void); void intel_pmu_pebs_disable_all(void); -void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); +void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); + +void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc); -void intel_pmu_auto_reload_read(struct perf_event *event); +void intel_pmu_drain_pebs_buffer(void); -void intel_ds_init(void); +void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); -void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +void intel_pebs_init(void); + +void intel_pmu_lbr_save_brstack(struct perf_sample_data *data, + struct cpu_hw_events *cpuc, + struct perf_event *event); + +void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); u64 lbr_from_signext_quirk_wr(u64 val); void intel_pmu_lbr_reset(void); +void intel_pmu_lbr_reset_32(void); + +void intel_pmu_lbr_reset_64(void); + void intel_pmu_lbr_add(struct perf_event *event); void intel_pmu_lbr_del(struct perf_event *event); @@ -957,6 +1740,14 @@ void intel_pmu_lbr_disable_all(void); void intel_pmu_lbr_read(void); +void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc); + +void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc); + +void intel_pmu_lbr_save(void *ctx); + +void intel_pmu_lbr_restore(void *ctx); + void intel_pmu_lbr_init_core(void); void intel_pmu_lbr_init_nhm(void); @@ -973,10 +1764,28 @@ void intel_pmu_lbr_init_skl(void); void intel_pmu_lbr_init_knl(void); +void intel_pmu_lbr_init(void); + +void intel_pmu_arch_lbr_init(void); + void intel_pmu_pebs_data_source_nhm(void); void intel_pmu_pebs_data_source_skl(bool pmem); +void intel_pmu_pebs_data_source_adl(void); + +void intel_pmu_pebs_data_source_grt(void); + +void intel_pmu_pebs_data_source_mtl(void); + +void intel_pmu_pebs_data_source_arl_h(void); + +void intel_pmu_pebs_data_source_cmt(void); + +void intel_pmu_pebs_data_source_lnl(void); + +u64 intel_get_arch_pebs_data_config(struct perf_event *event); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); @@ -998,6 +1807,22 @@ static inline int is_ht_workaround_enabled(void) return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED); } +static inline u64 intel_pmu_pebs_mask(u64 cntr_mask) +{ + return MAX_PEBS_EVENTS_MASK & cntr_mask; +} + +static inline int intel_pmu_max_num_pebs(struct pmu *pmu) +{ + static_assert(MAX_PEBS_EVENTS == 32); + return fls((u32)hybrid(pmu, pebs_events_mask)); +} + +static inline bool intel_pmu_has_pebs(void) +{ + return x86_pmu.ds_pebs || x86_pmu.arch_pebs; +} + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) @@ -1008,14 +1833,26 @@ static inline void release_ds_buffers(void) { } +static inline void release_lbr_buffers(void) +{ +} + +static inline void reserve_lbr_buffers(void) +{ +} + static inline int intel_pmu_init(void) { return 0; } -static inline struct intel_shared_regs *allocate_shared_regs(int cpu) +static inline int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) +{ + return 0; +} + +static inline void intel_cpuc_finish(struct cpu_hw_events *cpuc) { - return NULL; } static inline int is_ht_workaround_enabled(void) @@ -1023,3 +1860,12 @@ static inline int is_ht_workaround_enabled(void) return 0; } #endif /* CONFIG_CPU_SUP_INTEL */ + +#if ((defined CONFIG_CPU_SUP_CENTAUR) || (defined CONFIG_CPU_SUP_ZHAOXIN)) +int zhaoxin_pmu_init(void); +#else +static inline int zhaoxin_pmu_init(void) +{ + return 0; +} +#endif /*CONFIG_CPU_SUP_CENTAUR or CONFIG_CPU_SUP_ZHAOXIN*/ |
