summaryrefslogtreecommitdiff
path: root/include/linux/perf_event.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/perf_event.h')
-rw-r--r--include/linux/perf_event.h172
1 files changed, 128 insertions, 44 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d2a15c0c6f8a..8333f132f4a9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -168,6 +168,15 @@ struct hw_perf_event {
struct hw_perf_event_extra extra_reg;
struct hw_perf_event_extra branch_reg;
};
+ struct { /* aux / Intel-PT */
+ u64 aux_config;
+ /*
+ * For AUX area events, aux_paused cannot be a state
+ * flag because it can be updated asynchronously to
+ * state.
+ */
+ unsigned int aux_paused;
+ };
struct { /* software */
struct hrtimer hrtimer;
};
@@ -291,6 +300,20 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
+#define PERF_PMU_CAP_AUX_PAUSE 0x0200
+
+/**
+ * pmu::scope
+ */
+enum perf_pmu_scope {
+ PERF_PMU_SCOPE_NONE = 0,
+ PERF_PMU_SCOPE_CORE,
+ PERF_PMU_SCOPE_DIE,
+ PERF_PMU_SCOPE_CLUSTER,
+ PERF_PMU_SCOPE_PKG,
+ PERF_PMU_SCOPE_SYS_WIDE,
+ PERF_PMU_MAX_SCOPE,
+};
struct perf_output_handle;
@@ -315,6 +338,11 @@ struct pmu {
*/
int capabilities;
+ /*
+ * PMU scope
+ */
+ unsigned int scope;
+
int __percpu *pmu_disable_count;
struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
@@ -363,6 +391,8 @@ struct pmu {
#define PERF_EF_START 0x01 /* start the counter when adding */
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
+#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */
+#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */
/*
* Adds/Removes a counter to/from the PMU, can be done inside a
@@ -402,6 +432,18 @@ struct pmu {
*
* ->start() with PERF_EF_RELOAD will reprogram the counter
* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
+ *
+ * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
+ * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
+ * PERF_EF_RESUME.
+ *
+ * ->start() with PERF_EF_RESUME will start as simply as possible but
+ * only if the counter is not otherwise stopped. Will not overlap
+ * another ->start() with PERF_EF_RESUME nor ->stop() with
+ * PERF_EF_PAUSE.
+ *
+ * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
+ * ->stop()/->start() invocations, just not itself.
*/
void (*start) (struct perf_event *event, int flags);
void (*stop) (struct perf_event *event, int flags);
@@ -615,10 +657,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
* PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
* cannot be a group leader. If an event with this flag is detached from the
* group it is scheduled out and moved into an unrecoverable ERROR state.
+ * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
+ * PMU scope where it is active.
*/
#define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
#define PERF_EV_CAP_SIBLING BIT(2)
+#define PERF_EV_CAP_READ_SCOPE BIT(3)
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
@@ -781,11 +826,12 @@ struct perf_event {
unsigned int pending_wakeup;
unsigned int pending_kill;
unsigned int pending_disable;
- unsigned int pending_sigtrap;
unsigned long pending_addr; /* SIGTRAP */
struct irq_work pending_irq;
+ struct irq_work pending_disable_irq;
struct callback_head pending_task;
unsigned int pending_work;
+ struct rcuwait pending_work_wait;
atomic_t event_limit;
@@ -809,11 +855,8 @@ struct perf_event {
u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;
-#ifdef CONFIG_BPF_SYSCALL
- perf_overflow_handler_t orig_overflow_handler;
struct bpf_prog *prog;
u64 bpf_cookie;
-#endif
#ifdef CONFIG_EVENT_TRACING
struct trace_event_call *tp_event;
@@ -883,6 +926,7 @@ struct perf_event_pmu_context {
unsigned int nr_events;
unsigned int nr_cgroups;
+ unsigned int nr_freq;
atomic_t refcount; /* event <-> epc */
struct rcu_head rcu_head;
@@ -897,6 +941,11 @@ struct perf_event_pmu_context {
int rotate_necessary;
};
+static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
+{
+ return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active);
+}
+
struct perf_event_groups {
struct rb_root tree;
u64 index;
@@ -959,20 +1008,18 @@ struct perf_event_context {
struct rcu_head rcu_head;
/*
- * Sum (event->pending_sigtrap + event->pending_work)
+ * The count of events for which using the switch-out fast path
+ * should be avoided.
+ *
+ * Sum (event->pending_work + events with
+ * (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)))
*
* The SIGTRAP is targeted at ctx->task, as such it won't do changing
* that until the signal is delivered.
*/
- local_t nr_pending;
+ local_t nr_no_switch_fast;
};
-/*
- * Number of contexts where an event can trigger:
- * task, softirq, hardirq, nmi.
- */
-#define PERF_NR_CONTEXTS 4
-
struct perf_cpu_pmu_context {
struct perf_event_pmu_context epc;
struct perf_event_pmu_context *task_epc;
@@ -1232,6 +1279,11 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data,
{
int size = 1;
+ if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
+ return;
+ if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN))
+ return;
+
data->callchain = perf_callchain(event, regs);
size += data->callchain->nr;
@@ -1240,12 +1292,18 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data,
}
static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
+ struct perf_event *event,
struct perf_raw_record *raw)
{
struct perf_raw_frag *frag = &raw->frag;
u32 sum = 0;
int size;
+ if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
+ return;
+ if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW))
+ return;
+
do {
sum += frag->size;
if (perf_raw_frag_last(frag))
@@ -1262,6 +1320,11 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
data->sample_flags |= PERF_SAMPLE_RAW;
}
+static inline bool has_branch_stack(struct perf_event *event)
+{
+ return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
+}
+
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
struct perf_event *event,
struct perf_branch_stack *brs,
@@ -1269,6 +1332,11 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
{
int size = sizeof(u64); /* nr */
+ if (!has_branch_stack(event))
+ return;
+ if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK))
+ return;
+
if (branch_sample_hw_index(event))
size += sizeof(u64);
size += brs->nr * sizeof(struct perf_branch_entry);
@@ -1342,8 +1410,10 @@ extern int perf_event_output(struct perf_event *event,
struct pt_regs *regs);
static inline bool
-__is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
+is_default_overflow_handler(struct perf_event *event)
{
+ perf_overflow_handler_t overflow_handler = event->overflow_handler;
+
if (likely(overflow_handler == perf_event_output_forward))
return true;
if (unlikely(overflow_handler == perf_event_output_backward))
@@ -1351,22 +1421,6 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
return false;
}
-#define is_default_overflow_handler(event) \
- __is_default_overflow_handler((event)->overflow_handler)
-
-#ifdef CONFIG_BPF_SYSCALL
-static inline bool uses_default_overflow_handler(struct perf_event *event)
-{
- if (likely(is_default_overflow_handler(event)))
- return true;
-
- return __is_default_overflow_handler(event->orig_overflow_handler);
-}
-#else
-#define uses_default_overflow_handler(event) \
- is_default_overflow_handler(event)
-#endif
-
extern void
perf_event_header__init_id(struct perf_event_header *header,
struct perf_sample_data *data,
@@ -1598,11 +1652,11 @@ extern int sysctl_perf_cpu_time_max_percent;
extern void perf_sample_event_took(u64 sample_len_ns);
-int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
+int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
+int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-int perf_event_max_stack_handler(struct ctl_table *table, int write,
+int perf_event_max_stack_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
/* Access to perf_event_open(2) syscall. */
@@ -1618,13 +1672,7 @@ static inline int perf_is_paranoid(void)
return sysctl_perf_event_paranoid > -1;
}
-static inline int perf_allow_kernel(struct perf_event_attr *attr)
-{
- if (sysctl_perf_event_paranoid > 1 && !perfmon_capable())
- return -EACCES;
-
- return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
-}
+int perf_allow_kernel(struct perf_event_attr *attr);
static inline int perf_allow_cpu(struct perf_event_attr *attr)
{
@@ -1642,6 +1690,8 @@ static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
}
+extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);
+
extern void perf_event_init(void);
extern void perf_tp_event(u16 event_type, u64 count, void *record,
int entry_size, struct pt_regs *regs,
@@ -1649,19 +1699,34 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record,
struct task_struct *task);
extern void perf_bp_event(struct perf_event *event, void *data);
-#ifndef perf_misc_flags
-# define perf_misc_flags(regs) \
+extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
+extern unsigned long perf_instruction_pointer(struct perf_event *event,
+ struct pt_regs *regs);
+
+#ifndef perf_arch_misc_flags
+# define perf_arch_misc_flags(regs) \
(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
-# define perf_instruction_pointer(regs) instruction_pointer(regs)
+# define perf_arch_instruction_pointer(regs) instruction_pointer(regs)
#endif
#ifndef perf_arch_bpf_user_pt_regs
# define perf_arch_bpf_user_pt_regs(regs) regs
#endif
-static inline bool has_branch_stack(struct perf_event *event)
+#ifndef perf_arch_guest_misc_flags
+static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
{
- return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
+ unsigned long guest_state = perf_guest_state();
+
+ if (!(guest_state & PERF_GUEST_ACTIVE))
+ return 0;
+
+ if (guest_state & PERF_GUEST_USER)
+ return PERF_RECORD_MISC_GUEST_USER;
+ else
+ return PERF_RECORD_MISC_GUEST_KERNEL;
}
+# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
+#endif
static inline bool needs_branch_stack(struct perf_event *event)
{
@@ -1673,6 +1738,13 @@ static inline bool has_aux(struct perf_event *event)
return event->pmu->setup_aux;
}
+static inline bool has_aux_action(struct perf_event *event)
+{
+ return event->attr.aux_sample_size ||
+ event->attr.aux_pause ||
+ event->attr.aux_resume;
+}
+
static inline bool is_write_backward(struct perf_event *event)
{
return !!event->attr.write_backward;
@@ -1697,6 +1769,14 @@ perf_event_addr_filters(struct perf_event *event)
return ifh;
}
+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+ /* Only the parent has fasync state */
+ if (event->parent)
+ event = event->parent;
+ return &event->fasync;
+}
+
extern void perf_event_addr_filters_sync(struct perf_event *event);
extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
@@ -1817,6 +1897,10 @@ static inline u64 perf_event_pause(struct perf_event *event, bool reset)
{
return 0;
}
+static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
+{
+ return 0;
+}
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)