summaryrefslogtreecommitdiff
path: root/kernel/trace/ring_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--kernel/trace/ring_buffer.c142
1 files changed, 117 insertions, 25 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 68744c51517e..2c0ee6484990 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -287,17 +287,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
-/**
- * ring_buffer_event_time_stamp - return the event's extended timestamp
- * @event: the event to get the timestamp of
- *
- * Returns the extended timestamp associated with a data event.
- * An extended time_stamp is a 64-bit timestamp represented
- * internally in a special way that makes the best use of space
- * contained within a ring buffer event. This function decodes
- * it and maps it to a straight u64 value.
- */
-u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
+static u64 rb_event_time_stamp(struct ring_buffer_event *event)
{
u64 ts;
@@ -487,6 +477,8 @@ struct rb_time_struct {
#endif
typedef struct rb_time_struct rb_time_t;
+#define MAX_NEST 5
+
/*
* head_page == tail_page && head == tail then buffer is empty.
*/
@@ -524,6 +516,7 @@ struct ring_buffer_per_cpu {
unsigned long read_bytes;
rb_time_t write_stamp;
rb_time_t before_stamp;
+ u64 event_stamp[MAX_NEST];
u64 read_stamp;
/* ring buffer pages to update, > 0 to add, < 0 to remove */
long nr_pages_to_update;
@@ -749,6 +742,99 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
}
#endif
+/*
+ * Enable this to make sure that the event passed to
+ * ring_buffer_event_time_stamp() is not committed and also
+ * is on the buffer that it passed in.
+ */
+//#define RB_VERIFY_EVENT
+#ifdef RB_VERIFY_EVENT
+static struct list_head *rb_list_head(struct list_head *list);
+static void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
+ void *event)
+{
+ struct buffer_page *page = cpu_buffer->commit_page;
+ struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page);
+ struct list_head *next;
+ long commit, write;
+ unsigned long addr = (unsigned long)event;
+ bool done = false;
+ int stop = 0;
+
+ /* Make sure the event exists and is not committed yet */
+ do {
+ if (page == tail_page || WARN_ON_ONCE(stop++ > 100))
+ done = true;
+ commit = local_read(&page->page->commit);
+ write = local_read(&page->write);
+ if (addr >= (unsigned long)&page->page->data[commit] &&
+ addr < (unsigned long)&page->page->data[write])
+ return;
+
+ next = rb_list_head(page->list.next);
+ page = list_entry(next, struct buffer_page, list);
+ } while (!done);
+ WARN_ON_ONCE(1);
+}
+#else
+static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
+ void *event)
+{
+}
+#endif
+
+
+static inline u64 rb_time_stamp(struct trace_buffer *buffer);
+
+/**
+ * ring_buffer_event_time_stamp - return the event's current time stamp
+ * @buffer: The buffer that the event is on
+ * @event: the event to get the time stamp of
+ *
+ * Note, this must be called after @event is reserved, and before it is
+ * committed to the ring buffer. And must be called from the same
+ * context where the event was reserved (normal, softirq, irq, etc).
+ *
+ * Returns the time stamp associated with the current event.
+ * If the event has an extended time stamp, then that is used as
+ * the time stamp to return.
+ * In the highly unlikely case that the event was nested more than
+ * the max nesting, then the write_stamp of the buffer is returned,
+ * otherwise current time is returned, but that really neither of
+ * the last two cases should ever happen.
+ */
+u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer,
+ struct ring_buffer_event *event)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[smp_processor_id()];
+ unsigned int nest;
+ u64 ts;
+
+ /* If the event includes an absolute time, then just use that */
+ if (event->type_len == RINGBUF_TYPE_TIME_STAMP)
+ return rb_event_time_stamp(event);
+
+ nest = local_read(&cpu_buffer->committing);
+ verify_event(cpu_buffer, event);
+ if (WARN_ON_ONCE(!nest))
+ goto fail;
+
+ /* Read the current saved nesting level time stamp */
+ if (likely(--nest < MAX_NEST))
+ return cpu_buffer->event_stamp[nest];
+
+ /* Shouldn't happen, warn if it does */
+ WARN_ONCE(1, "nest (%d) greater than max", nest);
+
+ fail:
+ /* Can only fail on 32 bit */
+ if (!rb_time_read(&cpu_buffer->write_stamp, &ts))
+ /* Screw it, just read the current time */
+ ts = rb_time_stamp(cpu_buffer->buffer);
+
+ return ts;
+}
+
/**
* ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
* @buffer: The ring_buffer to get the number of pages from
@@ -994,7 +1080,7 @@ static inline u64 rb_time_stamp(struct trace_buffer *buffer)
return ts << DEBUG_SHIFT;
}
-u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu)
+u64 ring_buffer_time_stamp(struct trace_buffer *buffer)
{
u64 time;
@@ -2710,6 +2796,10 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
{
unsigned length = info->length;
u64 delta = info->delta;
+ unsigned int nest = local_read(&cpu_buffer->committing) - 1;
+
+ if (!WARN_ON_ONCE(nest >= MAX_NEST))
+ cpu_buffer->event_stamp[nest] = info->ts;
/*
* If we need to add a timestamp, then we
@@ -2766,7 +2856,7 @@ static u64 rb_time_delta(struct ring_buffer_event *event)
return 0;
case RINGBUF_TYPE_TIME_EXTEND:
- return ring_buffer_event_time_stamp(event);
+ return rb_event_time_stamp(event);
case RINGBUF_TYPE_TIME_STAMP:
return 0;
@@ -3064,7 +3154,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* is called before preempt_count() is updated, since the check will
* be on the NORMAL bit, the TRANSITION bit will then be set. If an
* NMI then comes in, it will set the NMI bit, but when the NMI code
- * does the trace_recursive_unlock() it will clear the TRANSTION bit
+ * does the trace_recursive_unlock() it will clear the TRANSITION bit
* and leave the NMI bit set. But this is fine, because the interrupt
* code that set the TRANSITION bit will then clear the NMI bit when it
* calls trace_recursive_unlock(). If another NMI comes in, it will
@@ -3212,13 +3302,13 @@ static void dump_buffer_page(struct buffer_data_page *bpage,
switch (event->type_len) {
case RINGBUF_TYPE_TIME_EXTEND:
- delta = ring_buffer_event_time_stamp(event);
+ delta = rb_event_time_stamp(event);
ts += delta;
pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta);
break;
case RINGBUF_TYPE_TIME_STAMP:
- delta = ring_buffer_event_time_stamp(event);
+ delta = rb_event_time_stamp(event);
ts = delta;
pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta);
break;
@@ -3289,12 +3379,12 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
switch (event->type_len) {
case RINGBUF_TYPE_TIME_EXTEND:
- delta = ring_buffer_event_time_stamp(event);
+ delta = rb_event_time_stamp(event);
ts += delta;
break;
case RINGBUF_TYPE_TIME_STAMP:
- delta = ring_buffer_event_time_stamp(event);
+ delta = rb_event_time_stamp(event);
ts = delta;
break;
@@ -3451,7 +3541,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
info->after, ts)) {
/* Nothing came after this event between C and E */
info->delta = ts - info->after;
- info->ts = ts;
} else {
/*
* Interrupted between C and E:
@@ -3463,6 +3552,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
*/
info->delta = 0;
}
+ info->ts = ts;
info->add_timestamp &= ~RB_ADD_STAMP_FORCE;
}
@@ -4256,12 +4346,12 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = ring_buffer_event_time_stamp(event);
+ delta = rb_event_time_stamp(event);
cpu_buffer->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- delta = ring_buffer_event_time_stamp(event);
+ delta = rb_event_time_stamp(event);
cpu_buffer->read_stamp = delta;
return;
@@ -4286,12 +4376,12 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = ring_buffer_event_time_stamp(event);
+ delta = rb_event_time_stamp(event);
iter->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- delta = ring_buffer_event_time_stamp(event);
+ delta = rb_event_time_stamp(event);
iter->read_stamp = delta;
return;
@@ -4544,7 +4634,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
case RINGBUF_TYPE_TIME_STAMP:
if (ts) {
- *ts = ring_buffer_event_time_stamp(event);
+ *ts = rb_event_time_stamp(event);
ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
cpu_buffer->cpu, ts);
}
@@ -4635,7 +4725,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
case RINGBUF_TYPE_TIME_STAMP:
if (ts) {
- *ts = ring_buffer_event_time_stamp(event);
+ *ts = rb_event_time_stamp(event);
ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
cpu_buffer->cpu, ts);
}
@@ -5021,6 +5111,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
rb_time_set(&cpu_buffer->write_stamp, 0);
rb_time_set(&cpu_buffer->before_stamp, 0);
+ memset(cpu_buffer->event_stamp, 0, sizeof(cpu_buffer->event_stamp));
+
cpu_buffer->lost_events = 0;
cpu_buffer->last_overrun = 0;