summaryrefslogtreecommitdiff
path: root/tools/perf/util/mmap.c
diff options
context:
space:
mode:
authorAlexey Budankov <alexey.budankov@linux.intel.com>2018-11-06 12:04:58 +0300
committerArnaldo Carvalho de Melo <acme@redhat.com>2018-12-17 14:55:08 -0300
commitd3d1af6f011a553a00d2bda90b2700c0d56bd8f7 (patch)
tree8f3deb8dcd8b095b3e876fee7d5825c851b10c1c /tools/perf/util/mmap.c
parent0b77383134f3dbb461189a9c4f3b46b20152045d (diff)
perf record: Enable asynchronous trace writing
The trace file offset is read once before mmaps iterating loop and written back after all performance data is enqueued for aio writing. The trace file offset is incremented linearly after every successful aio write operation. record__aio_sync() blocks till completion of the started AIO operation and then proceeds. record__aio_mmap_read_sync() implements a barrier for all incomplete aio write requests. Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@redhat.com> Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/ce2d45e9-d236-871c-7c8f-1bed2d37e8ac@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/mmap.c')
-rw-r--r--tools/perf/util/mmap.c77
1 files changed, 76 insertions, 1 deletions
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 47cdc3ad6546..61aa381d05d0 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -158,7 +158,8 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
{
int delta_max;
- if (mp->nr_cblocks) {
+ map->aio.nr_cblocks = mp->nr_cblocks;
+ if (map->aio.nr_cblocks) {
map->aio.data = malloc(perf_mmap__mmap_len(map));
if (!map->aio.data) {
pr_debug2("failed to allocate data buffer, error %m\n");
@@ -187,6 +188,80 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
if (map->aio.data)
zfree(&map->aio.data);
}
+
+int perf_mmap__aio_push(struct perf_mmap *md, void *to,
+ int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
+ off_t *off)
+{
+ u64 head = perf_mmap__read_head(md);
+ unsigned char *data = md->base + page_size;
+ unsigned long size, size0 = 0;
+ void *buf;
+ int rc = 0;
+
+ rc = perf_mmap__read_init(md);
+ if (rc < 0)
+ return (rc == -EAGAIN) ? 0 : -1;
+
+ /*
+ * md->base data is copied into md->data buffer to
+ * release space in the kernel buffer as fast as possible,
+ * thru perf_mmap__consume() below.
+ *
+ * That lets the kernel to proceed with storing more
+ * profiling data into the kernel buffer earlier than other
+ * per-cpu kernel buffers are handled.
+ *
+ * Coping can be done in two steps in case the chunk of
+ * profiling data crosses the upper bound of the kernel buffer.
+ * In this case we first move part of data from md->start
+ * till the upper bound and then the reminder from the
+ * beginning of the kernel buffer till the end of
+ * the data chunk.
+ */
+
+ size = md->end - md->start;
+
+ if ((md->start & md->mask) + size != (md->end & md->mask)) {
+ buf = &data[md->start & md->mask];
+ size = md->mask + 1 - (md->start & md->mask);
+ md->start += size;
+ memcpy(md->aio.data, buf, size);
+ size0 = size;
+ }
+
+ buf = &data[md->start & md->mask];
+ size = md->end - md->start;
+ md->start += size;
+ memcpy(md->aio.data + size0, buf, size);
+
+ /*
+ * Increment md->refcount to guard md->data buffer
+ * from premature deallocation because md object can be
+ * released earlier than aio write request started
+ * on mmap->data is complete.
+ *
+ * perf_mmap__put() is done at record__aio_complete()
+ * after started request completion.
+ */
+ perf_mmap__get(md);
+
+ md->prev = head;
+ perf_mmap__consume(md);
+
+ rc = push(to, &md->aio.cblock, md->aio.data, size0 + size, *off);
+ if (!rc) {
+ *off += size0 + size;
+ } else {
+ /*
+ * Decrement md->refcount back if aio write
+ * operation failed to start.
+ */
+ perf_mmap__put(md);
+ }
+
+ return rc;
+}
#else
static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
struct mmap_params *mp __maybe_unused)