author	Alexey Budankov <alexey.budankov@linux.intel.com>	2018-11-06 12:04:58 +0300
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2018-12-17 14:55:08 -0300
commit	d3d1af6f011a553a00d2bda90b2700c0d56bd8f7 (patch)
tree	8f3deb8dcd8b095b3e876fee7d5825c851b10c1c /tools/perf/builtin-record.c
parent	0b77383134f3dbb461189a9c4f3b46b20152045d (diff)
perf record: Enable asynchronous trace writing
The trace file offset is read once before the mmaps iterating loop and
written back after all performance data is enqueued for aio writing.
The trace file offset is incremented linearly after every successful
aio write operation.

record__aio_sync() blocks until completion of the started AIO operation
and then proceeds.

record__aio_mmap_read_sync() implements a barrier for all incomplete
aio write requests.

Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/ce2d45e9-d236-871c-7c8f-1bed2d37e8ac@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
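For readers unfamiliar with the POSIX AIO interface this patch builds on,
a minimal standalone sketch of the pattern follows: queue one write at an
explicit file offset with aio_write(), then block for completion the way
record__aio_sync() does. This sketch is not part of the patch; the file
name, payload, and simplified error handling are illustrative only.

/*
 * Build with: cc -o aio-sketch aio-sketch.c -lrt
 */
#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	static char buf[] = "perf sample payload\n";
	const struct aiocb *list[1];
	struct aiocb cb;
	ssize_t written;
	int fd;

	fd = open("trace.tmp", O_CREAT | O_WRONLY | O_TRUNC, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof(buf) - 1;
	cb.aio_offset = 0;		/* explicit offset, like 'off' in the patch */
	cb.aio_sigevent.sigev_notify = SIGEV_NONE;	/* poll; no signal or thread */

	/* Queue the write, retrying while the AIO queue is temporarily full. */
	while (aio_write(&cb) < 0) {
		if (errno != EAGAIN) {
			perror("aio_write");
			return 1;
		}
	}

	/* Block until the request leaves EINPROGRESS, as record__aio_sync() does. */
	list[0] = &cb;
	while (aio_error(&cb) == EINPROGRESS)
		aio_suspend(list, 1, NULL);

	/*
	 * A short write would need its remainder re-queued, which
	 * record__aio_complete() handles in the patch; omitted here.
	 */
	written = aio_return(&cb);
	printf("wrote %zd bytes\n", written);
	close(fd);
	return 0;
}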
Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r--	tools/perf/builtin-record.c	218
1 file changed, 214 insertions(+), 4 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 488779bc4c8d..408d6477c960 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -124,6 +124,183 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused
return 0;
}
+#ifdef HAVE_AIO_SUPPORT
+static int record__aio_write(struct aiocb *cblock, int trace_fd,
+ void *buf, size_t size, off_t off)
+{
+ int rc;
+
+ cblock->aio_fildes = trace_fd;
+ cblock->aio_buf = buf;
+ cblock->aio_nbytes = size;
+ cblock->aio_offset = off;
+ cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
+
+ do {
+ rc = aio_write(cblock);
+ if (rc == 0) {
+ break;
+ } else if (errno != EAGAIN) {
+ cblock->aio_fildes = -1;
+ pr_err("failed to queue perf data, error: %m\n");
+ break;
+ }
+ } while (1);
+
+ return rc;
+}
+
+static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
+{
+ void *rem_buf;
+ off_t rem_off;
+ size_t rem_size;
+ int rc, aio_errno;
+ ssize_t aio_ret, written;
+
+ aio_errno = aio_error(cblock);
+ if (aio_errno == EINPROGRESS)
+ return 0;
+
+ written = aio_ret = aio_return(cblock);
+ if (aio_ret < 0) {
+ if (aio_errno != EINTR)
+ pr_err("failed to write perf data, error: %m\n");
+ written = 0;
+ }
+
+ rem_size = cblock->aio_nbytes - written;
+
+ if (rem_size == 0) {
+ cblock->aio_fildes = -1;
+ /*
+ * md->refcount is incremented in perf_mmap__push() for
+ * every enqueued aio write request, so decrement it here
+ * because the request is now complete.
+ */
+ perf_mmap__put(md);
+ rc = 1;
+ } else {
+ /*
+ * aio write request may require restart with the
+ * remainder if the kernel didn't write the whole
+ * chunk at once.
+ */
+ rem_off = cblock->aio_offset + written;
+ rem_buf = (void *)(cblock->aio_buf + written);
+ record__aio_write(cblock, cblock->aio_fildes,
+ rem_buf, rem_size, rem_off);
+ rc = 0;
+ }
+
+ return rc;
+}
+
+static void record__aio_sync(struct perf_mmap *md)
+{
+ struct aiocb *cblock = &md->aio.cblock;
+ struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
+
+ do {
+ if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
+ return;
+
+ while (aio_suspend((const struct aiocb**)&cblock, 1, &timeout)) {
+ if (!(errno == EAGAIN || errno == EINTR))
+ pr_err("failed to sync perf data, error: %m\n");
+ }
+ } while (1);
+}
+
+static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
+{
+ struct record *rec = to;
+ int ret, trace_fd = rec->session->data->file.fd;
+
+ rec->samples++;
+
+ ret = record__aio_write(cblock, trace_fd, bf, size, off);
+ if (!ret) {
+ rec->bytes_written += size;
+ if (switch_output_size(rec))
+ trigger_hit(&switch_output_trigger);
+ }
+
+ return ret;
+}
+
+static off_t record__aio_get_pos(int trace_fd)
+{
+ return lseek(trace_fd, 0, SEEK_CUR);
+}
+
+static void record__aio_set_pos(int trace_fd, off_t pos)
+{
+ lseek(trace_fd, pos, SEEK_SET);
+}
+
+static void record__aio_mmap_read_sync(struct record *rec)
+{
+ int i;
+ struct perf_evlist *evlist = rec->evlist;
+ struct perf_mmap *maps = evlist->mmap;
+
+ if (!rec->opts.nr_cblocks)
+ return;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ struct perf_mmap *map = &maps[i];
+
+ if (map->base)
+ record__aio_sync(map);
+ }
+}
+
+static int nr_cblocks_default = 1;
+
+static int record__aio_parse(const struct option *opt,
+ const char *str __maybe_unused,
+ int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+
+ if (unset)
+ opts->nr_cblocks = 0;
+ else
+ opts->nr_cblocks = nr_cblocks_default;
+
+ return 0;
+}
+#else /* HAVE_AIO_SUPPORT */
+static void record__aio_sync(struct perf_mmap *md __maybe_unused)
+{
+}
+
+static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
+ void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
+{
+ return -1;
+}
+
+static off_t record__aio_get_pos(int trace_fd __maybe_unused)
+{
+ return -1;
+}
+
+static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
+{
+}
+
+static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
+{
+}
+#endif
+
+static int record__aio_enabled(struct record *rec)
+{
+ return rec->opts.nr_cblocks > 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -329,7 +506,7 @@ static int record__mmap_evlist(struct record *rec,
if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
- opts->auxtrace_snapshot_mode) < 0) {
+ opts->auxtrace_snapshot_mode, opts->nr_cblocks) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@@ -525,6 +702,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist
int i;
int rc = 0;
struct perf_mmap *maps;
+ int trace_fd = rec->data.file.fd;
+ off_t off;
if (!evlist)
return 0;
@@ -536,13 +715,29 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist
if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;
+ if (record__aio_enabled(rec))
+ off = record__aio_get_pos(trace_fd);
+
for (i = 0; i < evlist->nr_mmaps; i++) {
struct perf_mmap *map = &maps[i];
if (map->base) {
- if (perf_mmap__push(map, rec, record__pushfn) != 0) {
- rc = -1;
- goto out;
+ if (!record__aio_enabled(rec)) {
+ if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+ rc = -1;
+ goto out;
+ }
+ } else {
+ /*
+ * Call record__aio_sync() to wait until the map->data buffer
+ * becomes available after the previous aio write request.
+ */
+ record__aio_sync(map);
+ if (perf_mmap__aio_push(map, rec, record__aio_pushfn, &off) != 0) {
+ record__aio_set_pos(trace_fd, off);
+ rc = -1;
+ goto out;
+ }
}
}
@@ -553,6 +748,9 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist
}
}
+ if (record__aio_enabled(rec))
+ record__aio_set_pos(trace_fd, off);
+
/*
* Mark the round finished in case we wrote
* at least one event.
@@ -658,6 +856,8 @@ record__switch_output(struct record *rec, bool at_exit)
/* Same Size: "2015122520103046"*/
char timestamp[] = "InvalidTimestamp";
+ record__aio_mmap_read_sync(rec);
+
record__synthesize(rec, true);
if (target__none(&rec->opts.target))
record__synthesize_workload(rec, true);
@@ -1168,6 +1368,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
+ record__aio_mmap_read_sync(rec);
+
if (forks) {
int exit_status;
@@ -1706,6 +1908,11 @@ static struct option __record_options[] = {
"signal"),
OPT_BOOLEAN(0, "dry-run", &dry_run,
"Parse options then exit"),
+#ifdef HAVE_AIO_SUPPORT
+ OPT_CALLBACK_NOOPT(0, "aio", &record.opts,
+ NULL, "Enable asynchronous trace writing mode",
+ record__aio_parse),
+#endif
OPT_END()
};
@@ -1898,6 +2105,9 @@ int cmd_record(int argc, const char **argv)
goto out;
}
+ if (verbose > 0)
+ pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
+
err = __cmd_record(&record, argc, argv);
out:
perf_evlist__delete(rec->evlist);
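
With the patch applied, asynchronous trace writing remains opt-in through
the new --aio option registered in __record_options above (present only
when perf is built with HAVE_AIO_SUPPORT). A typical invocation, with an
illustrative workload:

  $ perf record --aio -- sleep 1
  $ perf record --aio -v -- sleep 1	# -v also prints "nr_cblocks: 1"

The second form exercises the pr_info() added at the end of cmd_record(),
which reports the number of AIO control blocks in use (nr_cblocks_default
is 1 in this patch).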