diff options
Diffstat (limited to 'kernel/trace/blktrace.c')
-rw-r--r-- | kernel/trace/blktrace.c | 558 |
1 files changed, 240 insertions, 318 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 5ef0484513ec..3679a6d18934 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -34,7 +34,7 @@ static struct trace_array *blk_tr; static bool blk_tracer_enabled __read_mostly; static LIST_HEAD(running_trace_list); -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock); +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(running_trace_lock); /* Select an alternative, minimalistic output than the original one */ #define TRACE_BLK_OPT_CLASSIC 0x1 @@ -72,17 +72,17 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, struct blk_io_trace *t; struct ring_buffer_event *event = NULL; struct trace_buffer *buffer = NULL; - int pc = 0; + unsigned int trace_ctx = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; if (blk_tracer) { buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -107,7 +107,7 @@ record_it: memcpy((void *) t + sizeof(*t) + cgid_len, data, len); if (blk_tracer) - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); } } @@ -121,12 +121,12 @@ static void trace_note_tsk(struct task_struct *tsk) struct blk_trace *bt; tsk->btrace_seq = blktrace_seq; - spin_lock_irqsave(&running_trace_lock, flags); + raw_spin_lock_irqsave(&running_trace_lock, flags); list_for_each_entry(bt, &running_trace_list, running_list) { trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm), 0); } - spin_unlock_irqrestore(&running_trace_lock, flags); + raw_spin_unlock_irqrestore(&running_trace_lock, flags); } static void trace_note_time(struct blk_trace *bt) @@ -145,13 +145,14 @@ static void trace_note_time(struct blk_trace *bt) local_irq_restore(flags); } -void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg, - const char *fmt, ...) +void __blk_trace_note_message(struct blk_trace *bt, + struct cgroup_subsys_state *css, const char *fmt, ...) { int n; va_list args; unsigned long flags; char *buf; + u64 cgid = 0; if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer_enabled)) @@ -170,17 +171,16 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg, n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); va_end(args); - if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) - blkcg = NULL; #ifdef CONFIG_BLK_CGROUP - trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, - blkcg ? cgroup_id(blkcg->css.cgroup) : 1); -#else - trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0); + if (css && (blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) + cgid = cgroup_id(css->cgroup); + else + cgid = 1; #endif + trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, cgid); local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(__trace_note_message); +EXPORT_SYMBOL_GPL(__blk_trace_note_message); static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, pid_t pid) @@ -205,7 +205,7 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), #define BLK_TC_PREFLUSH BLK_TC_FLUSH /* The ilog2() calls fall out because they're constant */ -#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ +#define MASK_TC_BIT(rw, __name) ((__force u32)(rw & REQ_ ## __name) << \ (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) /* @@ -213,8 +213,8 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), * blk_io_trace structure and places it in a per-cpu subbuffer. */ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, - int op, int op_flags, u32 what, int error, int pdu_len, - void *pdu_data, u64 cgid) + const blk_opf_t opf, u32 what, int error, + int pdu_len, void *pdu_data, u64 cgid) { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; @@ -222,20 +222,22 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, struct blk_io_trace *t; unsigned long flags = 0; unsigned long *sequence; + unsigned int trace_ctx = 0; pid_t pid; - int cpu, pc = 0; + int cpu; bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; + const enum req_op op = opf & REQ_OP_MASK; if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) return; what |= ddir_act[op_is_write(op) ? WRITE : READ]; - what |= MASK_TC_BIT(op_flags, SYNC); - what |= MASK_TC_BIT(op_flags, RAHEAD); - what |= MASK_TC_BIT(op_flags, META); - what |= MASK_TC_BIT(op_flags, PREFLUSH); - what |= MASK_TC_BIT(op_flags, FUA); + what |= MASK_TC_BIT(opf, SYNC); + what |= MASK_TC_BIT(opf, RAHEAD); + what |= MASK_TC_BIT(opf, META); + what |= MASK_TC_BIT(opf, PREFLUSH); + what |= MASK_TC_BIT(opf, FUA); if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE) what |= BLK_TC_ACT(BLK_TC_DISCARD); if (op == REQ_OP_FLUSH) @@ -252,10 +254,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, tracing_record_cmdline(current); buffer = blk_tr->array_buffer.buffer; - pc = preempt_count(); + trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, sizeof(*t) + pdu_len + cgid_len, - 0, pc); + trace_ctx); if (!event) return; t = ring_buffer_event_data(event); @@ -301,7 +303,7 @@ record_it: memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); if (blk_tracer) { - trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); + trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); return; } } @@ -309,12 +311,20 @@ record_it: local_irq_restore(flags); } -static void blk_trace_free(struct blk_trace *bt) +static void blk_trace_free(struct request_queue *q, struct blk_trace *bt) { - debugfs_remove(bt->msg_file); - debugfs_remove(bt->dropped_file); relay_close(bt->rchan); - debugfs_remove(bt->dir); + + /* + * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created + * under 'q->debugfs_dir', thus lookup and remove them. + */ + if (!bt->dir) { + debugfs_lookup_and_remove("dropped", q->debugfs_dir); + debugfs_lookup_and_remove("msg", q->debugfs_dir); + } else { + debugfs_remove(bt->dir); + } free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); @@ -336,10 +346,42 @@ static void put_probe_ref(void) mutex_unlock(&blk_probe_mutex); } -static void blk_trace_cleanup(struct blk_trace *bt) +static int blk_trace_start(struct blk_trace *bt) +{ + if (bt->trace_state != Blktrace_setup && + bt->trace_state != Blktrace_stopped) + return -EINVAL; + + blktrace_seq++; + smp_mb(); + bt->trace_state = Blktrace_running; + raw_spin_lock_irq(&running_trace_lock); + list_add(&bt->running_list, &running_trace_list); + raw_spin_unlock_irq(&running_trace_lock); + trace_note_time(bt); + + return 0; +} + +static int blk_trace_stop(struct blk_trace *bt) +{ + if (bt->trace_state != Blktrace_running) + return -EINVAL; + + bt->trace_state = Blktrace_stopped; + raw_spin_lock_irq(&running_trace_lock); + list_del_init(&bt->running_list); + raw_spin_unlock_irq(&running_trace_lock); + relay_flush(bt->rchan); + + return 0; +} + +static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt) { + blk_trace_stop(bt); synchronize_rcu(); - blk_trace_free(bt); + blk_trace_free(q, bt); put_probe_ref(); } @@ -348,12 +390,11 @@ static int __blk_trace_remove(struct request_queue *q) struct blk_trace *bt; bt = rcu_replace_pointer(q->blk_trace, NULL, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (!bt) return -EINVAL; - if (bt->trace_state != Blktrace_running) - blk_trace_cleanup(bt); + blk_trace_cleanup(q, bt); return 0; } @@ -362,9 +403,9 @@ int blk_trace_remove(struct request_queue *q) { int ret; - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); ret = __blk_trace_remove(q); - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); return ret; } @@ -402,7 +443,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, return PTR_ERR(msg); bt = filp->private_data; - __trace_note_message(bt, NULL, "%s", msg); + __blk_trace_note_message(bt, NULL, "%s", msg); kfree(msg); return count; @@ -449,7 +490,7 @@ static struct dentry *blk_create_buf_file_callback(const char *filename, &relay_file_operations); } -static struct rchan_callbacks blk_relay_callbacks = { +static const struct rchan_callbacks blk_relay_callbacks = { .subbuf_start = blk_subbuf_start_callback, .create_buf_file = blk_create_buf_file_callback, .remove_buf_file = blk_remove_buf_file_callback, @@ -458,14 +499,9 @@ static struct rchan_callbacks blk_relay_callbacks = { static void blk_trace_setup_lba(struct blk_trace *bt, struct block_device *bdev) { - struct hd_struct *part = NULL; - - if (bdev) - part = bdev->bd_part; - - if (part) { - bt->start_lba = part->start_sect; - bt->end_lba = part->start_sect + part->nr_sects; + if (bdev) { + bt->start_lba = bdev->bd_start_sect; + bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev); } else { bt->start_lba = 0; bt->end_lba = -1ULL; @@ -483,14 +519,12 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct dentry *dir = NULL; int ret; + lockdep_assert_held(&q->debugfs_mutex); + if (!buts->buf_size || !buts->buf_nr) return -EINVAL; - if (!blk_debugfs_root) - return -ENOENT; - - strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); - buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; + strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE); /* * some device names have larger paths - convert the slashes @@ -503,7 +537,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * we can be. */ if (rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex))) { + lockdep_is_held(&q->debugfs_mutex))) { pr_warn("Concurrent blktraces are not allowed on %s\n", buts->name); return -EBUSY; @@ -522,21 +556,36 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->msg_data) goto err; - ret = -ENOENT; - - dir = debugfs_lookup(buts->name, blk_debugfs_root); - if (!dir) + /* + * When tracing the whole disk reuse the existing debugfs directory + * created by the block layer on init. For partitions block devices, + * and scsi-generic block devices we create a temporary new debugfs + * directory that will be removed once the trace ends. + */ + if (bdev && !bdev_is_partition(bdev)) + dir = q->debugfs_dir; + else bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); + /* + * As blktrace relies on debugfs for its interface the debugfs directory + * is required, contrary to the usual mantra of not checking for debugfs + * files or directories. + */ + if (IS_ERR_OR_NULL(dir)) { + pr_warn("debugfs_dir not present for %s so skipping\n", + buts->name); + ret = -ENOENT; + goto err; + } + bt->dev = dev; atomic_set(&bt->dropped, 0); INIT_LIST_HEAD(&bt->running_list); ret = -EIO; - bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, - &blk_dropped_fops); - - bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); + debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); + debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); bt->rchan = relay_open("trace", dir, buts->buf_size, buts->buf_nr, &blk_relay_callbacks, bt); @@ -563,15 +612,14 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ret = 0; err: - if (dir && !bt->dir) - dput(dir); if (ret) - blk_trace_free(bt); + blk_trace_free(q, bt); return ret; } -static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct block_device *bdev, char __user *arg) +int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, + char __user *arg) { struct blk_user_trace_setup buts; int ret; @@ -580,29 +628,18 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (ret) return -EFAULT; + mutex_lock(&q->debugfs_mutex); ret = do_blk_trace_setup(q, name, dev, bdev, &buts); + mutex_unlock(&q->debugfs_mutex); if (ret) return ret; if (copy_to_user(arg, &buts, sizeof(buts))) { - __blk_trace_remove(q); + blk_trace_remove(q); return -EFAULT; } return 0; } - -int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct block_device *bdev, - char __user *arg) -{ - int ret; - - mutex_lock(&q->blk_trace_mutex); - ret = __blk_trace_setup(q, name, dev, bdev, arg); - mutex_unlock(&q->blk_trace_mutex); - - return ret; -} EXPORT_SYMBOL_GPL(blk_trace_setup); #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) @@ -626,12 +663,14 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, .pid = cbuts.pid, }; + mutex_lock(&q->debugfs_mutex); ret = do_blk_trace_setup(q, name, dev, bdev, &buts); + mutex_unlock(&q->debugfs_mutex); if (ret) return ret; if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { - __blk_trace_remove(q); + blk_trace_remove(q); return -EFAULT; } @@ -641,53 +680,26 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, static int __blk_trace_startstop(struct request_queue *q, int start) { - int ret; struct blk_trace *bt; bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (bt == NULL) return -EINVAL; - /* - * For starting a trace, we can transition from a setup or stopped - * trace. For stopping a trace, the state must be running - */ - ret = -EINVAL; - if (start) { - if (bt->trace_state == Blktrace_setup || - bt->trace_state == Blktrace_stopped) { - blktrace_seq++; - smp_mb(); - bt->trace_state = Blktrace_running; - spin_lock_irq(&running_trace_lock); - list_add(&bt->running_list, &running_trace_list); - spin_unlock_irq(&running_trace_lock); - - trace_note_time(bt); - ret = 0; - } - } else { - if (bt->trace_state == Blktrace_running) { - bt->trace_state = Blktrace_stopped; - spin_lock_irq(&running_trace_lock); - list_del_init(&bt->running_list); - spin_unlock_irq(&running_trace_lock); - relay_flush(bt->rchan); - ret = 0; - } - } - - return ret; + if (start) + return blk_trace_start(bt); + else + return blk_trace_stop(bt); } int blk_trace_startstop(struct request_queue *q, int start) { int ret; - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); ret = __blk_trace_startstop(q, start); - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); return ret; } @@ -700,7 +712,7 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop); */ /** - * blk_trace_ioctl: - handle the ioctls associated with tracing + * blk_trace_ioctl - handle the ioctls associated with tracing * @bdev: the block device * @cmd: the ioctl cmd * @arg: the argument data, if any @@ -708,65 +720,53 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop); **/ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) { - struct request_queue *q; + struct request_queue *q = bdev_get_queue(bdev); int ret, start = 0; char b[BDEVNAME_SIZE]; - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - - mutex_lock(&q->blk_trace_mutex); - switch (cmd) { case BLKTRACESETUP: - bdevname(bdev, b); - ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); + snprintf(b, sizeof(b), "%pg", bdev); + ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) case BLKTRACESETUP32: - bdevname(bdev, b); + snprintf(b, sizeof(b), "%pg", bdev); ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #endif case BLKTRACESTART: start = 1; - /* fall through */ + fallthrough; case BLKTRACESTOP: - ret = __blk_trace_startstop(q, start); + ret = blk_trace_startstop(q, start); break; case BLKTRACETEARDOWN: - ret = __blk_trace_remove(q); + ret = blk_trace_remove(q); break; default: ret = -ENOTTY; break; } - - mutex_unlock(&q->blk_trace_mutex); return ret; } /** - * blk_trace_shutdown: - stop and cleanup trace structures + * blk_trace_shutdown - stop and cleanup trace structures * @q: the request queue associated with the device * **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->blk_trace_mutex); if (rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex))) { - __blk_trace_startstop(q, 0); + lockdep_is_held(&q->debugfs_mutex))) __blk_trace_remove(q); - } - - mutex_unlock(&q->blk_trace_mutex); } #ifdef CONFIG_BLK_CGROUP static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { + struct cgroup_subsys_state *blkcg_css; struct blk_trace *bt; /* We don't use the 'bt' value here except as an optimization... */ @@ -774,24 +774,25 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return 0; - if (!bio->bi_blkg) + blkcg_css = bio_blkcg_css(bio); + if (!blkcg_css) return 0; - return cgroup_id(bio_blkcg(bio)->css.cgroup); + return cgroup_id(blkcg_css->cgroup); } #else -u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) +static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { return 0; } #endif static u64 -blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) +blk_trace_request_get_cgid(struct request *rq) { if (!rq->bio) return 0; /* Use the first bio */ - return blk_trace_bio_get_cgid(q, rq->bio); + return blk_trace_bio_get_cgid(rq->q, rq->bio); } /* @@ -810,7 +811,7 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) * Records an action against a request. Will log the bio offset + size. * **/ -static void blk_add_trace_rq(struct request *rq, int error, +static void blk_add_trace_rq(struct request *rq, blk_status_t error, unsigned int nr_bytes, u32 what, u64 cgid) { struct blk_trace *bt; @@ -827,38 +828,40 @@ static void blk_add_trace_rq(struct request *rq, int error, else what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), - rq->cmd_flags, what, error, 0, NULL, cgid); + __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, rq->cmd_flags, + what, blk_status_to_errno(error), 0, NULL, cgid); rcu_read_unlock(); } -static void blk_add_trace_rq_insert(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_insert(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_issue(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_issue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); +} + +static void blk_add_trace_rq_merge(void *ignore, struct request *rq) +{ + blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_requeue(void *ignore, - struct request_queue *q, - struct request *rq) +static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_complete(void *ignore, struct request *rq, - int error, unsigned int nr_bytes) + blk_status_t error, unsigned int nr_bytes) { blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, - blk_trace_request_get_cgid(rq->q, rq)); + blk_trace_request_get_cgid(rq)); } /** @@ -885,15 +888,14 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, } __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, what, error, 0, NULL, + bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); } -static void blk_add_trace_bio_bounce(void *ignore, - struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); + blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, @@ -903,63 +905,26 @@ static void blk_add_trace_bio_complete(void *ignore, blk_status_to_errno(bio->bi_status)); } -static void blk_add_trace_bio_backmerge(void *ignore, - struct request_queue *q, - struct request *rq, - struct bio *bio) -{ - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); -} - -static void blk_add_trace_bio_frontmerge(void *ignore, - struct request_queue *q, - struct request *rq, - struct bio *bio) +static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); + blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_BACKMERGE, + 0); } -static void blk_add_trace_bio_queue(void *ignore, - struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); + blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_FRONTMERGE, + 0); } -static void blk_add_trace_getrq(void *ignore, - struct request_queue *q, - struct bio *bio, int rw) +static void blk_add_trace_bio_queue(void *ignore, struct bio *bio) { - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); - else { - struct blk_trace *bt; - - rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); - if (bt) - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, - NULL, 0); - rcu_read_unlock(); - } + blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_QUEUE, 0); } - -static void blk_add_trace_sleeprq(void *ignore, - struct request_queue *q, - struct bio *bio, int rw) +static void blk_add_trace_getrq(void *ignore, struct bio *bio) { - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); - else { - struct blk_trace *bt; - - rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); - if (bt) - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, - 0, 0, NULL, 0); - rcu_read_unlock(); - } + blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_GETRQ, 0); } static void blk_add_trace_plug(void *ignore, struct request_queue *q) @@ -969,7 +934,7 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (bt) - __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); + __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); rcu_read_unlock(); } @@ -989,15 +954,14 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, else what = BLK_TA_UNPLUG_TIMER; - __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); + __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); } rcu_read_unlock(); } -static void blk_add_trace_split(void *ignore, - struct request_queue *q, struct bio *bio, - unsigned int pdu) +static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu) { + struct request_queue *q = bio->bi_bdev->bd_disk->queue; struct blk_trace *bt; rcu_read_lock(); @@ -1006,8 +970,7 @@ static void blk_add_trace_split(void *ignore, __be64 rpdu = cpu_to_be64(pdu); __blk_add_trace(bt, bio->bi_iter.bi_sector, - bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, - BLK_TA_SPLIT, + bio->bi_iter.bi_size, bio->bi_opf, BLK_TA_SPLIT, blk_status_to_errno(bio->bi_status), sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); @@ -1018,20 +981,16 @@ static void blk_add_trace_split(void *ignore, /** * blk_add_trace_bio_remap - Add a trace for a bio-remap operation * @ignore: trace callback data parameter (not used) - * @q: queue the io is for * @bio: the source bio - * @dev: target device + * @dev: source device * @from: source sector * - * Description: - * Device mapper or raid target sometimes need to split a bio because - * it spans a stripe (or similar). Add a trace for that action. - * + * Called after a bio is remapped to a different device and/or sector. **/ -static void blk_add_trace_bio_remap(void *ignore, - struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from) +static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, + sector_t from) { + struct request_queue *q = bio->bi_bdev->bd_disk->queue; struct blk_trace *bt; struct blk_io_trace_remap r; @@ -1047,7 +1006,7 @@ static void blk_add_trace_bio_remap(void *ignore, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, BLK_TA_REMAP, + bio->bi_opf, BLK_TA_REMAP, blk_status_to_errno(bio->bi_status), sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); @@ -1056,7 +1015,6 @@ static void blk_add_trace_bio_remap(void *ignore, /** * blk_add_trace_rq_remap - Add a trace for a request-remap operation * @ignore: trace callback data parameter (not used) - * @q: queue the io is for * @rq: the source request * @dev: target device * @from: source sector @@ -1066,34 +1024,31 @@ static void blk_add_trace_bio_remap(void *ignore, * Add a trace for that action. * **/ -static void blk_add_trace_rq_remap(void *ignore, - struct request_queue *q, - struct request *rq, dev_t dev, +static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { struct blk_trace *bt; struct blk_io_trace_remap r; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; } r.device_from = cpu_to_be32(dev); - r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); + r.device_to = cpu_to_be32(disk_devt(rq->q->disk)); r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), - rq_data_dir(rq), 0, BLK_TA_REMAP, 0, - sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + rq->cmd_flags, BLK_TA_REMAP, 0, + sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } /** * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for * @rq: io request * @data: driver-specific data * @len: length of driver-specific data @@ -1102,22 +1057,20 @@ static void blk_add_trace_rq_remap(void *ignore, * Some drivers might want to write driver-specific data per request. * **/ -void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) +void blk_add_driver_data(struct request *rq, void *data, size_t len) { struct blk_trace *bt; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; } - __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, + __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, BLK_TA_DRV_DATA, 0, len, data, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1130,6 +1083,8 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); WARN_ON(ret); + ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL); + WARN_ON(ret); ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); WARN_ON(ret); ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); @@ -1146,8 +1101,6 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); WARN_ON(ret); - ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); - WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); @@ -1167,7 +1120,6 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug(blk_add_trace_unplug, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); - unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); @@ -1176,6 +1128,7 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL); unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); + unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL); unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); @@ -1319,7 +1272,7 @@ static void blk_log_action(struct trace_iterator *iter, const char *act, * ones now use the 64bit ino as the whole ID and * no longer use generation. * - * Regarldess of the content, always output + * Regardless of the content, always output * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can * be mapped back to @id on both 64 and 32bit ino * setups. See __kernfs_fh_to_dentry(). @@ -1361,7 +1314,7 @@ static void blk_log_dump_pdu(struct trace_seq *s, i == 0 ? "" : " ", pdu_buf[i]); /* - * stop when the rest is just zeroes and indicate so + * stop when the rest is just zeros and indicate so * with a ".." appended */ if (i == end && end != pdu_len - 1) { @@ -1578,7 +1531,8 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags, static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) { - if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) + if ((iter->ent->type != TRACE_BLK) || + !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) return TRACE_TYPE_UNHANDLED; return print_one_line(iter, true); @@ -1642,13 +1596,15 @@ static int blk_trace_remove_queue(struct request_queue *q) struct blk_trace *bt; bt = rcu_replace_pointer(q->blk_trace, NULL, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (bt == NULL) return -EINVAL; + blk_trace_stop(bt); + put_probe_ref(); synchronize_rcu(); - blk_trace_free(bt); + blk_trace_free(q, bt); return 0; } @@ -1679,7 +1635,7 @@ static int blk_trace_setup_queue(struct request_queue *q, return 0; free_bt: - blk_trace_free(bt); + blk_trace_free(q, bt); return ret; } @@ -1791,36 +1747,19 @@ static ssize_t blk_trace_mask2str(char *buf, int mask) return p - buf; } -static struct request_queue *blk_trace_get_queue(struct block_device *bdev) -{ - if (bdev->bd_disk == NULL) - return NULL; - - return bdev_get_queue(bdev); -} - static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q; - struct block_device *bdev; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; ssize_t ret = -ENXIO; - bdev = bdget(part_devt(p)); - if (bdev == NULL) - goto out; - - q = blk_trace_get_queue(bdev); - if (q == NULL) - goto out_bdput; - - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (attr == &dev_attr_enable) { ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; @@ -1838,10 +1777,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: - mutex_unlock(&q->blk_trace_mutex); -out_bdput: - bdput(bdev); -out: + mutex_unlock(&q->debugfs_mutex); return ret; } @@ -1849,9 +1785,8 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct block_device *bdev; - struct request_queue *q; - struct hd_struct *p; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1867,24 +1802,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; value = ret; } - } else if (kstrtoull(buf, 0, &value)) - goto out; - - ret = -ENXIO; - - p = dev_to_part(dev); - bdev = bdget(part_devt(p)); - if (bdev == NULL) - goto out; - - q = blk_trace_get_queue(bdev); - if (q == NULL) - goto out_bdput; + } else { + if (kstrtoull(buf, 0, &value)) + goto out; + } - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (attr == &dev_attr_enable) { if (!!value == !!bt) { ret = 0; @@ -1901,7 +1827,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (bt == NULL) { ret = blk_trace_setup_queue(q, bdev); bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); } if (ret == 0) { @@ -1916,37 +1842,33 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, } out_unlock_bdev: - mutex_unlock(&q->blk_trace_mutex); -out_bdput: - bdput(bdev); + mutex_unlock(&q->debugfs_mutex); out: return ret ? ret : count; } - -int blk_trace_init_sysfs(struct device *dev) -{ - return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); -} - -void blk_trace_remove_sysfs(struct device *dev) -{ - sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); -} - #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_EVENT_TRACING -void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes) +/** + * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string. + * @rwbs: buffer to be filled + * @opf: request operation type (REQ_OP_XXX) and flags for the tracepoint + * + * Description: + * Maps each request operation and flag to a single character and fills the + * buffer provided by the caller with resulting string. + * + **/ +void blk_fill_rwbs(char *rwbs, blk_opf_t opf) { int i = 0; - if (op & REQ_PREFLUSH) + if (opf & REQ_PREFLUSH) rwbs[i++] = 'F'; - switch (op & REQ_OP_MASK) { + switch (opf & REQ_OP_MASK) { case REQ_OP_WRITE: - case REQ_OP_WRITE_SAME: rwbs[i++] = 'W'; break; case REQ_OP_DISCARD: @@ -1966,13 +1888,13 @@ void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes) rwbs[i++] = 'N'; } - if (op & REQ_FUA) + if (opf & REQ_FUA) rwbs[i++] = 'F'; - if (op & REQ_RAHEAD) + if (opf & REQ_RAHEAD) rwbs[i++] = 'A'; - if (op & REQ_SYNC) + if (opf & REQ_SYNC) rwbs[i++] = 'S'; - if (op & REQ_META) + if (opf & REQ_META) rwbs[i++] = 'M'; rwbs[i] = '\0'; |