summaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/bpf_trace.c27
-rw-r--r--kernel/trace/ftrace.c90
-rw-r--r--kernel/trace/ring_buffer.c258
-rw-r--r--kernel/trace/trace.c838
-rw-r--r--kernel/trace/trace.h18
-rw-r--r--kernel/trace/trace_benchmark.c5
-rw-r--r--kernel/trace/trace_events_trigger.c63
-rw-r--r--kernel/trace/trace_events_user.c209
-rw-r--r--kernel/trace/trace_output.c6
-rw-r--r--kernel/trace/trace_sched_switch.c515
-rw-r--r--kernel/trace/trace_selftest.c2
11 files changed, 1205 insertions, 826 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7ac6c52b25eb..0a5c4efc73c3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1412,14 +1412,14 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
__bpf_kfunc_end_defs();
-BTF_SET8_START(key_sig_kfunc_set)
+BTF_KFUNCS_START(key_sig_kfunc_set)
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
-BTF_SET8_END(key_sig_kfunc_set)
+BTF_KFUNCS_END(key_sig_kfunc_set)
static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
.owner = THIS_MODULE,
@@ -1475,9 +1475,9 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
__bpf_kfunc_end_defs();
-BTF_SET8_START(fs_kfunc_set_ids)
+BTF_KFUNCS_START(fs_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
-BTF_SET8_END(fs_kfunc_set_ids)
+BTF_KFUNCS_END(fs_kfunc_set_ids)
static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
@@ -1629,7 +1629,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -2679,6 +2679,7 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
+ u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies);
u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
struct bpf_kprobe_multi_link *kmulti_link;
u32 ucount = info->kprobe_multi.count;
@@ -2686,6 +2687,8 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
if (!uaddrs ^ !ucount)
return -EINVAL;
+ if (ucookies && !ucount)
+ return -EINVAL;
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
info->kprobe_multi.count = kmulti_link->cnt;
@@ -2699,6 +2702,18 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
else
ucount = kmulti_link->cnt;
+ if (ucookies) {
+ if (kmulti_link->cookies) {
+ if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64)))
+ return -EFAULT;
+ } else {
+ for (i = 0; i < ucount; i++) {
+ if (put_user(0, ucookies + i))
+ return -EFAULT;
+ }
+ }
+ }
+
if (kallsyms_show_value(current_cred())) {
if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
return -EFAULT;
@@ -3241,7 +3256,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
.uprobe = uprobe,
};
struct bpf_prog *prog = link->link.prog;
- bool sleepable = prog->aux->sleepable;
+ bool sleepable = prog->sleepable;
struct bpf_run_ctx *old_run_ctx;
int err = 0;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 83ba342aef31..da1710499698 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1160,7 +1160,7 @@ __ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
* Search a given @hash to see if a given instruction pointer (@ip)
* exists in it.
*
- * Returns the entry that holds the @ip if found. NULL otherwise.
+ * Returns: the entry that holds the @ip if found. NULL otherwise.
*/
struct ftrace_func_entry *
ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
@@ -1282,7 +1282,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
/**
* ftrace_free_filter - remove all filters for an ftrace_ops
- * @ops - the ops to remove the filters from
+ * @ops: the ops to remove the filters from
*/
void ftrace_free_filter(struct ftrace_ops *ops)
{
@@ -1587,7 +1587,7 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
* @end: end of range to search (inclusive). @end points to the last byte
* to check.
*
- * Returns rec->ip if the related ftrace location is a least partly within
+ * Returns: rec->ip if the related ftrace location is at least partly within
* the given address range. That is, the first address of the instruction
* that is either a NOP or call to the function tracer. It checks the ftrace
* internal tables to determine if the address belongs or not.
@@ -1607,9 +1607,10 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)
* ftrace_location - return the ftrace location
* @ip: the instruction pointer to check
*
- * If @ip matches the ftrace location, return @ip.
- * If @ip matches sym+0, return sym's ftrace location.
- * Otherwise, return 0.
+ * Returns:
+ * * If @ip matches the ftrace location, return @ip.
+ * * If @ip matches sym+0, return sym's ftrace location.
+ * * Otherwise, return 0.
*/
unsigned long ftrace_location(unsigned long ip)
{
@@ -1639,7 +1640,7 @@ out:
* @start: start of range to search
* @end: end of range to search (inclusive). @end points to the last byte to check.
*
- * Returns 1 if @start and @end contains a ftrace location.
+ * Returns: 1 if the range from @start to @end contains an ftrace location.
* That is, the instruction that is either a NOP or call to
* the function tracer. It checks the ftrace internal tables to
* determine if the address belongs or not.
@@ -2574,7 +2575,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
* wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
* is not set, then it wants to convert to the normal callback.
*
- * Returns the address of the trampoline to set to
+ * Returns: the address of the trampoline to set to
*/
unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
{
@@ -2615,7 +2616,7 @@ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
* a function that saves all the regs. Basically the '_EN' version
* represents the current state of the function.
*
- * Returns the address of the trampoline that is currently being called
+ * Returns: the address of the trampoline that is currently being called
*/
unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
{
@@ -2719,7 +2720,7 @@ struct ftrace_rec_iter {
/**
* ftrace_rec_iter_start - start up iterating over traced functions
*
- * Returns an iterator handle that is used to iterate over all
+ * Returns: an iterator handle that is used to iterate over all
* the records that represent address locations where functions
* are traced.
*
@@ -2751,7 +2752,7 @@ struct ftrace_rec_iter *ftrace_rec_iter_start(void)
* ftrace_rec_iter_next - get the next record to process.
* @iter: The handle to the iterator.
*
- * Returns the next iterator after the given iterator @iter.
+ * Returns: the next iterator after the given iterator @iter.
*/
struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
{
@@ -2776,7 +2777,7 @@ struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
* ftrace_rec_iter_record - get the record at the iterator location
* @iter: The current iterator location
*
- * Returns the record that the current @iter is at.
+ * Returns: the record that the current @iter is at.
*/
struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
{
@@ -4010,6 +4011,8 @@ ftrace_avail_addrs_open(struct inode *inode, struct file *file)
* ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
* tracing_lseek() should be used as the lseek routine, and
* release must call ftrace_regex_release().
+ *
+ * Returns: 0 on success or a negative errno value on failure
*/
int
ftrace_regex_open(struct ftrace_ops *ops, int flag,
@@ -4626,7 +4629,7 @@ struct ftrace_func_mapper {
/**
* allocate_ftrace_func_mapper - allocate a new ftrace_func_mapper
*
- * Returns a ftrace_func_mapper descriptor that can be used to map ips to data.
+ * Returns: a ftrace_func_mapper descriptor that can be used to map ips to data.
*/
struct ftrace_func_mapper *allocate_ftrace_func_mapper(void)
{
@@ -4646,7 +4649,7 @@ struct ftrace_func_mapper *allocate_ftrace_func_mapper(void)
* @mapper: The mapper that has the ip maps
* @ip: the instruction pointer to find the data for
*
- * Returns the data mapped to @ip if found otherwise NULL. The return
+ * Returns: the data mapped to @ip if found otherwise NULL. The return
* is actually the address of the mapper data pointer. The address is
* returned for use cases where the data is no bigger than a long, and
* the user can use the data pointer as its data instead of having to
@@ -4672,7 +4675,7 @@ void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper,
* @ip: The instruction pointer address to map @data to
* @data: The data to map to @ip
*
- * Returns 0 on success otherwise an error.
+ * Returns: 0 on success otherwise an error.
*/
int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
unsigned long ip, void *data)
@@ -4701,7 +4704,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
* @mapper: The mapper that has the ip maps
* @ip: The instruction pointer address to remove the data from
*
- * Returns the data if it is found, otherwise NULL.
+ * Returns: the data if it is found, otherwise NULL.
* Note, if the data pointer is used as the data itself, (see
* ftrace_func_mapper_find_ip(), then the return value may be meaningless,
* if the data pointer was set to zero.
@@ -5625,10 +5628,10 @@ EXPORT_SYMBOL_GPL(modify_ftrace_direct);
/**
* ftrace_set_filter_ip - set a function to filter on in ftrace by address
- * @ops - the ops to set the filter with
- * @ip - the address to add to or remove from the filter.
- * @remove - non zero to remove the ip from the filter
- * @reset - non zero to reset all filters before applying this filter.
+ * @ops: the ops to set the filter with
+ * @ip: the address to add to or remove from the filter.
+ * @remove: non zero to remove the ip from the filter
+ * @reset: non zero to reset all filters before applying this filter.
*
* Filters denote which functions should be enabled when tracing is enabled
* If @ip is NULL, it fails to update filter.
@@ -5647,11 +5650,11 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
/**
* ftrace_set_filter_ips - set functions to filter on in ftrace by addresses
- * @ops - the ops to set the filter with
- * @ips - the array of addresses to add to or remove from the filter.
- * @cnt - the number of addresses in @ips
- * @remove - non zero to remove ips from the filter
- * @reset - non zero to reset all filters before applying this filter.
+ * @ops: the ops to set the filter with
+ * @ips: the array of addresses to add to or remove from the filter.
+ * @cnt: the number of addresses in @ips
+ * @remove: non zero to remove ips from the filter
+ * @reset: non zero to reset all filters before applying this filter.
*
* Filters denote which functions should be enabled when tracing is enabled
* If @ips array or any ip specified within is NULL , it fails to update filter.
@@ -5670,7 +5673,7 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter_ips);
/**
* ftrace_ops_set_global_filter - setup ops to use global filters
- * @ops - the ops which will use the global filters
+ * @ops: the ops which will use the global filters
*
* ftrace users who need global function trace filtering should call this.
* It can set the global filter only if ops were not initialized before.
@@ -5694,10 +5697,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
/**
* ftrace_set_filter - set a function to filter on in ftrace
- * @ops - the ops to set the filter with
- * @buf - the string that holds the function filter text.
- * @len - the length of the string.
- * @reset - non zero to reset all filters before applying this filter.
+ * @ops: the ops to set the filter with
+ * @buf: the string that holds the function filter text.
+ * @len: the length of the string.
+ * @reset: non-zero to reset all filters before applying this filter.
*
* Filters denote which functions should be enabled when tracing is enabled.
* If @buf is NULL and reset is set, all functions will be enabled for tracing.
@@ -5716,10 +5719,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter);
/**
* ftrace_set_notrace - set a function to not trace in ftrace
- * @ops - the ops to set the notrace filter with
- * @buf - the string that holds the function notrace text.
- * @len - the length of the string.
- * @reset - non zero to reset all filters before applying this filter.
+ * @ops: the ops to set the notrace filter with
+ * @buf: the string that holds the function notrace text.
+ * @len: the length of the string.
+ * @reset: non-zero to reset all filters before applying this filter.
*
* Notrace Filters denote which functions should not be enabled when tracing
* is enabled. If @buf is NULL and reset is set, all functions will be enabled
@@ -5738,9 +5741,9 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
EXPORT_SYMBOL_GPL(ftrace_set_notrace);
/**
* ftrace_set_global_filter - set a function to filter on with global tracers
- * @buf - the string that holds the function filter text.
- * @len - the length of the string.
- * @reset - non zero to reset all filters before applying this filter.
+ * @buf: the string that holds the function filter text.
+ * @len: the length of the string.
+ * @reset: non-zero to reset all filters before applying this filter.
*
* Filters denote which functions should be enabled when tracing is enabled.
* If @buf is NULL and reset is set, all functions will be enabled for tracing.
@@ -5753,9 +5756,9 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_filter);
/**
* ftrace_set_global_notrace - set a function to not trace with global tracers
- * @buf - the string that holds the function notrace text.
- * @len - the length of the string.
- * @reset - non zero to reset all filters before applying this filter.
+ * @buf: the string that holds the function notrace text.
+ * @len: the length of the string.
+ * @reset: non-zero to reset all filters before applying this filter.
*
* Notrace Filters denote which functions should not be enabled when tracing
* is enabled. If @buf is NULL and reset is set, all functions will be enabled
@@ -7443,7 +7446,7 @@ NOKPROBE_SYMBOL(ftrace_ops_assist_func);
* have its own recursion protection, then it should call the
* ftrace_ops_assist_func() instead.
*
- * Returns the function that the trampoline should call for @ops.
+ * Returns: the function that the trampoline should call for @ops.
*/
ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
{
@@ -7897,7 +7900,7 @@ void ftrace_kill(void)
/**
* ftrace_is_dead - Test if ftrace is dead or not.
*
- * Returns 1 if ftrace is "dead", zero otherwise.
+ * Returns: 1 if ftrace is "dead", zero otherwise.
*/
int ftrace_is_dead(void)
{
@@ -8142,8 +8145,7 @@ static int kallsyms_callback(void *data, const char *name, unsigned long addr)
* @addrs array, which needs to be big enough to store at least @cnt
* addresses.
*
- * This function returns 0 if all provided symbols are found,
- * -ESRCH otherwise.
+ * Returns: 0 if all provided symbols are found, -ESRCH otherwise.
*/
int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs)
{
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0699027b4f4c..25476ead681b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -384,7 +384,7 @@ struct rb_irq_work {
struct irq_work work;
wait_queue_head_t waiters;
wait_queue_head_t full_waiters;
- long wait_index;
+ atomic_t seq;
bool waiters_pending;
bool full_waiters_pending;
bool wakeup_full;
@@ -754,10 +754,24 @@ static void rb_wake_up_waiters(struct irq_work *work)
{
struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+ /* For waiters waiting for the first wake up */
+ (void)atomic_fetch_inc_release(&rbwork->seq);
+
wake_up_all(&rbwork->waiters);
if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+ /* Only cpu_buffer sets the above flags */
+ struct ring_buffer_per_cpu *cpu_buffer =
+ container_of(rbwork, struct ring_buffer_per_cpu, irq_work);
+
+ /* Called from interrupt context */
+ raw_spin_lock(&cpu_buffer->reader_lock);
rbwork->wakeup_full = false;
rbwork->full_waiters_pending = false;
+
+ /* Waking up all waiters, they will reset the shortest full */
+ cpu_buffer->shortest_full = 0;
+ raw_spin_unlock(&cpu_buffer->reader_lock);
+
wake_up_all(&rbwork->full_waiters);
}
}
@@ -798,30 +812,115 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
rbwork = &cpu_buffer->irq_work;
}
- rbwork->wait_index++;
- /* make sure the waiters see the new index */
- smp_wmb();
-
/* This can be called in any context */
irq_work_queue(&rbwork->work);
}
+static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ bool ret = false;
+
+ /* Reads of all CPUs always wait for any data */
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ return !ring_buffer_empty(buffer);
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ if (!ring_buffer_empty_cpu(buffer, cpu)) {
+ unsigned long flags;
+ bool pagebusy;
+
+ if (!full)
+ return true;
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ ret = !pagebusy && full_hit(buffer, cpu, full);
+
+ if (!ret && (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)) {
+ cpu_buffer->shortest_full = full;
+ }
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ }
+ return ret;
+}
+
+static inline bool
+rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer,
+ int cpu, int full, ring_buffer_cond_fn cond, void *data)
+{
+ if (rb_watermark_hit(buffer, cpu, full))
+ return true;
+
+ if (cond(data))
+ return true;
+
+ /*
+ * The events can happen in critical sections where
+ * checking a work queue can cause deadlocks.
+ * After adding a task to the queue, this flag is set
+ * only to notify events to try to wake up the queue
+ * using irq_work.
+ *
+ * We don't clear it even if the buffer is no longer
+ * empty. The flag only causes the next event to run
+ * irq_work to do the work queue wake up. The worst
+ * that can happen if we race with !trace_empty() is that
+ * an event will cause an irq_work to try to wake up
+ * an empty queue.
+ *
+ * There's no reason to protect this flag either, as
+ * the work queue and irq_work logic will do the necessary
+ * synchronization for the wake ups. The only thing
+ * that is necessary is that the wake up happens after
+ * a task has been queued. It's OK for spurious wake ups.
+ */
+ if (full)
+ rbwork->full_waiters_pending = true;
+ else
+ rbwork->waiters_pending = true;
+
+ return false;
+}
+
+struct rb_wait_data {
+ struct rb_irq_work *irq_work;
+ int seq;
+};
+
+/*
+ * The default wait condition for ring_buffer_wait() is to just exit the
+ * wait loop the first time it is woken up.
+ */
+static bool rb_wait_once(void *data)
+{
+ struct rb_wait_data *rdata = data;
+ struct rb_irq_work *rbwork = rdata->irq_work;
+
+ return atomic_read_acquire(&rbwork->seq) != rdata->seq;
+}
+
/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
* @cpu: the cpu buffer to wait on
* @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ * @cond: condition function to break out of wait (NULL to run once)
+ * @data: the data to pass to @cond.
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*/
-int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+ ring_buffer_cond_fn cond, void *data)
{
struct ring_buffer_per_cpu *cpu_buffer;
- DEFINE_WAIT(wait);
- struct rb_irq_work *work;
- long wait_index;
+ struct wait_queue_head *waitq;
+ struct rb_irq_work *rbwork;
+ struct rb_wait_data rdata;
int ret = 0;
/*
@@ -830,90 +929,31 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* caller on the appropriate wait queue.
*/
if (cpu == RING_BUFFER_ALL_CPUS) {
- work = &buffer->irq_work;
+ rbwork = &buffer->irq_work;
/* Full only makes sense on per cpu reads */
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -ENODEV;
cpu_buffer = buffer->buffers[cpu];
- work = &cpu_buffer->irq_work;
- }
-
- wait_index = READ_ONCE(work->wait_index);
-
- while (true) {
- if (full)
- prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
- else
- prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
-
- /*
- * The events can happen in critical sections where
- * checking a work queue can cause deadlocks.
- * After adding a task to the queue, this flag is set
- * only to notify events to try to wake up the queue
- * using irq_work.
- *
- * We don't clear it even if the buffer is no longer
- * empty. The flag only causes the next event to run
- * irq_work to do the work queue wake up. The worse
- * that can happen if we race with !trace_empty() is that
- * an event will cause an irq_work to try to wake up
- * an empty queue.
- *
- * There's no reason to protect this flag either, as
- * the work queue and irq_work logic will do the necessary
- * synchronization for the wake ups. The only thing
- * that is necessary is that the wake up happens after
- * a task has been queued. It's OK for spurious wake ups.
- */
- if (full)
- work->full_waiters_pending = true;
- else
- work->waiters_pending = true;
-
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
-
- if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
- break;
-
- if (cpu != RING_BUFFER_ALL_CPUS &&
- !ring_buffer_empty_cpu(buffer, cpu)) {
- unsigned long flags;
- bool pagebusy;
- bool done;
-
- if (!full)
- break;
-
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
- pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
- done = !pagebusy && full_hit(buffer, cpu, full);
-
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- if (done)
- break;
- }
-
- schedule();
-
- /* Make sure to see the new wait index */
- smp_rmb();
- if (wait_index != work->wait_index)
- break;
+ rbwork = &cpu_buffer->irq_work;
}
if (full)
- finish_wait(&work->full_waiters, &wait);
+ waitq = &rbwork->full_waiters;
else
- finish_wait(&work->waiters, &wait);
+ waitq = &rbwork->waiters;
+
+ /* Set up to exit loop as soon as it is woken */
+ if (!cond) {
+ cond = rb_wait_once;
+ rdata.irq_work = rbwork;
+ rdata.seq = atomic_read_acquire(&rbwork->seq);
+ data = &rdata;
+ }
+
+ ret = wait_event_interruptible((*waitq),
+ rb_wait_cond(rbwork, buffer, cpu, full, cond, data));
return ret;
}
@@ -937,30 +977,44 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table, int full)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct rb_irq_work *work;
+ struct rb_irq_work *rbwork;
if (cpu == RING_BUFFER_ALL_CPUS) {
- work = &buffer->irq_work;
+ rbwork = &buffer->irq_work;
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return EPOLLERR;
cpu_buffer = buffer->buffers[cpu];
- work = &cpu_buffer->irq_work;
+ rbwork = &cpu_buffer->irq_work;
}
if (full) {
- poll_wait(filp, &work->full_waiters, poll_table);
- work->full_waiters_pending = true;
- if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full > full)
- cpu_buffer->shortest_full = full;
- } else {
- poll_wait(filp, &work->waiters, poll_table);
- work->waiters_pending = true;
+ poll_wait(filp, &rbwork->full_waiters, poll_table);
+
+ if (rb_watermark_hit(buffer, cpu, full))
+ return EPOLLIN | EPOLLRDNORM;
+ /*
+ * Only allow full_waiters_pending update to be seen after
+ * the shortest_full is set (in rb_watermark_hit). If the
+ * writer sees the full_waiters_pending flag set, it will
+ * compare the amount in the ring buffer to shortest_full.
+ * If the amount in the ring buffer is greater than the
+ * shortest_full percent, it will call the irq_work handler
+ * to wake up this list. The irq_handler will reset shortest_full
+ * back to zero. That's done under the reader_lock, but
+ * the below smp_mb() makes sure that the update to
+ * full_waiters_pending doesn't leak up into the above.
+ */
+ smp_mb();
+ rbwork->full_waiters_pending = true;
+ return 0;
}
+ poll_wait(filp, &rbwork->waiters, poll_table);
+ rbwork->waiters_pending = true;
+
/*
* There's a tight race between setting the waiters_pending and
* checking if the ring buffer is empty. Once the waiters_pending bit
@@ -976,9 +1030,6 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
*/
smp_mb();
- if (full)
- return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
-
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return EPOLLIN | EPOLLRDNORM;
@@ -1009,7 +1060,7 @@ static inline u64 rb_time_stamp(struct trace_buffer *buffer)
u64 ts;
/* Skip retpolines :-( */
- if (IS_ENABLED(CONFIG_RETPOLINE) && likely(buffer->clock == trace_clock_local))
+ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && likely(buffer->clock == trace_clock_local))
ts = trace_clock_local();
else
ts = buffer->clock();
@@ -1472,7 +1523,8 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
list_add(&bpage->list, pages);
- page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags,
+ page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+ mflags | __GFP_ZERO,
cpu_buffer->buffer->subbuf_order);
if (!page)
goto free_pages;
@@ -1557,7 +1609,8 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
cpu_buffer->reader_page = bpage;
- page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, cpu_buffer->buffer->subbuf_order);
+ page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_ZERO,
+ cpu_buffer->buffer->subbuf_order);
if (!page)
goto fail_free_reader;
bpage->page = page_address(page);
@@ -4337,7 +4390,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
cpu_buffer = iter->cpu_buffer;
reader = cpu_buffer->reader_page;
head_page = cpu_buffer->head_page;
- commit_page = cpu_buffer->commit_page;
+ commit_page = READ_ONCE(cpu_buffer->commit_page);
commit_ts = commit_page->page->time_stamp;
/*
@@ -5525,7 +5578,8 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
if (bpage->data)
goto out;
- page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY,
+ page = alloc_pages_node(cpu_to_node(cpu),
+ GFP_KERNEL | __GFP_NORETRY | __GFP_ZERO,
cpu_buffer->buffer->subbuf_order);
if (!page) {
kfree(bpage);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 71a96decc276..233d1af39fff 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -13,7 +13,7 @@
* Copyright (C) 2004 Nadia Yvette Chambers
*/
#include <linux/ring_buffer.h>
-#include <generated/utsrelease.h>
+#include <linux/utsname.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
@@ -39,7 +39,6 @@
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
-#include <linux/kmemleak.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
@@ -105,7 +104,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
* tracing is active, only save the comm when a trace event
* occurred.
*/
-static DEFINE_PER_CPU(bool, trace_taskinfo_save);
+DEFINE_PER_CPU(bool, trace_taskinfo_save);
/*
* Kill all tracing for good (never come back).
@@ -131,9 +130,12 @@ cpumask_var_t __read_mostly tracing_buffer_mask;
* /proc/sys/kernel/ftrace_dump_on_oops
* Set 1 if you want to dump buffers of all CPUs
* Set 2 if you want to dump the buffer of the CPU that triggered oops
+ * Set instance name if you want to dump the specific trace instance
+ * Multiple instance dump is also supported, and instances are separated
+ * by commas.
*/
-
-enum ftrace_dump_mode ftrace_dump_on_oops;
+/* Set to string format zero to disable by default */
+char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;
@@ -179,7 +181,6 @@ static void ftrace_trace_userstack(struct trace_array *tr,
struct trace_buffer *buffer,
unsigned int trace_ctx);
-#define MAX_TRACER_SIZE 100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;
@@ -202,19 +203,33 @@ static int __init set_cmdline_ftrace(char *str)
}
__setup("ftrace=", set_cmdline_ftrace);
+int ftrace_dump_on_oops_enabled(void)
+{
+ if (!strcmp("0", ftrace_dump_on_oops))
+ return 0;
+ else
+ return 1;
+}
+
static int __init set_ftrace_dump_on_oops(char *str)
{
- if (*str++ != '=' || !*str || !strcmp("1", str)) {
- ftrace_dump_on_oops = DUMP_ALL;
+ if (!*str) {
+ strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
return 1;
}
- if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
- ftrace_dump_on_oops = DUMP_ORIG;
- return 1;
- }
+ if (*str == ',') {
+ strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
+ strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
+ return 1;
+ }
- return 0;
+ if (*str++ == '=') {
+ strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
+ return 1;
+ }
+
+ return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
@@ -1301,6 +1316,50 @@ static void free_snapshot(struct trace_array *tr)
tr->allocated_snapshot = false;
}
+static int tracing_arm_snapshot_locked(struct trace_array *tr)
+{
+ int ret;
+
+ lockdep_assert_held(&trace_types_lock);
+
+ spin_lock(&tr->snapshot_trigger_lock);
+ if (tr->snapshot == UINT_MAX) {
+ spin_unlock(&tr->snapshot_trigger_lock);
+ return -EBUSY;
+ }
+
+ tr->snapshot++;
+ spin_unlock(&tr->snapshot_trigger_lock);
+
+ ret = tracing_alloc_snapshot_instance(tr);
+ if (ret) {
+ spin_lock(&tr->snapshot_trigger_lock);
+ tr->snapshot--;
+ spin_unlock(&tr->snapshot_trigger_lock);
+ }
+
+ return ret;
+}
+
+int tracing_arm_snapshot(struct trace_array *tr)
+{
+ int ret;
+
+ mutex_lock(&trace_types_lock);
+ ret = tracing_arm_snapshot_locked(tr);
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
+
+void tracing_disarm_snapshot(struct trace_array *tr)
+{
+ spin_lock(&tr->snapshot_trigger_lock);
+ if (!WARN_ON(!tr->snapshot))
+ tr->snapshot--;
+ spin_unlock(&tr->snapshot_trigger_lock);
+}
+
/**
* tracing_alloc_snapshot - allocate snapshot buffer.
*
@@ -1374,10 +1433,6 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
mutex_lock(&trace_types_lock);
- ret = tracing_alloc_snapshot_instance(tr);
- if (ret)
- goto fail_unlock;
-
if (tr->current_trace->use_max_tr) {
ret = -EBUSY;
goto fail_unlock;
@@ -1396,6 +1451,10 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
goto fail_unlock;
}
+ ret = tracing_arm_snapshot_locked(tr);
+ if (ret)
+ goto fail_unlock;
+
local_irq_disable();
arch_spin_lock(&tr->max_lock);
tr->cond_snapshot = cond_snapshot;
@@ -1440,6 +1499,8 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
arch_spin_unlock(&tr->max_lock);
local_irq_enable();
+ tracing_disarm_snapshot(tr);
+
return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
@@ -1482,6 +1543,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#define free_snapshot(tr) do { } while (0)
+#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
#endif /* CONFIG_TRACER_SNAPSHOT */
void tracer_tracing_off(struct trace_array *tr)
@@ -1955,15 +2017,36 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
#endif /* CONFIG_TRACER_MAX_TRACE */
+/*
+ * Data handed to wait_pipe_cond() through the opaque pointer argument of
+ * ring_buffer_wait().
+ */
+struct pipe_wait {
+ struct trace_iterator *iter; /* iterator whose wait_index/closed are checked */
+ int wait_index; /* iter->wait_index as sampled before waiting */
+};
+
+/*
+ * Condition callback that wait_on_pipe() hands to ring_buffer_wait().
+ *
+ * @data points to a struct pipe_wait. Reports true once the iterator's
+ * wait_index no longer matches the value recorded in the pipe_wait, or,
+ * failing that, when the iterator has been marked closed.
+ */
+static bool wait_pipe_cond(void *data)
+{
+	struct pipe_wait *pw = data;
+	struct trace_iterator *it = pw->iter;
+	bool index_moved;
+
+	index_moved = atomic_read_acquire(&it->wait_index) != pw->wait_index;
+
+	/* it->closed is only consulted when the index has not moved */
+	return index_moved || it->closed;
+}
+
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
+ struct pipe_wait pwait;
int ret;
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
return 0;
- ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
+ pwait.wait_index = atomic_read_acquire(&iter->wait_index);
+ pwait.iter = iter;
+
+ ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
+ wait_pipe_cond, &pwait);
#ifdef CONFIG_TRACER_MAX_TRACE
/*
@@ -2299,98 +2382,6 @@ void tracing_reset_all_online_cpus(void)
mutex_unlock(&trace_types_lock);
}
-/*
- * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
- * is the tgid last observed corresponding to pid=i.
- */
-static int *tgid_map;
-
-/* The maximum valid index into tgid_map. */
-static size_t tgid_map_max;
-
-#define SAVED_CMDLINES_DEFAULT 128
-#define NO_CMDLINE_MAP UINT_MAX
-/*
- * Preemption must be disabled before acquiring trace_cmdline_lock.
- * The various trace_arrays' max_lock must be acquired in a context
- * where interrupt is disabled.
- */
-static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-struct saved_cmdlines_buffer {
- unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
- unsigned *map_cmdline_to_pid;
- unsigned cmdline_num;
- int cmdline_idx;
- char saved_cmdlines[];
-};
-static struct saved_cmdlines_buffer *savedcmd;
-
-static inline char *get_saved_cmdlines(int idx)
-{
- return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
-}
-
-static inline void set_cmdline(int idx, const char *cmdline)
-{
- strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
-}
-
-static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
-{
- int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
-
- kfree(s->map_cmdline_to_pid);
- kmemleak_free(s);
- free_pages((unsigned long)s, order);
-}
-
-static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
-{
- struct saved_cmdlines_buffer *s;
- struct page *page;
- int orig_size, size;
- int order;
-
- /* Figure out how much is needed to hold the given number of cmdlines */
- orig_size = sizeof(*s) + val * TASK_COMM_LEN;
- order = get_order(orig_size);
- size = 1 << (order + PAGE_SHIFT);
- page = alloc_pages(GFP_KERNEL, order);
- if (!page)
- return NULL;
-
- s = page_address(page);
- kmemleak_alloc(s, size, 1, GFP_KERNEL);
- memset(s, 0, sizeof(*s));
-
- /* Round up to actual allocation */
- val = (size - sizeof(*s)) / TASK_COMM_LEN;
- s->cmdline_num = val;
-
- s->map_cmdline_to_pid = kmalloc_array(val,
- sizeof(*s->map_cmdline_to_pid),
- GFP_KERNEL);
- if (!s->map_cmdline_to_pid) {
- free_saved_cmdlines_buffer(s);
- return NULL;
- }
-
- s->cmdline_idx = 0;
- memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
- sizeof(s->map_pid_to_cmdline));
- memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
- val * sizeof(*s->map_cmdline_to_pid));
-
- return s;
-}
-
-static int trace_create_savedcmd(void)
-{
- savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
-
- return savedcmd ? 0 : -ENOMEM;
-}
-
int is_tracing_stopped(void)
{
return global_trace.stop_count;
@@ -2483,201 +2474,6 @@ void tracing_stop(void)
return tracing_stop_tr(&global_trace);
}
-static int trace_save_cmdline(struct task_struct *tsk)
-{
- unsigned tpid, idx;
-
- /* treat recording of idle task as a success */
- if (!tsk->pid)
- return 1;
-
- tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
-
- /*
- * It's not the end of the world if we don't get
- * the lock, but we also don't want to spin
- * nor do we want to disable interrupts,
- * so if we miss here, then better luck next time.
- *
- * This is called within the scheduler and wake up, so interrupts
- * had better been disabled and run queue lock been held.
- */
- lockdep_assert_preemption_disabled();
- if (!arch_spin_trylock(&trace_cmdline_lock))
- return 0;
-
- idx = savedcmd->map_pid_to_cmdline[tpid];
- if (idx == NO_CMDLINE_MAP) {
- idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
-
- savedcmd->map_pid_to_cmdline[tpid] = idx;
- savedcmd->cmdline_idx = idx;
- }
-
- savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
- set_cmdline(idx, tsk->comm);
-
- arch_spin_unlock(&trace_cmdline_lock);
-
- return 1;
-}
-
-static void __trace_find_cmdline(int pid, char comm[])
-{
- unsigned map;
- int tpid;
-
- if (!pid) {
- strcpy(comm, "<idle>");
- return;
- }
-
- if (WARN_ON_ONCE(pid < 0)) {
- strcpy(comm, "<XXX>");
- return;
- }
-
- tpid = pid & (PID_MAX_DEFAULT - 1);
- map = savedcmd->map_pid_to_cmdline[tpid];
- if (map != NO_CMDLINE_MAP) {
- tpid = savedcmd->map_cmdline_to_pid[map];
- if (tpid == pid) {
- strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
- return;
- }
- }
- strcpy(comm, "<...>");
-}
-
-void trace_find_cmdline(int pid, char comm[])
-{
- preempt_disable();
- arch_spin_lock(&trace_cmdline_lock);
-
- __trace_find_cmdline(pid, comm);
-
- arch_spin_unlock(&trace_cmdline_lock);
- preempt_enable();
-}
-
-static int *trace_find_tgid_ptr(int pid)
-{
- /*
- * Pairs with the smp_store_release in set_tracer_flag() to ensure that
- * if we observe a non-NULL tgid_map then we also observe the correct
- * tgid_map_max.
- */
- int *map = smp_load_acquire(&tgid_map);
-
- if (unlikely(!map || pid > tgid_map_max))
- return NULL;
-
- return &map[pid];
-}
-
-int trace_find_tgid(int pid)
-{
- int *ptr = trace_find_tgid_ptr(pid);
-
- return ptr ? *ptr : 0;
-}
-
-static int trace_save_tgid(struct task_struct *tsk)
-{
- int *ptr;
-
- /* treat recording of idle task as a success */
- if (!tsk->pid)
- return 1;
-
- ptr = trace_find_tgid_ptr(tsk->pid);
- if (!ptr)
- return 0;
-
- *ptr = tsk->tgid;
- return 1;
-}
-
-static bool tracing_record_taskinfo_skip(int flags)
-{
- if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
- return true;
- if (!__this_cpu_read(trace_taskinfo_save))
- return true;
- return false;
-}
-
-/**
- * tracing_record_taskinfo - record the task info of a task
- *
- * @task: task to record
- * @flags: TRACE_RECORD_CMDLINE for recording comm
- * TRACE_RECORD_TGID for recording tgid
- */
-void tracing_record_taskinfo(struct task_struct *task, int flags)
-{
- bool done;
-
- if (tracing_record_taskinfo_skip(flags))
- return;
-
- /*
- * Record as much task information as possible. If some fail, continue
- * to try to record the others.
- */
- done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
- done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
-
- /* If recording any information failed, retry again soon. */
- if (!done)
- return;
-
- __this_cpu_write(trace_taskinfo_save, false);
-}
-
-/**
- * tracing_record_taskinfo_sched_switch - record task info for sched_switch
- *
- * @prev: previous task during sched_switch
- * @next: next task during sched_switch
- * @flags: TRACE_RECORD_CMDLINE for recording comm
- * TRACE_RECORD_TGID for recording tgid
- */
-void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
- struct task_struct *next, int flags)
-{
- bool done;
-
- if (tracing_record_taskinfo_skip(flags))
- return;
-
- /*
- * Record as much task information as possible. If some fail, continue
- * to try to record the others.
- */
- done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
- done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
- done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
- done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
-
- /* If recording any information failed, retry again soon. */
- if (!done)
- return;
-
- __this_cpu_write(trace_taskinfo_save, false);
-}
-
-/* Helpers to record a specific task information */
-void tracing_record_cmdline(struct task_struct *task)
-{
- tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
-}
-
-void tracing_record_tgid(struct task_struct *task)
-{
- tracing_record_taskinfo(task, TRACE_RECORD_TGID);
-}
-
/*
* Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
* overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
@@ -4368,7 +4164,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
get_total_entries(buf, &total, &entries);
seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
- name, UTS_RELEASE);
+ name, init_utsname()->release);
seq_puts(m, "# -----------------------------------"
"---------------------------------\n");
seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
@@ -5436,8 +5232,6 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
- int *map;
-
if ((mask == TRACE_ITER_RECORD_TGID) ||
(mask == TRACE_ITER_RECORD_CMD))
lockdep_assert_held(&event_mutex);
@@ -5460,20 +5254,8 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
trace_event_enable_cmd_record(enabled);
if (mask == TRACE_ITER_RECORD_TGID) {
- if (!tgid_map) {
- tgid_map_max = pid_max;
- map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
- GFP_KERNEL);
- /*
- * Pairs with smp_load_acquire() in
- * trace_find_tgid_ptr() to ensure that if it observes
- * the tgid_map we just allocated then it also observes
- * the corresponding tgid_map_max value.
- */
- smp_store_release(&tgid_map, map);
- }
- if (!tgid_map) {
+ if (trace_alloc_tgid_map() < 0) {
tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
return -ENOMEM;
}
@@ -5917,207 +5699,6 @@ static const struct file_operations tracing_readme_fops = {
.llseek = generic_file_llseek,
};
-static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
-{
- int pid = ++(*pos);
-
- return trace_find_tgid_ptr(pid);
-}
-
-static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
-{
- int pid = *pos;
-
- return trace_find_tgid_ptr(pid);
-}
-
-static void saved_tgids_stop(struct seq_file *m, void *v)
-{
-}
-
-static int saved_tgids_show(struct seq_file *m, void *v)
-{
- int *entry = (int *)v;
- int pid = entry - tgid_map;
- int tgid = *entry;
-
- if (tgid == 0)
- return SEQ_SKIP;
-
- seq_printf(m, "%d %d\n", pid, tgid);
- return 0;
-}
-
-static const struct seq_operations tracing_saved_tgids_seq_ops = {
- .start = saved_tgids_start,
- .stop = saved_tgids_stop,
- .next = saved_tgids_next,
- .show = saved_tgids_show,
-};
-
-static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
-{
- int ret;
-
- ret = tracing_check_open_get_tr(NULL);
- if (ret)
- return ret;
-
- return seq_open(filp, &tracing_saved_tgids_seq_ops);
-}
-
-
-static const struct file_operations tracing_saved_tgids_fops = {
- .open = tracing_saved_tgids_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
-{
- unsigned int *ptr = v;
-
- if (*pos || m->count)
- ptr++;
-
- (*pos)++;
-
- for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
- ptr++) {
- if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
- continue;
-
- return ptr;
- }
-
- return NULL;
-}
-
-static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
-{
- void *v;
- loff_t l = 0;
-
- preempt_disable();
- arch_spin_lock(&trace_cmdline_lock);
-
- v = &savedcmd->map_cmdline_to_pid[0];
- while (l <= *pos) {
- v = saved_cmdlines_next(m, v, &l);
- if (!v)
- return NULL;
- }
-
- return v;
-}
-
-static void saved_cmdlines_stop(struct seq_file *m, void *v)
-{
- arch_spin_unlock(&trace_cmdline_lock);
- preempt_enable();
-}
-
-static int saved_cmdlines_show(struct seq_file *m, void *v)
-{
- char buf[TASK_COMM_LEN];
- unsigned int *pid = v;
-
- __trace_find_cmdline(*pid, buf);
- seq_printf(m, "%d %s\n", *pid, buf);
- return 0;
-}
-
-static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
- .start = saved_cmdlines_start,
- .next = saved_cmdlines_next,
- .stop = saved_cmdlines_stop,
- .show = saved_cmdlines_show,
-};
-
-static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
-{
- int ret;
-
- ret = tracing_check_open_get_tr(NULL);
- if (ret)
- return ret;
-
- return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
-}
-
-static const struct file_operations tracing_saved_cmdlines_fops = {
- .open = tracing_saved_cmdlines_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-static ssize_t
-tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- char buf[64];
- int r;
-
- preempt_disable();
- arch_spin_lock(&trace_cmdline_lock);
- r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
- arch_spin_unlock(&trace_cmdline_lock);
- preempt_enable();
-
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static int tracing_resize_saved_cmdlines(unsigned int val)
-{
- struct saved_cmdlines_buffer *s, *savedcmd_temp;
-
- s = allocate_cmdlines_buffer(val);
- if (!s)
- return -ENOMEM;
-
- preempt_disable();
- arch_spin_lock(&trace_cmdline_lock);
- savedcmd_temp = savedcmd;
- savedcmd = s;
- arch_spin_unlock(&trace_cmdline_lock);
- preempt_enable();
- free_saved_cmdlines_buffer(savedcmd_temp);
-
- return 0;
-}
-
-static ssize_t
-tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- unsigned long val;
- int ret;
-
- ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
- if (ret)
- return ret;
-
- /* must have at least 1 entry or less than PID_MAX_DEFAULT */
- if (!val || val > PID_MAX_DEFAULT)
- return -EINVAL;
-
- ret = tracing_resize_saved_cmdlines((unsigned int)val);
- if (ret < 0)
- return ret;
-
- *ppos += cnt;
-
- return cnt;
-}
-
-static const struct file_operations tracing_saved_cmdlines_size_fops = {
- .open = tracing_open_generic,
- .read = tracing_saved_cmdlines_size_read,
- .write = tracing_saved_cmdlines_size_write,
-};
-
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item *ptr)
@@ -6594,11 +6175,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
*/
synchronize_rcu();
free_snapshot(tr);
+ tracing_disarm_snapshot(tr);
}
- if (t->use_max_tr && !tr->allocated_snapshot) {
- ret = tracing_alloc_snapshot_instance(tr);
- if (ret < 0)
+ if (!had_max_tr && t->use_max_tr) {
+ ret = tracing_arm_snapshot_locked(tr);
+ if (ret)
goto out;
}
#else
@@ -6607,8 +6189,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
if (t->init) {
ret = tracer_init(t, tr);
- if (ret)
+ if (ret) {
+#ifdef CONFIG_TRACER_MAX_TRACE
+ if (t->use_max_tr)
+ tracing_disarm_snapshot(tr);
+#endif
goto out;
+ }
}
tr->current_trace = t;
@@ -7292,6 +6879,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
return 0;
}
+#define TRACE_MARKER_MAX_SIZE 4096
+
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
@@ -7319,6 +6908,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if ((ssize_t)cnt < 0)
return -EINVAL;
+ if (cnt > TRACE_MARKER_MAX_SIZE)
+ cnt = TRACE_MARKER_MAX_SIZE;
+
meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
again:
size = cnt + meta_size;
@@ -7327,11 +6919,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (cnt < FAULTED_SIZE)
size += FAULTED_SIZE - cnt;
- if (size > TRACE_SEQ_BUFFER_SIZE) {
- cnt -= size - TRACE_SEQ_BUFFER_SIZE;
- goto again;
- }
-
buffer = tr->array_buffer.buffer;
event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
tracing_gen_ctx());
@@ -7710,10 +7297,11 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (tr->allocated_snapshot)
ret = resize_buffer_duplicate_size(&tr->max_buffer,
&tr->array_buffer, iter->cpu_file);
- else
- ret = tracing_alloc_snapshot_instance(tr);
- if (ret < 0)
+
+ ret = tracing_arm_snapshot_locked(tr);
+ if (ret)
break;
+
/* Now, we're going to swap */
if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
local_irq_disable();
@@ -7723,6 +7311,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
(void *)tr, 1);
}
+ tracing_disarm_snapshot(tr);
break;
default:
if (tr->allocated_snapshot) {
@@ -8392,6 +7981,20 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
return size;
}
+/*
+ * .flush handler of tracing_buffers_fops.
+ *
+ * Marks the iterator stored in file->private_data as closed, bumps its
+ * wait_index with release semantics and wakes the ring buffer waiters for
+ * the iterator's CPU file, so that wait_pipe_cond() evaluates to true for
+ * them. Always returns 0.
+ */
+static int tracing_buffers_flush(struct file *file, fl_owner_t id)
+{
+ struct ftrace_buffer_info *info = file->private_data;
+ struct trace_iterator *iter = &info->iter;
+
+ iter->closed = true;
+ /* Make sure the waiters see the new wait_index */
+ (void)atomic_fetch_inc_release(&iter->wait_index);
+
+ ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
+ return 0;
+}
+
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
struct ftrace_buffer_info *info = file->private_data;
@@ -8403,12 +8006,6 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
__trace_array_put(iter->tr);
- iter->wait_index++;
- /* Make sure the waiters see the new wait_index */
- smp_wmb();
-
- ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
-
if (info->spare)
ring_buffer_free_read_page(iter->array_buffer->buffer,
info->spare_cpu, info->spare);
@@ -8491,6 +8088,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
.spd_release = buffer_spd_release,
};
struct buffer_ref *ref;
+ bool woken = false;
int page_size;
int entries, i;
ssize_t ret = 0;
@@ -8564,17 +8162,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
/* did we read anything? */
if (!spd.nr_pages) {
- long wait_index;
if (ret)
goto out;
+ if (woken)
+ goto out;
+
ret = -EAGAIN;
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
goto out;
- wait_index = READ_ONCE(iter->wait_index);
-
ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
if (ret)
goto out;
@@ -8583,10 +8181,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
if (!tracer_tracing_is_on(iter->tr))
goto out;
- /* Make sure we see the new wait_index */
- smp_rmb();
- if (wait_index != iter->wait_index)
- goto out;
+ /* Iterate one more time to collect any new data then exit */
+ woken = true;
goto again;
}
@@ -8609,9 +8205,8 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned
mutex_lock(&trace_types_lock);
- iter->wait_index++;
/* Make sure the waiters see the new wait_index */
- smp_wmb();
+ (void)atomic_fetch_inc_release(&iter->wait_index);
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
@@ -8624,6 +8219,7 @@ static const struct file_operations tracing_buffers_fops = {
.read = tracing_buffers_read,
.poll = tracing_buffers_poll,
.release = tracing_buffers_release,
+ .flush = tracing_buffers_flush,
.splice_read = tracing_buffers_splice_read,
.unlocked_ioctl = tracing_buffers_ioctl,
.llseek = no_llseek,
@@ -8847,8 +8443,13 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
- if (glob[0] == '!')
- return unregister_ftrace_function_probe_func(glob+1, tr, ops);
+ if (glob[0] == '!') {
+ ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
+ if (!ret)
+ tracing_disarm_snapshot(tr);
+
+ return ret;
+ }
if (!param)
goto out_reg;
@@ -8867,12 +8468,13 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
return ret;
out_reg:
- ret = tracing_alloc_snapshot_instance(tr);
+ ret = tracing_arm_snapshot(tr);
if (ret < 0)
goto out;
ret = register_ftrace_function_probe(glob, tr, ops, count);
-
+ if (ret < 0)
+ tracing_disarm_snapshot(tr);
out:
return ret < 0 ? ret : 0;
}
@@ -9679,7 +9281,9 @@ trace_array_create_systems(const char *name, const char *systems)
raw_spin_lock_init(&tr->start_lock);
tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
-
+#ifdef CONFIG_TRACER_MAX_TRACE
+ spin_lock_init(&tr->snapshot_trigger_lock);
+#endif
tr->current_trace = &nop_trace;
INIT_LIST_HEAD(&tr->systems);
@@ -10244,14 +9848,14 @@ static struct notifier_block trace_die_notifier = {
static int trace_die_panic_handler(struct notifier_block *self,
unsigned long ev, void *unused)
{
- if (!ftrace_dump_on_oops)
+ if (!ftrace_dump_on_oops_enabled())
return NOTIFY_DONE;
/* The die notifier requires DIE_OOPS to trigger */
if (self == &trace_die_notifier && ev != DIE_OOPS)
return NOTIFY_DONE;
- ftrace_dump(ftrace_dump_on_oops);
+ ftrace_dump(DUMP_PARAM);
return NOTIFY_DONE;
}
@@ -10292,12 +9896,12 @@ trace_printk_seq(struct trace_seq *s)
trace_seq_init(s);
}
-void trace_init_global_iter(struct trace_iterator *iter)
+static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
- iter->tr = &global_trace;
+ iter->tr = tr;
iter->trace = iter->tr->current_trace;
iter->cpu_file = RING_BUFFER_ALL_CPUS;
- iter->array_buffer = &global_trace.array_buffer;
+ iter->array_buffer = &tr->array_buffer;
if (iter->trace && iter->trace->open)
iter->trace->open(iter);
@@ -10317,22 +9921,19 @@ void trace_init_global_iter(struct trace_iterator *iter)
iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
-void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
+/* Set up @iter for global_trace; thin wrapper around trace_init_iter(). */
+void trace_init_global_iter(struct trace_iterator *iter)
+{
+ trace_init_iter(iter, &global_trace);
+}
+
+static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
- static atomic_t dump_running;
- struct trace_array *tr = &global_trace;
unsigned int old_userobj;
unsigned long flags;
int cnt = 0, cpu;
- /* Only allow one dump user at a time. */
- if (atomic_inc_return(&dump_running) != 1) {
- atomic_dec(&dump_running);
- return;
- }
-
/*
* Always turn off tracing when we dump.
* We don't need to show trace output of what happens
@@ -10341,12 +9942,12 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
* If the user does a sysrq-z, then they can re-enable
* tracing with echo 1 > tracing_on.
*/
- tracing_off();
+ tracer_tracing_off(tr);
local_irq_save(flags);
/* Simulate the iterator */
- trace_init_global_iter(&iter);
+ trace_init_iter(&iter, tr);
for_each_tracing_cpu(cpu) {
atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
@@ -10357,21 +9958,15 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
/* don't look at user memory in panic mode */
tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
- switch (oops_dump_mode) {
- case DUMP_ALL:
- iter.cpu_file = RING_BUFFER_ALL_CPUS;
- break;
- case DUMP_ORIG:
+ if (dump_mode == DUMP_ORIG)
iter.cpu_file = raw_smp_processor_id();
- break;
- case DUMP_NONE:
- goto out_enable;
- default:
- printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
+ else
iter.cpu_file = RING_BUFFER_ALL_CPUS;
- }
- printk(KERN_TRACE "Dumping ftrace buffer:\n");
+ if (tr == &global_trace)
+ printk(KERN_TRACE "Dumping ftrace buffer:\n");
+ else
+ printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
/* Did function tracer already get disabled? */
if (ftrace_is_dead()) {
@@ -10413,15 +10008,84 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
else
printk(KERN_TRACE "---------------------------------\n");
- out_enable:
tr->trace_flags |= old_userobj;
for_each_tracing_cpu(cpu) {
atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
}
- atomic_dec(&dump_running);
local_irq_restore(flags);
}
+
+/*
+ * Dump the buffers selected by the ftrace_dump_on_oops string.
+ *
+ * The string is copied to a local buffer and split on ','. The first entry
+ * may be "0" (nothing for the global buffer), "1" (dump global_trace on all
+ * CPUs) or "2"/"orig_cpu" (dump global_trace with DUMP_ORIG). Any other entry
+ * is read as "instance" or "instance=mode": the instance is looked up with
+ * trace_array_find() and dumped with DUMP_ORIG when mode is "2" or
+ * "orig_cpu", with DUMP_ALL otherwise. Unknown instances are reported and
+ * skipped.
+ */
+static void ftrace_dump_by_param(void)
+{
+	char dump_param[MAX_TRACER_SIZE];
+	char *rest = dump_param;
+	char *entry;
+	bool is_first = true;
+
+	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
+
+	for (entry = strsep(&rest, ","); entry != NULL;
+	     entry = strsep(&rest, ",")) {
+		struct trace_array *tr;
+		char *inst_name;
+		bool orig_cpu;
+
+		/* Only the first entry may address the global buffer by number */
+		if (is_first) {
+			is_first = false;
+
+			if (!strcmp("0", entry))
+				continue;
+
+			if (!strcmp("1", entry)) {
+				ftrace_dump_one(&global_trace, DUMP_ALL);
+				continue;
+			}
+
+			if (!strcmp("2", entry) || !strcmp("orig_cpu", entry)) {
+				ftrace_dump_one(&global_trace, DUMP_ORIG);
+				continue;
+			}
+		}
+
+		/* After this, entry is the text following '=' or NULL */
+		inst_name = strsep(&entry, "=");
+		tr = trace_array_find(inst_name);
+		if (!tr) {
+			printk(KERN_TRACE "Instance %s not found\n", inst_name);
+			continue;
+		}
+
+		orig_cpu = entry &&
+			   (!strcmp("2", entry) || !strcmp("orig_cpu", entry));
+
+		ftrace_dump_one(tr, orig_cpu ? DUMP_ORIG : DUMP_ALL);
+	}
+}
+
+/*
+ * Dump trace buffers according to @oops_dump_mode.
+ *
+ * DUMP_ALL and DUMP_ORIG dump global_trace in that mode, DUMP_PARAM defers to
+ * ftrace_dump_by_param(), DUMP_NONE does nothing, and any other value is
+ * reported and handled like DUMP_ALL. Only one dump may run at a time; a
+ * caller that finds another dump in progress returns without dumping.
+ */
+void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
+{
+	static atomic_t dump_running;
+
+	/* Only allow one dump user at a time. */
+	if (atomic_inc_return(&dump_running) != 1)
+		goto out;
+
+	if (oops_dump_mode == DUMP_PARAM) {
+		ftrace_dump_by_param();
+	} else if (oops_dump_mode == DUMP_ORIG) {
+		ftrace_dump_one(&global_trace, DUMP_ORIG);
+	} else if (oops_dump_mode != DUMP_NONE) {
+		/* DUMP_ALL, or an unknown mode that falls back to it */
+		if (oops_dump_mode != DUMP_ALL)
+			printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
+		ftrace_dump_one(&global_trace, DUMP_ALL);
+	}
+
+out:
+	/* Balances the increment above on both the busy and the normal path */
+	atomic_dec(&dump_running);
+}
EXPORT_SYMBOL_GPL(ftrace_dump);
#define WRITE_BUFSIZE 4096
@@ -10649,7 +10313,9 @@ __init static int tracer_alloc_buffers(void)
global_trace.current_trace = &nop_trace;
global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
-
+#ifdef CONFIG_TRACER_MAX_TRACE
+ spin_lock_init(&global_trace.snapshot_trigger_lock);
+#endif
ftrace_init_global_array_ops(&global_trace);
init_trace_flags_index(&global_trace);
@@ -10686,7 +10352,7 @@ __init static int tracer_alloc_buffers(void)
out_free_pipe_cpumask:
free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
- free_saved_cmdlines_buffer(savedcmd);
+ trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
ring_buffer_free(temp_buffer);
out_rm_hp_state:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 00f873910c5d..64450615ca0c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -334,8 +334,8 @@ struct trace_array {
*/
struct array_buffer max_buffer;
bool allocated_snapshot;
-#endif
-#ifdef CONFIG_TRACER_MAX_TRACE
+ spinlock_t snapshot_trigger_lock;
+ unsigned int snapshot;
unsigned long max_latency;
#ifdef CONFIG_FSNOTIFY
struct dentry *d_max_latency;
@@ -1375,6 +1375,16 @@ static inline void trace_buffer_unlock_commit(struct trace_array *tr,
trace_buffer_unlock_commit_regs(tr, buffer, event, trace_ctx, NULL);
}
+DECLARE_PER_CPU(bool, trace_taskinfo_save);
+int trace_save_cmdline(struct task_struct *tsk);
+int trace_create_savedcmd(void);
+int trace_alloc_tgid_map(void);
+void trace_free_saved_cmdlines_buffer(void);
+
+extern const struct file_operations tracing_saved_cmdlines_fops;
+extern const struct file_operations tracing_saved_tgids_fops;
+extern const struct file_operations tracing_saved_cmdlines_size_fops;
+
DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
DECLARE_PER_CPU(int, trace_buffered_event_cnt);
void trace_buffered_event_disable(void);
@@ -1973,12 +1983,16 @@ static inline void trace_event_eval_update(struct trace_eval_map **map, int len)
#ifdef CONFIG_TRACER_SNAPSHOT
void tracing_snapshot_instance(struct trace_array *tr);
int tracing_alloc_snapshot_instance(struct trace_array *tr);
+int tracing_arm_snapshot(struct trace_array *tr);
+void tracing_disarm_snapshot(struct trace_array *tr);
#else
static inline void tracing_snapshot_instance(struct trace_array *tr) { }
static inline int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
return 0;
}
+/* Stubs for !CONFIG_TRACER_SNAPSHOT: arming reports success, disarming does nothing. */
+static inline int tracing_arm_snapshot(struct trace_array *tr) { return 0; }
+static inline void tracing_disarm_snapshot(struct trace_array *tr) { }
#endif
#ifdef CONFIG_PREEMPT_TRACER
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 54d5fa35c90a..811b08439406 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -92,7 +92,6 @@ static void trace_do_benchmark(void)
bm_total += delta;
bm_totalsq += delta * delta;
-
if (bm_cnt > 1) {
/*
* Apply Welford's method to calculate standard deviation:
@@ -105,7 +104,7 @@ static void trace_do_benchmark(void)
stddev = 0;
delta = bm_total;
- do_div(delta, bm_cnt);
+ delta = div64_u64(delta, bm_cnt);
avg = delta;
if (stddev > 0) {
@@ -127,7 +126,7 @@ static void trace_do_benchmark(void)
seed = stddev;
if (!last_seed)
break;
- do_div(seed, last_seed);
+ seed = div64_u64(seed, last_seed);
seed += last_seed;
do_div(seed, 2);
} while (i++ < 10 && last_seed != seed);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index b33c3861fbbb..4bec043c8690 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -597,20 +597,12 @@ out:
return ret;
}
-/**
- * unregister_trigger - Generic event_command @unreg implementation
- * @glob: The raw string used to register the trigger
- * @test: Trigger-specific data used to find the trigger to remove
- * @file: The trace_event_file associated with the event
- *
- * Common implementation for event trigger unregistration.
- *
- * Usually used directly as the @unreg method in event command
- * implementations.
+/*
+ * True if the trigger was found and unregistered, else false.
*/
-static void unregister_trigger(char *glob,
- struct event_trigger_data *test,
- struct trace_event_file *file)
+static bool try_unregister_trigger(char *glob,
+ struct event_trigger_data *test,
+ struct trace_event_file *file)
{
struct event_trigger_data *data = NULL, *iter;
@@ -626,8 +618,32 @@ static void unregister_trigger(char *glob,
}
}
- if (data && data->ops->free)
- data->ops->free(data);
+ if (data) {
+ if (data->ops->free)
+ data->ops->free(data);
+
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * unregister_trigger - Generic event_command @unreg implementation
+ * @glob: The raw string used to register the trigger
+ * @test: Trigger-specific data used to find the trigger to remove
+ * @file: The trace_event_file associated with the event
+ *
+ * Common implementation for event trigger unregistration.
+ *
+ * Usually used directly as the @unreg method in event command
+ * implementations.
+ *
+ * Thin wrapper around try_unregister_trigger() that discards its
+ * found/not-found result.
+ */
+static void unregister_trigger(char *glob,
+ struct event_trigger_data *test,
+ struct trace_event_file *file)
+{
+ try_unregister_trigger(glob, test, file);
+}
/*
@@ -1470,12 +1486,23 @@ register_snapshot_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
- int ret = tracing_alloc_snapshot_instance(file->tr);
+ int ret = tracing_arm_snapshot(file->tr);
if (ret < 0)
return ret;
- return register_trigger(glob, data, file);
+ ret = register_trigger(glob, data, file);
+ if (ret < 0)
+ tracing_disarm_snapshot(file->tr);
+ return ret;
+}
+
+/*
+ * @unreg method of the snapshot trigger command.
+ *
+ * Calls tracing_disarm_snapshot() on the file's trace array only when
+ * try_unregister_trigger() reports that a trigger was actually found and
+ * unregistered, which pairs with the tracing_arm_snapshot() call made in
+ * register_snapshot_trigger().
+ */
+static void unregister_snapshot_trigger(char *glob,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
+{
+ if (try_unregister_trigger(glob, data, file))
+ tracing_disarm_snapshot(file->tr);
+}
static int
@@ -1510,7 +1537,7 @@ static struct event_command trigger_snapshot_cmd = {
.trigger_type = ETT_SNAPSHOT,
.parse = event_trigger_parse,
.reg = register_snapshot_trigger,
- .unreg = unregister_trigger,
+ .unreg = unregister_snapshot_trigger,
.get_trigger_ops = snapshot_get_trigger_ops,
.set_filter = set_trigger_filter,
};
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index e76f5e1efdf2..70d428c394b6 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -34,7 +34,8 @@
/* Limit how long of an event name plus args within the subsystem. */
#define MAX_EVENT_DESC 512
-#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
+#define EVENT_NAME(user_event) ((user_event)->reg_name)
+#define EVENT_TP_NAME(user_event) ((user_event)->tracepoint.name)
#define MAX_FIELD_ARRAY_SIZE 1024
/*
@@ -54,10 +55,13 @@
* allows isolation for events by various means.
*/
struct user_event_group {
- char *system_name;
- struct hlist_node node;
- struct mutex reg_mutex;
+ char *system_name;
+ char *system_multi_name;
+ struct hlist_node node;
+ struct mutex reg_mutex;
DECLARE_HASHTABLE(register_table, 8);
+ /* ID that moves forward within the group for multi-event names */
+ u64 multi_id;
};
/* Group for init_user_ns mapping, top-most group */
@@ -78,6 +82,7 @@ static unsigned int current_user_events;
*/
struct user_event {
struct user_event_group *group;
+ char *reg_name;
struct tracepoint tracepoint;
struct trace_event_call call;
struct trace_event_class class;
@@ -127,6 +132,8 @@ struct user_event_enabler {
#define ENABLE_BIT(e) ((int)((e)->values & ENABLE_VAL_BIT_MASK))
+#define EVENT_MULTI_FORMAT(f) ((f) & USER_EVENT_REG_MULTI_FORMAT)
+
/* Used for asynchronous faulting in of pages */
struct user_event_enabler_fault {
struct work_struct work;
@@ -202,6 +209,8 @@ static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm);
static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
static void user_event_mm_put(struct user_event_mm *mm);
static int destroy_user_event(struct user_event *user);
+static bool user_fields_match(struct user_event *user, int argc,
+ const char **argv);
static u32 user_event_key(char *name)
{
@@ -328,6 +337,7 @@ out:
static void user_event_group_destroy(struct user_event_group *group)
{
kfree(group->system_name);
+ kfree(group->system_multi_name);
kfree(group);
}
@@ -346,6 +356,11 @@ static char *user_event_group_system_name(void)
return system_name;
}
+static char *user_event_group_system_multi_name(void)
+{
+ return kstrdup(USER_EVENTS_MULTI_SYSTEM, GFP_KERNEL);
+}
+
static struct user_event_group *current_user_event_group(void)
{
return init_group;
@@ -365,6 +380,11 @@ static struct user_event_group *user_event_group_create(void)
if (!group->system_name)
goto error;
+ group->system_multi_name = user_event_group_system_multi_name();
+
+ if (!group->system_multi_name)
+ goto error;
+
mutex_init(&group->reg_mutex);
hash_init(group->register_table);
@@ -1480,6 +1500,11 @@ static int destroy_user_event(struct user_event *user)
hash_del(&user->node);
user_event_destroy_validators(user);
+
+ /* If we have different names, both must be freed */
+ if (EVENT_NAME(user) != EVENT_TP_NAME(user))
+ kfree(EVENT_TP_NAME(user));
+
kfree(user->call.print_fmt);
kfree(EVENT_NAME(user));
kfree(user);
@@ -1493,17 +1518,36 @@ static int destroy_user_event(struct user_event *user)
}
static struct user_event *find_user_event(struct user_event_group *group,
- char *name, u32 *outkey)
+ char *name, int argc, const char **argv,
+ u32 flags, u32 *outkey)
{
struct user_event *user;
u32 key = user_event_key(name);
*outkey = key;
- hash_for_each_possible(group->register_table, user, node, key)
- if (!strcmp(EVENT_NAME(user), name))
+ hash_for_each_possible(group->register_table, user, node, key) {
+ /*
+ * Single-format events shouldn't return multi-format
+ * events. Callers expect the underlying tracepoint to match
+ * the name exactly in these cases. Only check like-formats.
+ */
+ if (EVENT_MULTI_FORMAT(flags) != EVENT_MULTI_FORMAT(user->reg_flags))
+ continue;
+
+ if (strcmp(EVENT_NAME(user), name))
+ continue;
+
+ if (user_fields_match(user, argc, argv))
return user_event_get(user);
+ /* Scan others if this is a multi-format event */
+ if (EVENT_MULTI_FORMAT(flags))
+ continue;
+
+ return ERR_PTR(-EADDRINUSE);
+ }
+
return NULL;
}
@@ -1860,6 +1904,9 @@ static bool user_fields_match(struct user_event *user, int argc,
struct list_head *head = &user->fields;
int i = 0;
+ if (argc == 0)
+ return list_empty(head);
+
list_for_each_entry_reverse(field, head, link) {
if (!user_field_match(field, argc, argv, &i))
return false;
@@ -1877,13 +1924,15 @@ static bool user_event_match(const char *system, const char *event,
struct user_event *user = container_of(ev, struct user_event, devent);
bool match;
- match = strcmp(EVENT_NAME(user), event) == 0 &&
- (!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
+ match = strcmp(EVENT_NAME(user), event) == 0;
+
+ if (match && system) {
+ match = strcmp(system, user->group->system_name) == 0 ||
+ strcmp(system, user->group->system_multi_name) == 0;
+ }
- if (match && argc > 0)
+ if (match)
match = user_fields_match(user, argc, argv);
- else if (match && argc == 0)
- match = list_empty(&user->fields);
return match;
}
@@ -1913,6 +1962,33 @@ static int user_event_trace_register(struct user_event *user)
return ret;
}
+static int user_event_set_tp_name(struct user_event *user)
+{
+ lockdep_assert_held(&user->group->reg_mutex);
+
+ if (EVENT_MULTI_FORMAT(user->reg_flags)) {
+ char *multi_name;
+
+ multi_name = kasprintf(GFP_KERNEL_ACCOUNT, "%s.%llx",
+ user->reg_name, user->group->multi_id);
+
+ if (!multi_name)
+ return -ENOMEM;
+
+ user->call.name = multi_name;
+ user->tracepoint.name = multi_name;
+
+ /* Inc to ensure unique multi-event name next time */
+ user->group->multi_id++;
+ } else {
+ /* Non-multi-format events use the register name */
+ user->call.name = user->reg_name;
+ user->tracepoint.name = user->reg_name;
+ }
+
+ return 0;
+}
+
/*
* Parses the event name, arguments and flags then registers if successful.
* The name buffer lifetime is owned by this method for success cases only.
@@ -1922,11 +1998,11 @@ static int user_event_parse(struct user_event_group *group, char *name,
char *args, char *flags,
struct user_event **newuser, int reg_flags)
{
- int ret;
- u32 key;
struct user_event *user;
+ char **argv = NULL;
int argc = 0;
- char **argv;
+ int ret;
+ u32 key;
/* Currently don't support any text based flags */
if (flags != NULL)
@@ -1935,41 +2011,34 @@ static int user_event_parse(struct user_event_group *group, char *name,
if (!user_event_capable(reg_flags))
return -EPERM;
+ if (args) {
+ argv = argv_split(GFP_KERNEL, args, &argc);
+
+ if (!argv)
+ return -ENOMEM;
+ }
+
/* Prevent dyn_event from racing */
mutex_lock(&event_mutex);
- user = find_user_event(group, name, &key);
+ user = find_user_event(group, name, argc, (const char **)argv,
+ reg_flags, &key);
mutex_unlock(&event_mutex);
- if (user) {
- if (args) {
- argv = argv_split(GFP_KERNEL, args, &argc);
- if (!argv) {
- ret = -ENOMEM;
- goto error;
- }
+ if (argv)
+ argv_free(argv);
- ret = user_fields_match(user, argc, (const char **)argv);
- argv_free(argv);
-
- } else
- ret = list_empty(&user->fields);
-
- if (ret) {
- *newuser = user;
- /*
- * Name is allocated by caller, free it since it already exists.
- * Caller only worries about failure cases for freeing.
- */
- kfree(name);
- } else {
- ret = -EADDRINUSE;
- goto error;
- }
+ if (IS_ERR(user))
+ return PTR_ERR(user);
+
+ if (user) {
+ *newuser = user;
+ /*
+ * Name is allocated by caller, free it since it already exists.
+ * Caller only worries about failure cases for freeing.
+ */
+ kfree(name);
return 0;
-error:
- user_event_put(user, false);
- return ret;
}
user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT);
@@ -1982,7 +2051,13 @@ error:
INIT_LIST_HEAD(&user->validators);
user->group = group;
- user->tracepoint.name = name;
+ user->reg_name = name;
+ user->reg_flags = reg_flags;
+
+ ret = user_event_set_tp_name(user);
+
+ if (ret)
+ goto put_user;
ret = user_event_parse_fields(user, args);
@@ -1996,11 +2071,14 @@ error:
user->call.data = user;
user->call.class = &user->class;
- user->call.name = name;
user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
user->call.tp = &user->tracepoint;
user->call.event.funcs = &user_event_funcs;
- user->class.system = group->system_name;
+
+ if (EVENT_MULTI_FORMAT(user->reg_flags))
+ user->class.system = group->system_multi_name;
+ else
+ user->class.system = group->system_name;
user->class.fields_array = user_event_fields_array;
user->class.get_fields = user_event_get_fields;
@@ -2022,8 +2100,6 @@ error:
if (ret)
goto put_user_lock;
- user->reg_flags = reg_flags;
-
if (user->reg_flags & USER_EVENT_REG_PERSIST) {
/* Ensure we track self ref and caller ref (2) */
refcount_set(&user->refcnt, 2);
@@ -2047,30 +2123,43 @@ put_user:
user_event_destroy_fields(user);
user_event_destroy_validators(user);
kfree(user->call.print_fmt);
+
+ /* Caller frees reg_name on error, but not multi-name */
+ if (EVENT_NAME(user) != EVENT_TP_NAME(user))
+ kfree(EVENT_TP_NAME(user));
+
kfree(user);
return ret;
}
/*
- * Deletes a previously created event if it is no longer being used.
+ * Deletes previously created events if they are no longer being used.
*/
static int delete_user_event(struct user_event_group *group, char *name)
{
- u32 key;
- struct user_event *user = find_user_event(group, name, &key);
+ struct user_event *user;
+ struct hlist_node *tmp;
+ u32 key = user_event_key(name);
+ int ret = -ENOENT;
- if (!user)
- return -ENOENT;
+ /* Attempt to delete all event(s) with the name passed in */
+ hash_for_each_possible_safe(group->register_table, user, tmp, node, key) {
+ if (strcmp(EVENT_NAME(user), name))
+ continue;
- user_event_put(user, true);
+ if (!user_event_last_ref(user))
+ return -EBUSY;
- if (!user_event_last_ref(user))
- return -EBUSY;
+ if (!user_event_capable(user->reg_flags))
+ return -EPERM;
- if (!user_event_capable(user->reg_flags))
- return -EPERM;
+ ret = destroy_user_event(user);
- return destroy_user_event(user);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
}
/*
@@ -2628,7 +2717,7 @@ static int user_seq_show(struct seq_file *m, void *p)
hash_for_each(group->register_table, i, user, node) {
status = user->status;
- seq_printf(m, "%s", EVENT_NAME(user));
+ seq_printf(m, "%s", EVENT_TP_NAME(user));
if (status != 0)
seq_puts(m, " #");
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3e7fa44dc2b2..d8b302d01083 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1587,12 +1587,11 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
{
struct print_entry *field;
struct trace_seq *s = &iter->seq;
- int max = iter->ent_size - offsetof(struct print_entry, buf);
trace_assign_type(field, iter->ent);
seq_print_ip_sym(s, field->ip, flags);
- trace_seq_printf(s, ": %.*s", max, field->buf);
+ trace_seq_printf(s, ": %s", field->buf);
return trace_handle_return(s);
}
@@ -1601,11 +1600,10 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct print_entry *field;
- int max = iter->ent_size - offsetof(struct print_entry, buf);
trace_assign_type(field, iter->ent);
- trace_seq_printf(&iter->seq, "# %lx %.*s", field->ip, max, field->buf);
+ trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf);
return trace_handle_return(&iter->seq);
}
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index c9ffdcfe622e..8a407adb0e1c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
+#include <linux/kmemleak.h>
#include <linux/ftrace.h>
#include <trace/events/sched.h>
@@ -148,3 +149,517 @@ void tracing_stop_tgid_record(void)
{
tracing_stop_sched_switch(RECORD_TGID);
}
+
+/*
+ * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
+ * is the tgid last observed corresponding to pid=i.
+ */
+static int *tgid_map;
+
+/* The maximum valid index into tgid_map. */
+static size_t tgid_map_max;
+
+#define SAVED_CMDLINES_DEFAULT 128
+#define NO_CMDLINE_MAP UINT_MAX
+/*
+ * Preemption must be disabled before acquiring trace_cmdline_lock.
+ * The various trace_arrays' max_lock must be acquired in a context
+ * where interrupts are disabled.
+ */
+static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+struct saved_cmdlines_buffer {
+ unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+ unsigned *map_cmdline_to_pid;
+ unsigned cmdline_num;
+ int cmdline_idx;
+ char saved_cmdlines[];
+};
+static struct saved_cmdlines_buffer *savedcmd;
+
+/* Holds the size of a cmdline and pid element */
+#define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s) \
+ (TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
+
+static inline char *get_saved_cmdlines(int idx)
+{
+ return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
+}
+
+static inline void set_cmdline(int idx, const char *cmdline)
+{
+ strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
+}
+
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
+{
+ int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
+
+ kmemleak_free(s);
+ free_pages((unsigned long)s, order);
+}
+
+static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
+{
+ struct saved_cmdlines_buffer *s;
+ struct page *page;
+ int orig_size, size;
+ int order;
+
+ /* Figure out how much is needed to hold the given number of cmdlines */
+ orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
+ order = get_order(orig_size);
+ size = 1 << (order + PAGE_SHIFT);
+ page = alloc_pages(GFP_KERNEL, order);
+ if (!page)
+ return NULL;
+
+ s = page_address(page);
+ kmemleak_alloc(s, size, 1, GFP_KERNEL);
+ memset(s, 0, sizeof(*s));
+
+ /* Round up to actual allocation */
+ val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
+ s->cmdline_num = val;
+
+ /* Place map_cmdline_to_pid array right after saved_cmdlines */
+ s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
+
+ s->cmdline_idx = 0;
+ memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
+ sizeof(s->map_pid_to_cmdline));
+ memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
+ val * sizeof(*s->map_cmdline_to_pid));
+
+ return s;
+}
+
+int trace_create_savedcmd(void)
+{
+ savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
+
+ return savedcmd ? 0 : -ENOMEM;
+}
+
+int trace_save_cmdline(struct task_struct *tsk)
+{
+ unsigned tpid, idx;
+
+ /* treat recording of idle task as a success */
+ if (!tsk->pid)
+ return 1;
+
+ tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
+
+ /*
+ * It's not the end of the world if we don't get
+ * the lock, but we also don't want to spin
+ * nor do we want to disable interrupts,
+ * so if we miss here, then better luck next time.
+ *
+ * This is called from the scheduler and wake-up paths, so interrupts
+ * should already be disabled and the run queue lock held.
+ */
+ lockdep_assert_preemption_disabled();
+ if (!arch_spin_trylock(&trace_cmdline_lock))
+ return 0;
+
+ idx = savedcmd->map_pid_to_cmdline[tpid];
+ if (idx == NO_CMDLINE_MAP) {
+ idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
+
+ savedcmd->map_pid_to_cmdline[tpid] = idx;
+ savedcmd->cmdline_idx = idx;
+ }
+
+ savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
+ set_cmdline(idx, tsk->comm);
+
+ arch_spin_unlock(&trace_cmdline_lock);
+
+ return 1;
+}
+
+static void __trace_find_cmdline(int pid, char comm[])
+{
+ unsigned map;
+ int tpid;
+
+ if (!pid) {
+ strcpy(comm, "<idle>");
+ return;
+ }
+
+ if (WARN_ON_ONCE(pid < 0)) {
+ strcpy(comm, "<XXX>");
+ return;
+ }
+
+ tpid = pid & (PID_MAX_DEFAULT - 1);
+ map = savedcmd->map_pid_to_cmdline[tpid];
+ if (map != NO_CMDLINE_MAP) {
+ tpid = savedcmd->map_cmdline_to_pid[map];
+ if (tpid == pid) {
+ strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
+ return;
+ }
+ }
+ strcpy(comm, "<...>");
+}
+
+void trace_find_cmdline(int pid, char comm[])
+{
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+
+ __trace_find_cmdline(pid, comm);
+
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+}
+
+static int *trace_find_tgid_ptr(int pid)
+{
+ /*
+ * Pairs with the smp_store_release() in trace_alloc_tgid_map() to ensure
+ * that if we observe a non-NULL tgid_map then we also observe the correct
+ * tgid_map_max.
+ */
+ int *map = smp_load_acquire(&tgid_map);
+
+ if (unlikely(!map || pid > tgid_map_max))
+ return NULL;
+
+ return &map[pid];
+}
+
+int trace_find_tgid(int pid)
+{
+ int *ptr = trace_find_tgid_ptr(pid);
+
+ return ptr ? *ptr : 0;
+}
+
+static int trace_save_tgid(struct task_struct *tsk)
+{
+ int *ptr;
+
+ /* treat recording of idle task as a success */
+ if (!tsk->pid)
+ return 1;
+
+ ptr = trace_find_tgid_ptr(tsk->pid);
+ if (!ptr)
+ return 0;
+
+ *ptr = tsk->tgid;
+ return 1;
+}
+
+static bool tracing_record_taskinfo_skip(int flags)
+{
+ if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
+ return true;
+ if (!__this_cpu_read(trace_taskinfo_save))
+ return true;
+ return false;
+}
+
+/**
+ * tracing_record_taskinfo - record the task info of a task
+ *
+ * @task: task to record
+ * @flags: TRACE_RECORD_CMDLINE for recording comm
+ * TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo(struct task_struct *task, int flags)
+{
+ bool done;
+
+ if (tracing_record_taskinfo_skip(flags))
+ return;
+
+ /*
+ * Record as much task information as possible. If some fail, continue
+ * to try to record the others.
+ */
+ done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
+
+ /* If recording any information failed, retry again soon. */
+ if (!done)
+ return;
+
+ __this_cpu_write(trace_taskinfo_save, false);
+}
+
+/**
+ * tracing_record_taskinfo_sched_switch - record task info for sched_switch
+ *
+ * @prev: previous task during sched_switch
+ * @next: next task during sched_switch
+ * @flags: TRACE_RECORD_CMDLINE for recording comm
+ * TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
+ struct task_struct *next, int flags)
+{
+ bool done;
+
+ if (tracing_record_taskinfo_skip(flags))
+ return;
+
+ /*
+ * Record as much task information as possible. If some fail, continue
+ * to try to record the others.
+ */
+ done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
+ done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
+
+ /* If recording any information failed, retry again soon. */
+ if (!done)
+ return;
+
+ __this_cpu_write(trace_taskinfo_save, false);
+}
+
+/* Helpers to record a specific task information */
+void tracing_record_cmdline(struct task_struct *task)
+{
+ tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
+}
+
+void tracing_record_tgid(struct task_struct *task)
+{
+ tracing_record_taskinfo(task, TRACE_RECORD_TGID);
+}
+
+int trace_alloc_tgid_map(void)
+{
+ int *map;
+
+ if (tgid_map)
+ return 0;
+
+ tgid_map_max = pid_max;
+ map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
+ GFP_KERNEL);
+ if (!map)
+ return -ENOMEM;
+
+ /*
+ * Pairs with smp_load_acquire() in
+ * trace_find_tgid_ptr() to ensure that if it observes
+ * the tgid_map we just allocated then it also observes
+ * the corresponding tgid_map_max value.
+ */
+ smp_store_release(&tgid_map, map);
+ return 0;
+}
+
+static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ int pid = ++(*pos);
+
+ return trace_find_tgid_ptr(pid);
+}
+
+static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
+{
+ int pid = *pos;
+
+ return trace_find_tgid_ptr(pid);
+}
+
+static void saved_tgids_stop(struct seq_file *m, void *v)
+{
+}
+
+static int saved_tgids_show(struct seq_file *m, void *v)
+{
+ int *entry = (int *)v;
+ int pid = entry - tgid_map;
+ int tgid = *entry;
+
+ if (tgid == 0)
+ return SEQ_SKIP;
+
+ seq_printf(m, "%d %d\n", pid, tgid);
+ return 0;
+}
+
+static const struct seq_operations tracing_saved_tgids_seq_ops = {
+ .start = saved_tgids_start,
+ .stop = saved_tgids_stop,
+ .next = saved_tgids_next,
+ .show = saved_tgids_show,
+};
+
+static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
+
+ return seq_open(filp, &tracing_saved_tgids_seq_ops);
+}
+
+
+const struct file_operations tracing_saved_tgids_fops = {
+ .open = tracing_saved_tgids_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ unsigned int *ptr = v;
+
+ if (*pos || m->count)
+ ptr++;
+
+ (*pos)++;
+
+ for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
+ ptr++) {
+ if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
+ continue;
+
+ return ptr;
+ }
+
+ return NULL;
+}
+
+static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
+{
+ void *v;
+ loff_t l = 0;
+
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+
+ v = &savedcmd->map_cmdline_to_pid[0];
+ while (l <= *pos) {
+ v = saved_cmdlines_next(m, v, &l);
+ if (!v)
+ return NULL;
+ }
+
+ return v;
+}
+
+static void saved_cmdlines_stop(struct seq_file *m, void *v)
+{
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+}
+
+static int saved_cmdlines_show(struct seq_file *m, void *v)
+{
+ char buf[TASK_COMM_LEN];
+ unsigned int *pid = v;
+
+ __trace_find_cmdline(*pid, buf);
+ seq_printf(m, "%d %s\n", *pid, buf);
+ return 0;
+}
+
+static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
+ .start = saved_cmdlines_start,
+ .next = saved_cmdlines_next,
+ .stop = saved_cmdlines_stop,
+ .show = saved_cmdlines_show,
+};
+
+static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
+
+ return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
+}
+
+const struct file_operations tracing_saved_cmdlines_fops = {
+ .open = tracing_saved_cmdlines_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static ssize_t
+tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ int r;
+
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+ r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+void trace_free_saved_cmdlines_buffer(void)
+{
+ free_saved_cmdlines_buffer(savedcmd);
+}
+
+static int tracing_resize_saved_cmdlines(unsigned int val)
+{
+ struct saved_cmdlines_buffer *s, *savedcmd_temp;
+
+ s = allocate_cmdlines_buffer(val);
+ if (!s)
+ return -ENOMEM;
+
+ preempt_disable();
+ arch_spin_lock(&trace_cmdline_lock);
+ savedcmd_temp = savedcmd;
+ savedcmd = s;
+ arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
+ free_saved_cmdlines_buffer(savedcmd_temp);
+
+ return 0;
+}
+
+static ssize_t
+tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
+ if (!val || val > PID_MAX_DEFAULT)
+ return -EINVAL;
+
+ ret = tracing_resize_saved_cmdlines((unsigned int)val);
+ if (ret < 0)
+ return ret;
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
+const struct file_operations tracing_saved_cmdlines_size_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_saved_cmdlines_size_read,
+ .write = tracing_saved_cmdlines_size_write,
+};
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 529590499b1f..e9c5058a8efd 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -768,7 +768,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
ftrace_graph_stop();
printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
- if (ftrace_dump_on_oops) {
+ if (ftrace_dump_on_oops_enabled()) {
ftrace_dump(DUMP_ALL);
/* ftrace_dump() disables tracing */
tracing_on();