diff options
Diffstat (limited to 'kernel/taskstats.c')
| -rw-r--r-- | kernel/taskstats.c | 244 |
1 files changed, 118 insertions, 126 deletions
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 145bb4d3bd4d..0cd680ccc7e5 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -1,24 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * taskstats.c - Export per-task statistics to userland * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 * (C) Balbir Singh, IBM Corp. 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #include <linux/kernel.h> #include <linux/taskstats_kern.h> #include <linux/tsacct_kern.h> +#include <linux/acct.h> #include <linux/delayacct.h> #include <linux/cpumask.h> #include <linux/percpu.h> @@ -30,6 +21,7 @@ #include <linux/pid_namespace.h> #include <net/genetlink.h> #include <linux/atomic.h> +#include <linux/sched/cputime.h> /* * Maximum length of a cpumask that can be specified in @@ -41,20 +33,15 @@ static DEFINE_PER_CPU(__u32, taskstats_seqnum); static int family_registered; struct kmem_cache *taskstats_cache; -static struct genl_family family = { - .id = GENL_ID_GENERATE, - .name = TASKSTATS_GENL_NAME, - .version = TASKSTATS_GENL_VERSION, - .maxattr = TASKSTATS_CMD_ATTR_MAX, -}; +static struct genl_family family; -static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { +static const struct nla_policy taskstats_cmd_get_policy[] = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; -static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = { +static const struct nla_policy cgroupstats_cmd_get_policy[] = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; @@ -111,13 +98,8 @@ static int send_reply(struct sk_buff *skb, struct genl_info *info) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); void *reply = genlmsg_data(genlhdr); - int rc; - rc = genlmsg_end(skb, reply); - if (rc < 0) { - nlmsg_free(skb); - return rc; - } + genlmsg_end(skb, reply); return genlmsg_reply(skb, info); } @@ -132,17 +114,14 @@ static void send_cpu_listeners(struct sk_buff *skb, struct listener *s, *tmp; struct sk_buff *skb_next, *skb_cur = skb; void *reply = genlmsg_data(genlhdr); - int rc, delcount = 0; + int delcount = 0; - rc = genlmsg_end(skb, reply); - if (rc < 0) { - nlmsg_free(skb); - return; - } + genlmsg_end(skb, reply); - rc = 0; down_read(&listeners->sem); list_for_each_entry(s, &listeners->list, list) { + int rc; + skb_next = NULL; if (!list_is_last(&s->list, &listeners->list)) { skb_next = skb_clone(skb_cur, GFP_KERNEL); @@ -175,6 +154,23 @@ static void send_cpu_listeners(struct sk_buff *skb, up_write(&listeners->sem); } +static void exe_add_tsk(struct taskstats *stats, struct task_struct *tsk) +{ + /* No idea if I'm allowed to access that here, now. */ + struct file *exe_file = get_task_exe_file(tsk); + + if (exe_file) { + /* Following cp_new_stat64() in stat.c . */ + stats->ac_exe_dev = + huge_encode_dev(exe_file->f_inode->i_sb->s_dev); + stats->ac_exe_inode = exe_file->f_inode->i_ino; + fput(exe_file); + } else { + stats->ac_exe_dev = 0; + stats->ac_exe_inode = 0; + } +} + static void fill_stats(struct user_namespace *user_ns, struct pid_namespace *pid_ns, struct task_struct *tsk, struct taskstats *stats) @@ -197,17 +193,16 @@ static void fill_stats(struct user_namespace *user_ns, /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); + + /* add executable info */ + exe_add_tsk(stats, tsk); } static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) { struct task_struct *tsk; - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); + tsk = find_get_task_by_vpid(pid); if (!tsk) return -ESRCH; fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats); @@ -220,6 +215,8 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) struct task_struct *tsk, *first; unsigned long flags; int rc = -ESRCH; + u64 delta, utime, stime; + u64 start_time; /* * Add additional stats from live tasks except zombie thread group @@ -236,8 +233,8 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) else memset(stats, 0, sizeof(*stats)); - tsk = first; - do { + start_time = ktime_get_ns(); + for_each_thread(first, tsk) { if (tsk->exit_state) continue; /* @@ -248,9 +245,19 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) */ delayacct_add_tsk(stats, tsk); + /* calculate task elapsed time in nsec */ + delta = start_time - tsk->start_time; + /* Convert to micro seconds */ + do_div(delta, NSEC_PER_USEC); + stats->ac_etime += delta; + + task_cputime(tsk, &utime, &stime); + stats->ac_utime += div_u64(utime, NSEC_PER_USEC); + stats->ac_stime += div_u64(stime, NSEC_PER_USEC); + stats->nvcsw += tsk->nvcsw; stats->nivcsw += tsk->nivcsw; - } while_each_thread(first, tsk); + } unlock_task_sighand(first, &flags); rc = 0; @@ -290,6 +297,7 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) struct listener_list *listeners; struct listener *s, *tmp, *s2; unsigned int cpu; + int ret = 0; if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; @@ -304,9 +312,10 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) for_each_cpu(cpu, mask) { s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, cpu_to_node(cpu)); - if (!s) + if (!s) { + ret = -ENOMEM; goto cleanup; - + } s->pid = pid; s->valid = 1; @@ -339,7 +348,7 @@ cleanup: } up_write(&listeners->sem); } - return 0; + return ret; } static int parse(struct nlattr *na, struct cpumask *mask) @@ -358,16 +367,12 @@ static int parse(struct nlattr *na, struct cpumask *mask) data = kmalloc(len, GFP_KERNEL); if (!data) return -ENOMEM; - nla_strlcpy(data, na, len); + nla_strscpy(data, na, len); ret = cpulist_parse(data, mask); kfree(data); return ret; } -#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) -#define TASKSTATS_NEEDS_PADDING 1 -#endif - static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) { struct nlattr *na, *ret; @@ -377,38 +382,20 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; - /* - * The taskstats structure is internally aligned on 8 byte - * boundaries but the layout of the aggregrate reply, with - * two NLA headers and the pid (each 4 bytes), actually - * force the entire structure to be unaligned. This causes - * the kernel to issue unaligned access warnings on some - * architectures like ia64. Unfortunately, some software out there - * doesn't properly unroll the NLA packet and assumes that the start - * of the taskstats structure will always be 20 bytes from the start - * of the netlink payload. Aligning the start of the taskstats - * structure breaks this software, which we don't want. So, for now - * the alignment only happens on architectures that require it - * and those users will have to update to fixed versions of those - * packages. Space is reserved in the packet only when needed. - * This ifdef should be removed in several years e.g. 2012 once - * we can be confident that fixed versions are installed on most - * systems. We add the padding before the aggregate since the - * aggregate is already a defined type. - */ -#ifdef TASKSTATS_NEEDS_PADDING - if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) - goto err; -#endif - na = nla_nest_start(skb, aggr); + na = nla_nest_start_noflag(skb, aggr); if (!na) goto err; - if (nla_put(skb, type, sizeof(pid), &pid) < 0) + if (nla_put(skb, type, sizeof(pid), &pid) < 0) { + nla_nest_cancel(skb, na); goto err; - ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); - if (!ret) + } + ret = nla_reserve_64bit(skb, TASKSTATS_TYPE_STATS, + sizeof(struct taskstats), TASKSTATS_TYPE_NULL); + if (!ret) { + nla_nest_cancel(skb, na); goto err; + } nla_nest_end(skb, na); return nla_data(ret); @@ -424,15 +411,14 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) struct nlattr *na; size_t size; u32 fd; - struct fd f; na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; if (!na) return -EINVAL; fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); - f = fdget(fd); - if (!f.file) + CLASS(fd, f)(fd); + if (fd_empty(f)) return 0; size = nla_total_size(sizeof(struct cgroupstats)); @@ -440,30 +426,25 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) - goto err; + return rc; na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, sizeof(struct cgroupstats)); if (na == NULL) { nlmsg_free(rep_skb); - rc = -EMSGSIZE; - goto err; + return -EMSGSIZE; } stats = nla_data(na); memset(stats, 0, sizeof(*stats)); - rc = cgroupstats_build(stats, f.file->f_dentry); + rc = cgroupstats_build(stats, fd_file(f)->f_path.dentry); if (rc < 0) { nlmsg_free(rep_skb); - goto err; + return rc; } - rc = send_reply(rep_skb, info); - -err: - fdput(f); - return rc; + return send_reply(rep_skb, info); } static int cmd_attr_register_cpumask(struct genl_info *info) @@ -503,10 +484,9 @@ static size_t taskstats_packet_size(void) size_t size; size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); -#ifdef TASKSTATS_NEEDS_PADDING - size += nla_total_size(0); /* Padding for alignment */ -#endif + nla_total_size_64bit(sizeof(struct taskstats)) + + nla_total_size(0); + return size; } @@ -585,25 +565,33 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; - struct taskstats *stats; + struct taskstats *stats_new, *stats; - if (sig->stats || thread_group_empty(tsk)) - goto ret; + /* Pairs with smp_store_release() below. */ + stats = smp_load_acquire(&sig->stats); + if (stats || thread_group_empty(tsk)) + return stats; /* No problem if kmem_cache_zalloc() fails */ - stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); + stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); spin_lock_irq(&tsk->sighand->siglock); - if (!sig->stats) { - sig->stats = stats; - stats = NULL; + stats = sig->stats; + if (!stats) { + /* + * Pairs with smp_store_release() above and order the + * kmem_cache_zalloc(). + */ + smp_store_release(&sig->stats, stats_new); + stats = stats_new; + stats_new = NULL; } spin_unlock_irq(&tsk->sighand->siglock); - if (stats) - kmem_cache_free(taskstats_cache, stats); -ret: - return sig->stats; + if (stats_new) + kmem_cache_free(taskstats_cache, stats_new); + + return stats; } /* Send pid data out on exit */ @@ -632,7 +620,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) fill_tgid_exit(tsk); } - listeners = __this_cpu_ptr(&listener_array); + listeners = raw_cpu_ptr(&listener_array); if (list_empty(&listeners->list)) return; @@ -646,6 +634,8 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) goto err; fill_stats(&init_user_ns, &init_pid_ns, tsk, stats); + if (group_dead) + stats->ac_flag |= AGROUP; /* * Doesn't matter if tsk is the leader or the last group member leaving @@ -667,17 +657,32 @@ err: nlmsg_free(rep_skb); } -static struct genl_ops taskstats_ops = { - .cmd = TASKSTATS_CMD_GET, - .doit = taskstats_user_cmd, - .policy = taskstats_cmd_get_policy, - .flags = GENL_ADMIN_PERM, +static const struct genl_ops taskstats_ops[] = { + { + .cmd = TASKSTATS_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = taskstats_user_cmd, + .policy = taskstats_cmd_get_policy, + .maxattr = ARRAY_SIZE(taskstats_cmd_get_policy) - 1, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = CGROUPSTATS_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = cgroupstats_user_cmd, + .policy = cgroupstats_cmd_get_policy, + .maxattr = ARRAY_SIZE(cgroupstats_cmd_get_policy) - 1, + }, }; -static struct genl_ops cgroupstats_ops = { - .cmd = CGROUPSTATS_CMD_GET, - .doit = cgroupstats_user_cmd, - .policy = cgroupstats_cmd_get_policy, +static struct genl_family family __ro_after_init = { + .name = TASKSTATS_GENL_NAME, + .version = TASKSTATS_GENL_VERSION, + .module = THIS_MODULE, + .ops = taskstats_ops, + .n_ops = ARRAY_SIZE(taskstats_ops), + .resv_start_op = CGROUPSTATS_CMD_GET + 1, + .netnsok = true, }; /* Needed early in initialization */ @@ -700,22 +705,9 @@ static int __init taskstats_init(void) if (rc) return rc; - rc = genl_register_ops(&family, &taskstats_ops); - if (rc < 0) - goto err; - - rc = genl_register_ops(&family, &cgroupstats_ops); - if (rc < 0) - goto err_cgroup_ops; - family_registered = 1; pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); return 0; -err_cgroup_ops: - genl_unregister_ops(&family, &taskstats_ops); -err: - genl_unregister_family(&family); - return rc; } /* |
