diff options
Diffstat (limited to 'kernel/acct.c')
| -rw-r--r-- | kernel/acct.c | 627 |
1 files changed, 301 insertions, 326 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 8d6e145138bb..2a2b3c874acd 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/kernel/acct.c * @@ -24,7 +25,7 @@ * Now we silently close acct_file on attempt to reopen. Cleaned sys_acct(). * XTerms and EMACS are manifestations of pure evil. 21/10/98, AV. * - * Fixed a nasty interaction with with sys_umount(). If the accointing + * Fixed a nasty interaction with sys_umount(). If the accounting * was suspeneded we failed to stop it on umount(). Messy. * Another one: remount to readonly didn't stop accounting. * Question: what should we do if we have CAP_SYS_ADMIN but not @@ -39,26 +40,23 @@ * is one more bug... 10/11/98, AV. * * Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold - * ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks + * ->mmap_lock to walk the vma list of current->mm. Nasty, since it leaks * a struct file opened for write. Fixed. 2/6/2000, AV. */ -#include <linux/mm.h> #include <linux/slab.h> #include <linux/acct.h> #include <linux/capability.h> -#include <linux/file.h> #include <linux/tty.h> -#include <linux/security.h> -#include <linux/vfs.h> +#include <linux/statfs.h> #include <linux/jiffies.h> -#include <linux/times.h> #include <linux/syscalls.h> -#include <linux/mount.h> -#include <asm/uaccess.h> +#include <linux/namei.h> +#include <linux/sched/cputime.h> + #include <asm/div64.h> -#include <linux/blkdev.h> /* sector_div */ #include <linux/pid_namespace.h> +#include <linux/fs_pin.h> /* * These constants control the amount of freespace that suspend and @@ -67,190 +65,232 @@ * Turned into sysctl-controllable parameters. AV, 12/11/98 */ -int acct_parm[3] = {4, 2, 30}; +static int acct_parm[3] = {4, 2, 30}; #define RESUME (acct_parm[0]) /* >foo% free space - resume */ #define SUSPEND (acct_parm[1]) /* <foo% free space - suspend */ #define ACCT_TIMEOUT (acct_parm[2]) /* foo second timeout between checks */ +#ifdef CONFIG_SYSCTL +static const struct ctl_table kern_acct_table[] = { + { + .procname = "acct", + .data = &acct_parm, + .maxlen = 3*sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static __init int kernel_acct_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_acct_table); + return 0; +} +late_initcall(kernel_acct_sysctls_init); +#endif /* CONFIG_SYSCTL */ + /* * External references and all of the globals. */ -static void do_acct_process(struct bsd_acct_struct *acct, - struct pid_namespace *ns, struct file *); -/* - * This structure is used so that all the data protected by lock - * can be placed in the same cache line as the lock. This primes - * the cache line to have the data after getting the lock. - */ struct bsd_acct_struct { - int active; + struct fs_pin pin; + atomic_long_t count; + struct rcu_head rcu; + struct mutex lock; + bool active; + bool check_space; unsigned long needcheck; struct file *file; struct pid_namespace *ns; - struct list_head list; + struct work_struct work; + struct completion done; + acct_t ac; }; -static DEFINE_SPINLOCK(acct_lock); -static LIST_HEAD(acct_list); +static void fill_ac(struct bsd_acct_struct *acct); +static void acct_write_process(struct bsd_acct_struct *acct); /* * Check the amount of free space and suspend/resume accordingly. */ -static int check_free_space(struct bsd_acct_struct *acct, struct file *file) +static bool check_free_space(struct bsd_acct_struct *acct) { struct kstatfs sbuf; - int res; - int act; - u64 resume; - u64 suspend; - spin_lock(&acct_lock); - res = acct->active; - if (!file || time_is_before_jiffies(acct->needcheck)) - goto out; - spin_unlock(&acct_lock); + if (!acct->check_space) + return acct->active; /* May block */ - if (vfs_statfs(&file->f_path, &sbuf)) - return res; - suspend = sbuf.f_blocks * SUSPEND; - resume = sbuf.f_blocks * RESUME; - - do_div(suspend, 100); - do_div(resume, 100); - - if (sbuf.f_bavail <= suspend) - act = -1; - else if (sbuf.f_bavail >= resume) - act = 1; - else - act = 0; - - /* - * If some joker switched acct->file under us we'ld better be - * silent and _not_ touch anything. - */ - spin_lock(&acct_lock); - if (file != acct->file) { - if (act) - res = act>0; - goto out; - } + if (vfs_statfs(&acct->file->f_path, &sbuf)) + return acct->active; if (acct->active) { - if (act < 0) { - acct->active = 0; - printk(KERN_INFO "Process accounting paused\n"); + u64 suspend = sbuf.f_blocks * SUSPEND; + do_div(suspend, 100); + if (sbuf.f_bavail <= suspend) { + acct->active = false; + pr_info("Process accounting paused\n"); } } else { - if (act > 0) { - acct->active = 1; - printk(KERN_INFO "Process accounting resumed\n"); + u64 resume = sbuf.f_blocks * RESUME; + do_div(resume, 100); + if (sbuf.f_bavail >= resume) { + acct->active = true; + pr_info("Process accounting resumed\n"); } } acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; - res = acct->active; -out: - spin_unlock(&acct_lock); - return res; + return acct->active; } -/* - * Close the old accounting file (if currently open) and then replace - * it with file (if non-NULL). - * - * NOTE: acct_lock MUST be held on entry and exit. - */ -static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, - struct pid_namespace *ns) +static void acct_put(struct bsd_acct_struct *p) +{ + if (atomic_long_dec_and_test(&p->count)) + kfree_rcu(p, rcu); +} + +static inline struct bsd_acct_struct *to_acct(struct fs_pin *p) +{ + return p ? container_of(p, struct bsd_acct_struct, pin) : NULL; +} + +static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) { - struct file *old_acct = NULL; - struct pid_namespace *old_ns = NULL; - - if (acct->file) { - old_acct = acct->file; - old_ns = acct->ns; - acct->active = 0; - acct->file = NULL; - acct->ns = NULL; - list_del(&acct->list); + struct bsd_acct_struct *res; +again: + smp_rmb(); + rcu_read_lock(); + res = to_acct(READ_ONCE(ns->bacct)); + if (!res) { + rcu_read_unlock(); + return NULL; } - if (file) { - acct->file = file; - acct->ns = ns; - acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; - acct->active = 1; - list_add(&acct->list, &acct_list); + if (!atomic_long_inc_not_zero(&res->count)) { + rcu_read_unlock(); + cpu_relax(); + goto again; } - if (old_acct) { - mnt_unpin(old_acct->f_path.mnt); - spin_unlock(&acct_lock); - do_acct_process(acct, old_ns, old_acct); - filp_close(old_acct, NULL); - spin_lock(&acct_lock); + rcu_read_unlock(); + mutex_lock(&res->lock); + if (res != to_acct(READ_ONCE(ns->bacct))) { + mutex_unlock(&res->lock); + acct_put(res); + goto again; } + return res; } -static int acct_on(struct filename *pathname) +static void acct_pin_kill(struct fs_pin *pin) { - struct file *file; - struct vfsmount *mnt; - struct pid_namespace *ns; - struct bsd_acct_struct *acct = NULL; + struct bsd_acct_struct *acct = to_acct(pin); + mutex_lock(&acct->lock); + /* + * Fill the accounting struct with the exiting task's info + * before punting to the workqueue. + */ + fill_ac(acct); + schedule_work(&acct->work); + wait_for_completion(&acct->done); + cmpxchg(&acct->ns->bacct, pin, NULL); + mutex_unlock(&acct->lock); + pin_remove(pin); + acct_put(acct); +} +static void close_work(struct work_struct *work) +{ + struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); + struct file *file = acct->file; + + /* We were fired by acct_pin_kill() which holds acct->lock. */ + acct_write_process(acct); + if (file->f_op->flush) + file->f_op->flush(file, NULL); + __fput_sync(file); + complete(&acct->done); +} + +DEFINE_FREE(fput_sync, struct file *, if (!IS_ERR_OR_NULL(_T)) __fput_sync(_T)) +static int acct_on(const char __user *name) +{ /* Difference from BSD - they don't do O_APPEND */ - file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + const int open_flags = O_WRONLY|O_APPEND|O_LARGEFILE; + struct pid_namespace *ns = task_active_pid_ns(current); + struct filename *pathname __free(putname) = getname(name); + struct file *original_file __free(fput) = NULL; // in that order + struct path internal __free(path_put) = {}; // in that order + struct file *file __free(fput_sync) = NULL; // in that order + struct bsd_acct_struct *acct; + struct vfsmount *mnt; + struct fs_pin *old; + + if (IS_ERR(pathname)) + return PTR_ERR(pathname); + original_file = file_open_name(pathname, open_flags, 0); + if (IS_ERR(original_file)) + return PTR_ERR(original_file); + + mnt = mnt_clone_internal(&original_file->f_path); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + internal.mnt = mnt; + internal.dentry = dget(mnt->mnt_root); + + file = dentry_open(&internal, open_flags, current_cred()); if (IS_ERR(file)) return PTR_ERR(file); - if (!S_ISREG(file_inode(file)->i_mode)) { - filp_close(file, NULL); + if (!S_ISREG(file_inode(file)->i_mode)) return -EACCES; - } - - if (!file->f_op->write) { - filp_close(file, NULL); - return -EIO; - } - ns = task_active_pid_ns(current); - if (ns->bacct == NULL) { - acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); - if (acct == NULL) { - filp_close(file, NULL); - return -ENOMEM; - } - } + /* Exclude kernel kernel internal filesystems. */ + if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) + return -EINVAL; - spin_lock(&acct_lock); - if (ns->bacct == NULL) { - ns->bacct = acct; - acct = NULL; - } + /* Exclude procfs and sysfs. */ + if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) + return -EINVAL; - mnt = file->f_path.mnt; - mnt_pin(mnt); - acct_file_reopen(ns->bacct, file, ns); - spin_unlock(&acct_lock); + if (!(file->f_mode & FMODE_CAN_WRITE)) + return -EIO; - mntput(mnt); /* it's pinned, now give up active reference */ - kfree(acct); + acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); + if (!acct) + return -ENOMEM; + + atomic_long_set(&acct->count, 1); + init_fs_pin(&acct->pin, acct_pin_kill); + acct->file = no_free_ptr(file); + acct->needcheck = jiffies; + acct->ns = ns; + mutex_init(&acct->lock); + INIT_WORK(&acct->work, close_work); + init_completion(&acct->done); + mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ + pin_insert(&acct->pin, original_file->f_path.mnt); + rcu_read_lock(); + old = xchg(&ns->bacct, &acct->pin); + mutex_unlock(&acct->lock); + pin_kill(old); return 0; } +static DEFINE_MUTEX(acct_on_mutex); + /** * sys_acct - enable/disable process accounting * @name: file name for accounting records or NULL to shutdown accounting * - * Returns 0 for success or negative errno values for failure. - * * sys_acct() is the only system call needed to implement process * accounting. It takes the name of the file where accounting records * should be written. If the filename is NULL, accounting will be * shutdown. + * + * Returns: 0 for success or negative errno values for failure. */ SYSCALL_DEFINE1(acct, const char __user *, name) { @@ -260,85 +300,25 @@ SYSCALL_DEFINE1(acct, const char __user *, name) return -EPERM; if (name) { - struct filename *tmp = getname(name); - if (IS_ERR(tmp)) - return (PTR_ERR(tmp)); - error = acct_on(tmp); - putname(tmp); + mutex_lock(&acct_on_mutex); + error = acct_on(name); + mutex_unlock(&acct_on_mutex); } else { - struct bsd_acct_struct *acct; - - acct = task_active_pid_ns(current)->bacct; - if (acct == NULL) - return 0; - - spin_lock(&acct_lock); - acct_file_reopen(acct, NULL, NULL); - spin_unlock(&acct_lock); + rcu_read_lock(); + pin_kill(task_active_pid_ns(current)->bacct); } return error; } -/** - * acct_auto_close - turn off a filesystem's accounting if it is on - * @m: vfsmount being shut down - * - * If the accounting is turned on for a file in the subtree pointed to - * to by m, turn accounting off. Done when m is about to die. - */ -void acct_auto_close_mnt(struct vfsmount *m) -{ - struct bsd_acct_struct *acct; - - spin_lock(&acct_lock); -restart: - list_for_each_entry(acct, &acct_list, list) - if (acct->file && acct->file->f_path.mnt == m) { - acct_file_reopen(acct, NULL, NULL); - goto restart; - } - spin_unlock(&acct_lock); -} - -/** - * acct_auto_close - turn off a filesystem's accounting if it is on - * @sb: super block for the filesystem - * - * If the accounting is turned on for a file in the filesystem pointed - * to by sb, turn accounting off. - */ -void acct_auto_close(struct super_block *sb) -{ - struct bsd_acct_struct *acct; - - spin_lock(&acct_lock); -restart: - list_for_each_entry(acct, &acct_list, list) - if (acct->file && acct->file->f_path.dentry->d_sb == sb) { - acct_file_reopen(acct, NULL, NULL); - goto restart; - } - spin_unlock(&acct_lock); -} - void acct_exit_ns(struct pid_namespace *ns) { - struct bsd_acct_struct *acct = ns->bacct; - - if (acct == NULL) - return; - - spin_lock(&acct_lock); - if (acct->file != NULL) - acct_file_reopen(acct, NULL, NULL); - spin_unlock(&acct_lock); - - kfree(acct); + rcu_read_lock(); + pin_kill(ns->bacct); } /* - * encode an unsigned long into a comp_t + * encode an u64 into a comp_t * * This routine has been adopted from the encode_comp_t() function in * the kern_acct.c file of the FreeBSD operating system. The encoding @@ -349,7 +329,7 @@ void acct_exit_ns(struct pid_namespace *ns) #define EXPSIZE 3 /* Base 8 (3 bit) exponent. */ #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ -static comp_t encode_comp_t(unsigned long value) +static comp_t encode_comp_t(u64 value) { int exp, rnd; @@ -368,6 +348,8 @@ static comp_t encode_comp_t(unsigned long value) exp++; } + if (exp > (((comp_t) ~0U) >> MANTSIZE)) + return (comp_t) ~0U; /* * Clean it up and polish it off. */ @@ -376,7 +358,7 @@ static comp_t encode_comp_t(unsigned long value) return exp; } -#if ACCT_VERSION==1 || ACCT_VERSION==2 +#if ACCT_VERSION == 1 || ACCT_VERSION == 2 /* * encode an u64 into a comp2_t (24 bits) * @@ -389,7 +371,7 @@ static comp_t encode_comp_t(unsigned long value) #define MANTSIZE2 20 /* 20 bit mantissa. */ #define EXPSIZE2 5 /* 5 bit base 2 exponent. */ #define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */ -#define MAXEXP2 ((1 <<EXPSIZE2) - 1) /* Maximum exponent. */ +#define MAXEXP2 ((1 << EXPSIZE2) - 1) /* Maximum exponent. */ static comp2_t encode_comp2_t(u64 value) { @@ -418,9 +400,7 @@ static comp2_t encode_comp2_t(u64 value) return (value & (MAXFRACT2>>1)) | (exp << (MANTSIZE2-1)); } } -#endif - -#if ACCT_VERSION==3 +#elif ACCT_VERSION == 3 /* * encode an u64 into a 32 bit IEEE float */ @@ -429,8 +409,9 @@ static u32 encode_float(u64 value) unsigned exp = 190; unsigned u; - if (value==0) return 0; - while ((s64)value > 0){ + if (value == 0) + return 0; + while ((s64)value > 0) { value <<= 1; exp--; } @@ -448,121 +429,126 @@ static u32 encode_float(u64 value) * do_exit() or when switching to a different output file. */ -/* - * do_acct_process does all actual work. Caller holds the reference to file. - */ -static void do_acct_process(struct bsd_acct_struct *acct, - struct pid_namespace *ns, struct file *file) +static void fill_ac(struct bsd_acct_struct *acct) { struct pacct_struct *pacct = ¤t->signal->pacct; - acct_t ac; - mm_segment_t fs; - unsigned long flim; - u64 elapsed; - u64 run_time; - struct timespec uptime; + struct file *file = acct->file; + acct_t *ac = &acct->ac; + u64 elapsed, run_time; + time64_t btime; struct tty_struct *tty; - const struct cred *orig_cred; - /* Perform file operations on behalf of whoever enabled accounting */ - orig_cred = override_creds(file->f_cred); + lockdep_assert_held(&acct->lock); - /* - * First check to see if there is enough free_space to continue - * the process accounting system. - */ - if (!check_free_space(acct, file)) - goto out; + if (time_is_after_jiffies(acct->needcheck)) { + acct->check_space = false; + + /* Don't fill in @ac if nothing will be written. */ + if (!acct->active) + return; + } else { + acct->check_space = true; + } /* * Fill the accounting struct with the needed info as recorded * by the different kernel functions. */ - memset(&ac, 0, sizeof(acct_t)); + memset(ac, 0, sizeof(acct_t)); - ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; - strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); + ac->ac_version = ACCT_VERSION | ACCT_BYTEORDER; + strscpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm)); /* calculate run_time in nsec*/ - do_posix_clock_monotonic_gettime(&uptime); - run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; - run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC - + current->group_leader->start_time.tv_nsec; + run_time = ktime_get_ns(); + run_time -= current->group_leader->start_time; /* convert nsec -> AHZ */ elapsed = nsec_to_AHZ(run_time); -#if ACCT_VERSION==3 - ac.ac_etime = encode_float(elapsed); +#if ACCT_VERSION == 3 + ac->ac_etime = encode_float(elapsed); #else - ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ? - (unsigned long) elapsed : (unsigned long) -1l); + ac->ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ? + (unsigned long) elapsed : (unsigned long) -1l); #endif -#if ACCT_VERSION==1 || ACCT_VERSION==2 +#if ACCT_VERSION == 1 || ACCT_VERSION == 2 { /* new enlarged etime field */ comp2_t etime = encode_comp2_t(elapsed); - ac.ac_etime_hi = etime >> 16; - ac.ac_etime_lo = (u16) etime; + + ac->ac_etime_hi = etime >> 16; + ac->ac_etime_lo = (u16) etime; } #endif do_div(elapsed, AHZ); - ac.ac_btime = get_seconds() - elapsed; - /* we really need to bite the bullet and change layout */ - ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); - ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); -#if ACCT_VERSION==2 - ac.ac_ahz = AHZ; -#endif -#if ACCT_VERSION==1 || ACCT_VERSION==2 - /* backward-compatible 16 bit fields */ - ac.ac_uid16 = ac.ac_uid; - ac.ac_gid16 = ac.ac_gid; -#endif -#if ACCT_VERSION==3 - ac.ac_pid = task_tgid_nr_ns(current, ns); - rcu_read_lock(); - ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); - rcu_read_unlock(); + btime = ktime_get_real_seconds() - elapsed; + ac->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX); +#if ACCT_VERSION == 2 + ac->ac_ahz = AHZ; #endif spin_lock_irq(¤t->sighand->siglock); tty = current->signal->tty; /* Safe as we hold the siglock */ - ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; - ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); - ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); - ac.ac_flag = pacct->ac_flag; - ac.ac_mem = encode_comp_t(pacct->ac_mem); - ac.ac_minflt = encode_comp_t(pacct->ac_minflt); - ac.ac_majflt = encode_comp_t(pacct->ac_majflt); - ac.ac_exitcode = pacct->ac_exitcode; + ac->ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; + ac->ac_utime = encode_comp_t(nsec_to_AHZ(pacct->ac_utime)); + ac->ac_stime = encode_comp_t(nsec_to_AHZ(pacct->ac_stime)); + ac->ac_flag = pacct->ac_flag; + ac->ac_mem = encode_comp_t(pacct->ac_mem); + ac->ac_minflt = encode_comp_t(pacct->ac_minflt); + ac->ac_majflt = encode_comp_t(pacct->ac_majflt); + ac->ac_exitcode = pacct->ac_exitcode; spin_unlock_irq(¤t->sighand->siglock); - ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ - ac.ac_rw = encode_comp_t(ac.ac_io / 1024); - ac.ac_swaps = encode_comp_t(0); - /* - * Get freeze protection. If the fs is frozen, just skip the write - * as we could deadlock the system otherwise. - */ - if (!file_start_write_trylock(file)) - goto out; - /* - * Kernel segment override to datasegment and write it - * to the accounting file. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - /* - * Accounting records are not subject to resource limits. - */ - flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + /* we really need to bite the bullet and change layout */ + ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid()); + ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid()); +#if ACCT_VERSION == 1 || ACCT_VERSION == 2 + /* backward-compatible 16 bit fields */ + ac->ac_uid16 = ac->ac_uid; + ac->ac_gid16 = ac->ac_gid; +#elif ACCT_VERSION == 3 + { + struct pid_namespace *ns = acct->ns; + + ac->ac_pid = task_tgid_nr_ns(current, ns); + rcu_read_lock(); + ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); + rcu_read_unlock(); + } +#endif +} + +static void acct_write_process(struct bsd_acct_struct *acct) +{ + struct file *file = acct->file; + acct_t *ac = &acct->ac; + + /* Perform file operations on behalf of whoever enabled accounting */ + scoped_with_creds(file->f_cred) { + /* + * First check to see if there is enough free_space to continue + * the process accounting system. Then get freeze protection. If + * the fs is frozen, just skip the write as we could deadlock + * the system otherwise. + */ + if (check_free_space(acct) && file_start_write_trylock(file)) { + /* it's been opened O_APPEND, so position is irrelevant */ + loff_t pos = 0; + __kernel_write(file, ac, sizeof(acct_t), &pos); + file_end_write(file); + } + } +} + +static void do_acct_process(struct bsd_acct_struct *acct) +{ + unsigned long flim; + + /* Accounting records are not subject to resource limits. */ + flim = rlimit(RLIMIT_FSIZE); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - file->f_op->write(file, (char *)&ac, - sizeof(acct_t), &file->f_pos); + fill_ac(acct); + acct_write_process(acct); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; - set_fs(fs); - file_end_write(file); -out: - revert_creds(orig_cred); } /** @@ -573,18 +559,18 @@ out: void acct_collect(long exitcode, int group_dead) { struct pacct_struct *pacct = ¤t->signal->pacct; - cputime_t utime, stime; + u64 utime, stime; unsigned long vsize = 0; if (group_dead && current->mm) { + struct mm_struct *mm = current->mm; + VMA_ITERATOR(vmi, mm, 0); struct vm_area_struct *vma; - down_read(¤t->mm->mmap_sem); - vma = current->mm->mmap; - while (vma) { + + mmap_read_lock(mm); + for_each_vma(vmi, vma) vsize += vma->vm_end - vma->vm_start; - vma = vma->vm_next; - } - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(mm); } spin_lock_irq(¤t->sighand->siglock); @@ -601,6 +587,7 @@ void acct_collect(long exitcode, int group_dead) pacct->ac_flag |= ACORE; if (current->flags & PF_SIGNALED) pacct->ac_flag |= AXSIG; + task_cputime(current, &utime, &stime); pacct->ac_utime += utime; pacct->ac_stime += stime; @@ -609,36 +596,20 @@ void acct_collect(long exitcode, int group_dead) spin_unlock_irq(¤t->sighand->siglock); } -static void acct_process_in_ns(struct pid_namespace *ns) +static void slow_acct_process(struct pid_namespace *ns) { - struct file *file = NULL; - struct bsd_acct_struct *acct; - - acct = ns->bacct; - /* - * accelerate the common fastpath: - */ - if (!acct || !acct->file) - return; - - spin_lock(&acct_lock); - file = acct->file; - if (unlikely(!file)) { - spin_unlock(&acct_lock); - return; + for ( ; ns; ns = ns->parent) { + struct bsd_acct_struct *acct = acct_get(ns); + if (acct) { + do_acct_process(acct); + mutex_unlock(&acct->lock); + acct_put(acct); + } } - get_file(file); - spin_unlock(&acct_lock); - - do_acct_process(acct, ns, file); - fput(file); } /** - * acct_process - now just a wrapper around acct_process_in_ns, - * which in turn is a wrapper around do_acct_process. - * - * handles process accounting for an exiting task + * acct_process - handles process accounting for an exiting task */ void acct_process(void) { @@ -649,6 +620,10 @@ void acct_process(void) * alive and holds its namespace, which in turn holds * its parent. */ - for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) - acct_process_in_ns(ns); + for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) { + if (ns->bacct) + break; + } + if (unlikely(ns)) + slow_acct_process(ns); } |
