summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/configs/tiny.config1
-rw-r--r--kernel/dma/debug.c9
-rw-r--r--kernel/dma/direct.c3
-rw-r--r--kernel/events/core.c85
-rw-r--r--kernel/extable.c3
-rw-r--r--kernel/fork.c10
-rw-r--r--kernel/gcov/fs.c4
-rw-r--r--kernel/gcov/gcc_3_4.c6
-rw-r--r--kernel/gcov/gcc_4_7.c2
-rw-r--r--kernel/kallsyms.c2
-rw-r--r--kernel/kmod.c6
-rw-r--r--kernel/locking/lockdep.c51
-rw-r--r--kernel/locking/percpu-rwsem.c3
-rw-r--r--kernel/module.c5
-rw-r--r--kernel/pid.c1
-rw-r--r--kernel/power/user.c101
-rw-r--r--kernel/printk/internal.h5
-rw-r--r--kernel/printk/printk.c34
-rw-r--r--kernel/printk/printk_safe.c11
-rw-r--r--kernel/sched/core.c10
-rw-r--r--kernel/sched/debug.c44
-rw-r--r--kernel/sched/fair.c48
-rw-r--r--kernel/sched/sched.h7
-rw-r--r--kernel/workqueue.c6
24 files changed, 253 insertions, 204 deletions
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index 7fa0c4ae6394..8a44b93da0f3 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -6,7 +6,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_KERNEL_XZ=y
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
-CONFIG_OPTIMIZE_INLINING=y
# CONFIG_SLAB is not set
# CONFIG_SLUB is not set
CONFIG_SLOB=y
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 2031ed1ad7fa..9e1777c81f55 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -137,9 +137,12 @@ static const char *const maperr2str[] = {
[MAP_ERR_CHECKED] = "dma map error checked",
};
-static const char *type2name[5] = { "single", "page",
- "scather-gather", "coherent",
- "resource" };
+static const char *type2name[] = {
+ [dma_debug_single] = "single",
+ [dma_debug_sg] = "scather-gather",
+ [dma_debug_coherent] = "coherent",
+ [dma_debug_resource] = "resource",
+};
static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
"DMA_FROM_DEVICE", "DMA_NONE" };
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index a8560052a915..8f4bbdaf965e 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -39,7 +39,8 @@ static inline struct page *dma_direct_to_page(struct device *dev,
u64 dma_direct_get_required_mask(struct device *dev)
{
- u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);
+ phys_addr_t phys = (phys_addr_t)(max_pfn - 1) << PAGE_SHIFT;
+ u64 max_dma = phys_to_dma_direct(dev, phys);
return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81e6d80cb219..bc9b98a9af9a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -28,6 +28,7 @@
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
+#include <linux/hugetlb.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
@@ -982,16 +983,10 @@ perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
event->shadow_ctx_time = now - t->timestamp;
}
-/*
- * Update cpuctx->cgrp so that it is set when first cgroup event is added and
- * cleared when last cgroup event is removed.
- */
static inline void
-list_update_cgroup_event(struct perf_event *event,
- struct perf_event_context *ctx, bool add)
+perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
{
struct perf_cpu_context *cpuctx;
- struct list_head *cpuctx_entry;
if (!is_cgroup_event(event))
return;
@@ -1008,28 +1003,41 @@ list_update_cgroup_event(struct perf_event *event,
* because if the first would mismatch, the second would not try again
* and we would leave cpuctx->cgrp unset.
*/
- if (add && !cpuctx->cgrp) {
+ if (ctx->is_active && !cpuctx->cgrp) {
struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
cpuctx->cgrp = cgrp;
}
- if (add && ctx->nr_cgroups++)
+ if (ctx->nr_cgroups++)
return;
- else if (!add && --ctx->nr_cgroups)
+
+ list_add(&cpuctx->cgrp_cpuctx_entry,
+ per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
+}
+
+static inline void
+perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
+{
+ struct perf_cpu_context *cpuctx;
+
+ if (!is_cgroup_event(event))
+ return;
+
+ /*
+ * Because cgroup events are always per-cpu events,
+ * @ctx == &cpuctx->ctx.
+ */
+ cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
+
+ if (--ctx->nr_cgroups)
return;
- /* no cgroup running */
- if (!add)
+ if (ctx->is_active && cpuctx->cgrp)
cpuctx->cgrp = NULL;
- cpuctx_entry = &cpuctx->cgrp_cpuctx_entry;
- if (add)
- list_add(cpuctx_entry,
- per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
- else
- list_del(cpuctx_entry);
+ list_del(&cpuctx->cgrp_cpuctx_entry);
}
#else /* !CONFIG_CGROUP_PERF */
@@ -1095,11 +1103,14 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
}
static inline void
-list_update_cgroup_event(struct perf_event *event,
- struct perf_event_context *ctx, bool add)
+perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
{
}
+static inline void
+perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
+{
+}
#endif
/*
@@ -1790,13 +1801,14 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
add_event_to_groups(event, ctx);
}
- list_update_cgroup_event(event, ctx, true);
-
list_add_rcu(&event->event_entry, &ctx->event_list);
ctx->nr_events++;
if (event->attr.inherit_stat)
ctx->nr_stat++;
+ if (event->state > PERF_EVENT_STATE_OFF)
+ perf_cgroup_event_enable(event, ctx);
+
ctx->generation++;
}
@@ -1975,8 +1987,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
event->attach_state &= ~PERF_ATTACH_CONTEXT;
- list_update_cgroup_event(event, ctx, false);
-
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
@@ -1993,8 +2003,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
* of error state is by explicit re-enabling
* of the event
*/
- if (event->state > PERF_EVENT_STATE_OFF)
+ if (event->state > PERF_EVENT_STATE_OFF) {
+ perf_cgroup_event_disable(event, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_OFF);
+ }
ctx->generation++;
}
@@ -2225,6 +2237,7 @@ event_sched_out(struct perf_event *event,
if (READ_ONCE(event->pending_disable) >= 0) {
WRITE_ONCE(event->pending_disable, -1);
+ perf_cgroup_event_disable(event, ctx);
state = PERF_EVENT_STATE_OFF;
}
perf_event_set_state(event, state);
@@ -2362,6 +2375,7 @@ static void __perf_event_disable(struct perf_event *event,
event_sched_out(event, cpuctx, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_OFF);
+ perf_cgroup_event_disable(event, ctx);
}
/*
@@ -2745,7 +2759,7 @@ static int __perf_install_in_context(void *info)
}
#ifdef CONFIG_CGROUP_PERF
- if (is_cgroup_event(event)) {
+ if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) {
/*
* If the current cgroup doesn't match the event's
* cgroup, we should not try to schedule it.
@@ -2905,6 +2919,7 @@ static void __perf_event_enable(struct perf_event *event,
ctx_sched_out(ctx, cpuctx, EVENT_TIME);
perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
+ perf_cgroup_event_enable(event, ctx);
if (!ctx->is_active)
return;
@@ -3507,7 +3522,8 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
static bool perf_less_group_idx(const void *l, const void *r)
{
- const struct perf_event *le = l, *re = r;
+ const struct perf_event *le = *(const struct perf_event **)l;
+ const struct perf_event *re = *(const struct perf_event **)r;
return le->group_index < re->group_index;
}
@@ -3615,8 +3631,10 @@ static int merge_sched_in(struct perf_event *event, void *data)
}
if (event->state == PERF_EVENT_STATE_INACTIVE) {
- if (event->attr.pinned)
+ if (event->attr.pinned) {
+ perf_cgroup_event_disable(event, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+ }
*can_add_hw = 0;
ctx->rotate_necessary = 1;
@@ -6916,9 +6934,12 @@ static u64 perf_virt_to_phys(u64 virt)
* Try IRQ-safe __get_user_pages_fast first.
* If failed, leave phys_addr as 0.
*/
- if ((current->mm != NULL) &&
- (__get_user_pages_fast(virt, 1, 0, &p) == 1))
- phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+ if (current->mm != NULL) {
+ pagefault_disable();
+ if (__get_user_pages_fast(virt, 1, 0, &p) == 1)
+ phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+ pagefault_enable();
+ }
if (p)
put_page(p);
@@ -7973,7 +7994,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
flags |= MAP_EXECUTABLE;
if (vma->vm_flags & VM_LOCKED)
flags |= MAP_LOCKED;
- if (vma->vm_flags & VM_HUGETLB)
+ if (is_vm_hugetlb_page(vma))
flags |= MAP_HUGETLB;
if (file) {
diff --git a/kernel/extable.c b/kernel/extable.c
index 7681f87e89dd..b0ea5eb0c3b4 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -34,7 +34,8 @@ u32 __initdata __visible main_extable_sort_needed = 1;
/* Sort the kernel's built-in exception table */
void __init sort_main_extable(void)
{
- if (main_extable_sort_needed && __stop___ex_table > __start___ex_table) {
+ if (main_extable_sort_needed &&
+ &__stop___ex_table > &__start___ex_table) {
pr_notice("Sorting __ex_table...\n");
sort_extable(__start___ex_table, __stop___ex_table);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index d2a967bf85d5..4385f3d639f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -361,6 +361,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
if (new) {
*new = *orig;
INIT_LIST_HEAD(&new->anon_vma_chain);
+ new->vm_next = new->vm_prev = NULL;
}
return new;
}
@@ -553,14 +554,15 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (retval)
goto fail_nomem_anon_vma_fork;
if (tmp->vm_flags & VM_WIPEONFORK) {
- /* VM_WIPEONFORK gets a clean slate in the child. */
+ /*
+ * VM_WIPEONFORK gets a clean slate in the child.
+ * Don't prepare anon_vma until fault since we don't
+ * copy page for current vma.
+ */
tmp->anon_vma = NULL;
- if (anon_vma_prepare(tmp))
- goto fail_nomem_anon_vma_fork;
} else if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
- tmp->vm_next = tmp->vm_prev = NULL;
file = tmp->vm_file;
if (file) {
struct inode *inode = file_inode(file);
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
index e5eb5ea7ea59..82babf5aa077 100644
--- a/kernel/gcov/fs.c
+++ b/kernel/gcov/fs.c
@@ -58,7 +58,7 @@ struct gcov_node {
struct dentry *dentry;
struct dentry **links;
int num_loaded;
- char name[0];
+ char name[];
};
static const char objtree[] = OBJTREE;
@@ -108,9 +108,9 @@ static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos)
{
struct gcov_iterator *iter = data;
+ (*pos)++;
if (gcov_iter_next(iter))
return NULL;
- (*pos)++;
return iter;
}
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c
index 801ee4b0b969..acb83558e5df 100644
--- a/kernel/gcov/gcc_3_4.c
+++ b/kernel/gcov/gcc_3_4.c
@@ -38,7 +38,7 @@ static struct gcov_info *gcov_info_head;
struct gcov_fn_info {
unsigned int ident;
unsigned int checksum;
- unsigned int n_ctrs[0];
+ unsigned int n_ctrs[];
};
/**
@@ -78,7 +78,7 @@ struct gcov_info {
unsigned int n_functions;
const struct gcov_fn_info *functions;
unsigned int ctr_mask;
- struct gcov_ctr_info counts[0];
+ struct gcov_ctr_info counts[];
};
/**
@@ -352,7 +352,7 @@ struct gcov_iterator {
unsigned int count;
int num_types;
- struct type_info type_info[0];
+ struct type_info type_info[];
};
static struct gcov_fn_info *get_func(struct gcov_iterator *iter)
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index ec37563674d6..908fdf5098c3 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -68,7 +68,7 @@ struct gcov_fn_info {
unsigned int ident;
unsigned int lineno_checksum;
unsigned int cfg_checksum;
- struct gcov_ctr_info ctrs[0];
+ struct gcov_ctr_info ctrs[];
};
/**
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index a9b3f660dee7..16c8c605f4b0 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -175,7 +175,6 @@ unsigned long kallsyms_lookup_name(const char *name)
}
return module_kallsyms_lookup_name(name);
}
-EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
unsigned long),
@@ -194,7 +193,6 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
}
return module_kallsyms_on_each_symbol(fn, data);
}
-EXPORT_SYMBOL_GPL(kallsyms_on_each_symbol);
static unsigned long get_symbol_pos(unsigned long addr,
unsigned long *symbolsize,
diff --git a/kernel/kmod.c b/kernel/kmod.c
index bc6addd9152b..37c3c4b97b8e 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -35,7 +35,7 @@
* (u64) THREAD_SIZE * 8UL);
*
* If you need less than 50 threads would mean we're dealing with systems
- * smaller than 3200 pages. This assuems you are capable of having ~13M memory,
+ * smaller than 3200 pages. This assumes you are capable of having ~13M memory,
* and this would only be an be an upper limit, after which the OOM killer
* would take effect. Systems like these are very unlikely if modules are
* enabled.
@@ -120,7 +120,7 @@ out:
* invoke it.
*
* If module auto-loading support is disabled then this function
- * becomes a no-operation.
+ * simply returns -ENOENT.
*/
int __request_module(bool wait, const char *fmt, ...)
{
@@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...)
WARN_ON_ONCE(wait && current_is_async());
if (!modprobe_path[0])
- return 0;
+ return -ENOENT;
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 1511690e4de7..ac10db66cc63 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3952,10 +3952,36 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return ret;
}
+static inline short task_wait_context(struct task_struct *curr)
+{
+ /*
+ * Set appropriate wait type for the context; for IRQs we have to take
+ * into account force_irqthread as that is implied by PREEMPT_RT.
+ */
+ if (curr->hardirq_context) {
+ /*
+ * Check if force_irqthreads will run us threaded.
+ */
+ if (curr->hardirq_threaded || curr->irq_config)
+ return LD_WAIT_CONFIG;
+
+ return LD_WAIT_SPIN;
+ } else if (curr->softirq_context) {
+ /*
+ * Softirqs are always threaded.
+ */
+ return LD_WAIT_CONFIG;
+ }
+
+ return LD_WAIT_MAX;
+}
+
static int
print_lock_invalid_wait_context(struct task_struct *curr,
struct held_lock *hlock)
{
+ short curr_inner;
+
if (!debug_locks_off())
return 0;
if (debug_locks_silent)
@@ -3971,6 +3997,10 @@ print_lock_invalid_wait_context(struct task_struct *curr,
print_lock(hlock);
pr_warn("other info that might help us debug this:\n");
+
+ curr_inner = task_wait_context(curr);
+ pr_warn("context-{%d:%d}\n", curr_inner, curr_inner);
+
lockdep_print_held_locks(curr);
pr_warn("stack backtrace:\n");
@@ -4017,26 +4047,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
}
depth++;
- /*
- * Set appropriate wait type for the context; for IRQs we have to take
- * into account force_irqthread as that is implied by PREEMPT_RT.
- */
- if (curr->hardirq_context) {
- /*
- * Check if force_irqthreads will run us threaded.
- */
- if (curr->hardirq_threaded || curr->irq_config)
- curr_inner = LD_WAIT_CONFIG;
- else
- curr_inner = LD_WAIT_SPIN;
- } else if (curr->softirq_context) {
- /*
- * Softirqs are always threaded.
- */
- curr_inner = LD_WAIT_CONFIG;
- } else {
- curr_inner = LD_WAIT_MAX;
- }
+ curr_inner = task_wait_context(curr);
for (; depth < curr->lockdep_depth; depth++) {
struct held_lock *prev = curr->held_locks + depth;
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index a008a1ba21a7..8bbafe3e5203 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -118,14 +118,15 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
unsigned int mode, int wake_flags,
void *key)
{
- struct task_struct *p = get_task_struct(wq_entry->private);
bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
struct percpu_rw_semaphore *sem = key;
+ struct task_struct *p;
/* concurrent against percpu_down_write(), can get stolen */
if (!__percpu_rwsem_trylock(sem, reader))
return 1;
+ p = get_task_struct(wq_entry->private);
list_del_init(&wq_entry->entry);
smp_store_release(&wq_entry->private, NULL);
diff --git a/kernel/module.c b/kernel/module.c
index 33569a01d6e1..646f1e2330d2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1515,7 +1515,7 @@ struct module_sect_attr {
struct module_sect_attrs {
struct attribute_group grp;
unsigned int nsections;
- struct module_sect_attr attrs[0];
+ struct module_sect_attr attrs[];
};
static ssize_t module_sect_show(struct module_attribute *mattr,
@@ -1608,7 +1608,7 @@ static void remove_sect_attrs(struct module *mod)
struct module_notes_attrs {
struct kobject *dir;
unsigned int notes;
- struct bin_attribute attrs[0];
+ struct bin_attribute attrs[];
};
static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
@@ -4355,6 +4355,7 @@ static int modules_open(struct inode *inode, struct file *file)
}
static const struct proc_ops modules_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = modules_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
diff --git a/kernel/pid.c b/kernel/pid.c
index bc21c0fb26d8..c835b844aca7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -256,6 +256,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
get_pid_ns(ns);
refcount_set(&pid->count, 1);
+ spin_lock_init(&pid->lock);
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index ef90eb1fb86e..7959449765d9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -196,6 +196,50 @@ unlock:
return res;
}
+struct compat_resume_swap_area {
+ compat_loff_t offset;
+ u32 dev;
+} __packed;
+
+static int snapshot_set_swap_area(struct snapshot_data *data,
+ void __user *argp)
+{
+ sector_t offset;
+ dev_t swdev;
+
+ if (swsusp_swap_in_use())
+ return -EPERM;
+
+ if (in_compat_syscall()) {
+ struct compat_resume_swap_area swap_area;
+
+ if (copy_from_user(&swap_area, argp, sizeof(swap_area)))
+ return -EFAULT;
+ swdev = new_decode_dev(swap_area.dev);
+ offset = swap_area.offset;
+ } else {
+ struct resume_swap_area swap_area;
+
+ if (copy_from_user(&swap_area, argp, sizeof(swap_area)))
+ return -EFAULT;
+ swdev = new_decode_dev(swap_area.dev);
+ offset = swap_area.offset;
+ }
+
+ /*
+ * User space encodes device types as two-byte values,
+ * so we need to recode them
+ */
+ if (!swdev) {
+ data->swap = -1;
+ return -EINVAL;
+ }
+ data->swap = swap_type_of(swdev, offset, NULL);
+ if (data->swap < 0)
+ return -ENODEV;
+ return 0;
+}
+
static long snapshot_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -351,34 +395,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
break;
case SNAPSHOT_SET_SWAP_AREA:
- if (swsusp_swap_in_use()) {
- error = -EPERM;
- } else {
- struct resume_swap_area swap_area;
- dev_t swdev;
-
- error = copy_from_user(&swap_area, (void __user *)arg,
- sizeof(struct resume_swap_area));
- if (error) {
- error = -EFAULT;
- break;
- }
-
- /*
- * User space encodes device types as two-byte values,
- * so we need to recode them
- */
- swdev = new_decode_dev(swap_area.dev);
- if (swdev) {
- offset = swap_area.offset;
- data->swap = swap_type_of(swdev, offset, NULL);
- if (data->swap < 0)
- error = -ENODEV;
- } else {
- data->swap = -1;
- error = -EINVAL;
- }
- }
+ error = snapshot_set_swap_area(data, (void __user *)arg);
break;
default:
@@ -393,12 +410,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
}
#ifdef CONFIG_COMPAT
-
-struct compat_resume_swap_area {
- compat_loff_t offset;
- u32 dev;
-} __packed;
-
static long
snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
@@ -409,33 +420,13 @@ snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case SNAPSHOT_AVAIL_SWAP_SIZE:
case SNAPSHOT_ALLOC_SWAP_PAGE:
case SNAPSHOT_CREATE_IMAGE:
+ case SNAPSHOT_SET_SWAP_AREA:
return snapshot_ioctl(file, cmd,
(unsigned long) compat_ptr(arg));
-
- case SNAPSHOT_SET_SWAP_AREA: {
- struct compat_resume_swap_area __user *u_swap_area =
- compat_ptr(arg);
- struct resume_swap_area swap_area;
- mm_segment_t old_fs;
- int err;
-
- err = get_user(swap_area.offset, &u_swap_area->offset);
- err |= get_user(swap_area.dev, &u_swap_area->dev);
- if (err)
- return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = snapshot_ioctl(file, SNAPSHOT_SET_SWAP_AREA,
- (unsigned long) &swap_area);
- set_fs(old_fs);
- return err;
- }
-
default:
return snapshot_ioctl(file, cmd, arg);
}
}
-
#endif /* CONFIG_COMPAT */
static const struct file_operations snapshot_fops = {
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index c8e6ab689d42..b2b0f526f249 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -23,6 +23,9 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args);
void __printk_safe_enter(void);
void __printk_safe_exit(void);
+void printk_safe_init(void);
+bool printk_percpu_data_ready(void);
+
#define printk_safe_enter_irqsave(flags) \
do { \
local_irq_save(flags); \
@@ -64,4 +67,6 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
#define printk_safe_enter_irq() local_irq_disable()
#define printk_safe_exit_irq() local_irq_enable()
+static inline void printk_safe_init(void) { }
+static inline bool printk_percpu_data_ready(void) { return false; }
#endif /* CONFIG_PRINTK */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 633f41a11d75..9a9b6156270b 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -460,6 +460,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
+/*
+ * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
+ * per_cpu_areas are initialised. This variable is set to true when
+ * it's safe to access per-CPU data.
+ */
+static bool __printk_percpu_data_ready __read_mostly;
+
+bool printk_percpu_data_ready(void)
+{
+ return __printk_percpu_data_ready;
+}
+
/* Return log buffer address */
char *log_buf_addr_get(void)
{
@@ -1146,12 +1158,28 @@ static void __init log_buf_add_cpu(void)
static inline void log_buf_add_cpu(void) {}
#endif /* CONFIG_SMP */
+static void __init set_percpu_data_ready(void)
+{
+ printk_safe_init();
+ /* Make sure we set this flag only after printk_safe() init is done */
+ barrier();
+ __printk_percpu_data_ready = true;
+}
+
void __init setup_log_buf(int early)
{
unsigned long flags;
char *new_log_buf;
unsigned int free;
+ /*
+ * Some archs call setup_log_buf() multiple times - first is very
+ * early, e.g. from setup_arch(), and second - when percpu_areas
+ * are initialised.
+ */
+ if (!early)
+ set_percpu_data_ready();
+
if (log_buf != __log_buf)
return;
@@ -2975,6 +3003,9 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
void wake_up_klogd(void)
{
+ if (!printk_percpu_data_ready())
+ return;
+
preempt_disable();
if (waitqueue_active(&log_wait)) {
this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
@@ -2985,6 +3016,9 @@ void wake_up_klogd(void)
void defer_console_output(void)
{
+ if (!printk_percpu_data_ready())
+ return;
+
preempt_disable();
__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index b4045e782743..d9a659a686f3 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -27,7 +27,6 @@
* There are situations when we want to make sure that all buffers
* were handled or when IRQs are blocked.
*/
-static int printk_safe_irq_ready __read_mostly;
#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \
sizeof(atomic_t) - \
@@ -51,7 +50,7 @@ static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq);
/* Get flushed in a more safe context. */
static void queue_flush_work(struct printk_safe_seq_buf *s)
{
- if (printk_safe_irq_ready)
+ if (printk_percpu_data_ready())
irq_work_queue(&s->work);
}
@@ -402,14 +401,6 @@ void __init printk_safe_init(void)
#endif
}
- /*
- * In the highly unlikely event that a NMI were to trigger at
- * this moment. Make sure IRQ work is set up before this
- * variable is set.
- */
- barrier();
- printk_safe_irq_ready = 1;
-
/* Flush pending messages that did not have scheduled IRQ works. */
printk_safe_flush();
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a2694ba82874..3a61a3b8eaa9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2119,12 +2119,6 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
return cpu;
}
-static void update_avg(u64 *avg, u64 sample)
-{
- s64 diff = sample - *avg;
- *avg += diff >> 3;
-}
-
void sched_set_stop_task(int cpu, struct task_struct *stop)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
@@ -4126,7 +4120,8 @@ static inline void sched_submit_work(struct task_struct *tsk)
* it wants to wake up a task to maintain concurrency.
* As this function is called inside the schedule() context,
* we disable preemption to avoid it calling schedule() again
- * in the possible wakeup of a kworker.
+ * in the possible wakeup of a kworker and because wq_worker_sleeping()
+ * requires it.
*/
if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
preempt_disable();
@@ -6699,7 +6694,6 @@ void __init sched_init(void)
rq_attach_root(rq, &def_root_domain);
#ifdef CONFIG_NO_HZ_COMMON
- rq->last_load_update_tick = jiffies;
rq->last_blocked_load_update_tick = jiffies;
atomic_set(&rq->nohz_flags, 0);
#endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8331bc04aea2..a562df57a86e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -816,10 +816,12 @@ static int __init init_sched_debug_procfs(void)
__initcall(init_sched_debug_procfs);
-#define __P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
-#define P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
-#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
+#define __P(F) __PS(#F, F)
+#define P(F) __PS(#F, p->F)
+#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
+#define __PN(F) __PSN(#F, F)
+#define PN(F) __PSN(#F, p->F)
#ifdef CONFIG_NUMA_BALANCING
@@ -868,18 +870,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
SEQ_printf(m,
"---------------------------------------------------------"
"----------\n");
-#define __P(F) \
- SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
-#define P(F) \
- SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
-#define P_SCHEDSTAT(F) \
- SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
-#define __PN(F) \
- SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN(F) \
- SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
-#define PN_SCHEDSTAT(F) \
- SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
+
+#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->F))
+#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
PN(se.exec_start);
PN(se.vruntime);
@@ -939,10 +932,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
}
__P(nr_switches);
- SEQ_printf(m, "%-45s:%21Ld\n",
- "nr_voluntary_switches", (long long)p->nvcsw);
- SEQ_printf(m, "%-45s:%21Ld\n",
- "nr_involuntary_switches", (long long)p->nivcsw);
+ __PS("nr_voluntary_switches", p->nvcsw);
+ __PS("nr_involuntary_switches", p->nivcsw);
P(se.load.weight);
#ifdef CONFIG_SMP
@@ -956,6 +947,12 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(se.avg.util_est.ewma);
P(se.avg.util_est.enqueued);
#endif
+#ifdef CONFIG_UCLAMP_TASK
+ __PS("uclamp.min", p->uclamp[UCLAMP_MIN].value);
+ __PS("uclamp.max", p->uclamp[UCLAMP_MAX].value);
+ __PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
+ __PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
+#endif
P(policy);
P(prio);
if (task_has_dl_policy(p)) {
@@ -963,11 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(dl.deadline);
}
#undef PN_SCHEDSTAT
-#undef PN
-#undef __PN
#undef P_SCHEDSTAT
-#undef P
-#undef __P
{
unsigned int this_cpu = raw_smp_processor_id();
@@ -975,8 +968,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
t0 = cpu_clock(this_cpu);
t1 = cpu_clock(this_cpu);
- SEQ_printf(m, "%-45s:%21Ld\n",
- "clock-delta", (long long)(t1-t0));
+ __PS("clock-delta", t1-t0);
}
sched_show_numa(p, m);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d7fb20adabeb..02f323b85b6d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2799,7 +2799,7 @@ static void task_numa_work(struct callback_head *work)
* Skip inaccessible VMAs to avoid any confusion between
* PROT_NONE and NUMA hinting ptes
*/
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+ if (!vma_is_accessible(vma))
continue;
do {
@@ -4836,11 +4836,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
resched_curr(rq);
}
-static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
+static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
{
struct cfs_rq *cfs_rq;
- u64 runtime;
- u64 starting_runtime = remaining;
+ u64 runtime, remaining = 1;
rcu_read_lock();
list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
@@ -4855,10 +4854,13 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
/* By the above check, this should never be true */
SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
+ raw_spin_lock(&cfs_b->lock);
runtime = -cfs_rq->runtime_remaining + 1;
- if (runtime > remaining)
- runtime = remaining;
- remaining -= runtime;
+ if (runtime > cfs_b->runtime)
+ runtime = cfs_b->runtime;
+ cfs_b->runtime -= runtime;
+ remaining = cfs_b->runtime;
+ raw_spin_unlock(&cfs_b->lock);
cfs_rq->runtime_remaining += runtime;
@@ -4873,8 +4875,6 @@ next:
break;
}
rcu_read_unlock();
-
- return starting_runtime - remaining;
}
/*
@@ -4885,7 +4885,6 @@ next:
*/
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
{
- u64 runtime;
int throttled;
/* no need to continue the timer with no bandwidth constraint */
@@ -4914,24 +4913,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
cfs_b->nr_throttled += overrun;
/*
- * This check is repeated as we are holding onto the new bandwidth while
- * we unthrottle. This can potentially race with an unthrottled group
- * trying to acquire new bandwidth from the global pool. This can result
- * in us over-using our runtime if it is all used during this loop, but
- * only by limited amounts in that extreme case.
+ * This check is repeated as we release cfs_b->lock while we unthrottle.
*/
while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
- runtime = cfs_b->runtime;
cfs_b->distribute_running = 1;
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
/* we can't nest cfs_b->lock while distributing bandwidth */
- runtime = distribute_cfs_runtime(cfs_b, runtime);
+ distribute_cfs_runtime(cfs_b);
raw_spin_lock_irqsave(&cfs_b->lock, flags);
cfs_b->distribute_running = 0;
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
-
- lsub_positive(&cfs_b->runtime, runtime);
}
/*
@@ -5065,10 +5057,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (!runtime)
return;
- runtime = distribute_cfs_runtime(cfs_b, runtime);
+ distribute_cfs_runtime(cfs_b);
raw_spin_lock_irqsave(&cfs_b->lock, flags);
- lsub_positive(&cfs_b->runtime, runtime);
cfs_b->distribute_running = 0;
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
}
@@ -6080,8 +6071,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
struct sched_domain *this_sd;
u64 avg_cost, avg_idle;
- u64 time, cost;
- s64 delta;
+ u64 time;
int this = smp_processor_id();
int cpu, nr = INT_MAX;
@@ -6119,9 +6109,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
}
time = cpu_clock(this) - time;
- cost = this_sd->avg_scan_cost;
- delta = (s64)(time - cost) / 8;
- this_sd->avg_scan_cost += delta;
+ update_avg(&this_sd->avg_scan_cost, time);
return cpu;
}
@@ -9048,6 +9036,14 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
sds->total_capacity;
+ /*
+ * If the local group is more loaded than the selected
+ * busiest group don't try to pull any tasks.
+ */
+ if (local->avg_load >= busiest->avg_load) {
+ env->imbalance = 0;
+ return;
+ }
}
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0f616bf7bce3..db3a57675ccf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -195,6 +195,12 @@ static inline int task_has_dl_policy(struct task_struct *p)
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
+static inline void update_avg(u64 *avg, u64 sample)
+{
+ s64 diff = sample - *avg;
+ *avg += diff / 8;
+}
+
/*
* !! For sched_setattr_nocheck() (kernel) only !!
*
@@ -882,7 +888,6 @@ struct rq {
#endif
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
- unsigned long last_load_update_tick;
unsigned long last_blocked_load_update_tick;
unsigned int has_blocked_load;
#endif /* CONFIG_SMP */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3816a18c251e..891ccad5f271 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -858,7 +858,8 @@ void wq_worker_running(struct task_struct *task)
* @task: task going to sleep
*
* This function is called from schedule() when a busy worker is
- * going to sleep.
+ * going to sleep. Preemption needs to be disabled to protect ->sleeping
+ * assignment.
*/
void wq_worker_sleeping(struct task_struct *task)
{
@@ -875,7 +876,8 @@ void wq_worker_sleeping(struct task_struct *task)
pool = worker->pool;
- if (WARN_ON_ONCE(worker->sleeping))
+ /* Return if preempted before wq_worker_running() was reached */
+ if (worker->sleeping)
return;
worker->sleeping = 1;