From 2821fd0c2be0c4e513b1622d86df9170ef62a6d4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Mar 2019 12:00:09 +0200 Subject: lib/test_printf: Switch to bitmap_zalloc() Switch to bitmap_zalloc() to show clearly what we are allocating. Besides that, it returns a pointer of bitmap type instead of an opaque void *. Link: http://lkml.kernel.org/r/20190304100009.65147-1-andriy.shevchenko@linux.intel.com To: linux-kernel@vger.kernel.org To: Andrew Morton To: linux@rasmusvillemoes.dk Signed-off-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/test_printf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 659b6cc0d483..e8206d8d2d08 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -481,14 +481,14 @@ static void __init large_bitmap(void) { const int nbits = 1 << 16; - unsigned long *bits = kcalloc(BITS_TO_LONGS(nbits), sizeof(long), GFP_KERNEL); + unsigned long *bits = bitmap_zalloc(nbits, GFP_KERNEL); if (!bits) return; bitmap_set(bits, 1, 20); bitmap_set(bits, 60000, 15); test("1-20,60000-60014", "%*pbl", nbits, bits); - kfree(bits); + bitmap_free(bits); } static void __init -- cgit From 4feb7c7a4fbb8f63371be31cda79433c7cf3da86 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Mar 2019 14:42:40 +1100 Subject: rhashtable: don't hold lock on first table throughout insertion. rhashtable_try_insert() currently holds a lock on the bucket in the first table, while also locking buckets in subsequent tables. This is unnecessary and looks like a hold-over from some earlier version of the implementation. As insert and remove always lock a bucket in each table in turn, and as insert only inserts in the final table, there cannot be any races that are not covered by simply locking a bucket in each table in turn. When an insert call reaches the last table it can be sure that there is no matching entry in any other table as it has searched them all, and insertion never happens anywhere but in the last table. The fact that the code tests for the existence of future_tbl while holding a lock on the relevant bucket ensures that two threads inserting the same key will make compatible decisions about which is the "last" table. This simplifies the code and allows the ->rehash field to be discarded. We still need a way to ensure that a dead bucket_table is never re-linked by rhashtable_walk_stop(). This can be achieved by calling call_rcu() inside the locked region, and checking with rcu_head_after_call_rcu() in rhashtable_walk_stop() to see if the bucket table is empty and dead. Acked-by: Herbert Xu Reviewed-by: Paul E. McKenney Signed-off-by: NeilBrown Signed-off-by: David S.
Miller --- lib/rhashtable.c | 52 ++++++++++++++++------------------------------------ 1 file changed, 16 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 0a105d4af166..776b3a82d3a1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -197,6 +197,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return NULL; } + rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); tbl->hash_rnd = get_random_u32(); @@ -280,10 +281,9 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, while (!(err = rhashtable_rehash_one(ht, old_hash))) ; - if (err == -ENOENT) { - old_tbl->rehash++; + if (err == -ENOENT) err = 0; - } + spin_unlock_bh(old_bucket_lock); return err; @@ -330,13 +330,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht) spin_lock(&ht->lock); list_for_each_entry(walker, &old_tbl->walkers, list) walker->tbl = NULL; - spin_unlock(&ht->lock); /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. + * We do this inside the locked region so that + * rhashtable_walk_stop() can use rcu_head_after_call_rcu() + * to check if it should not re-link the table. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + spin_unlock(&ht->lock); return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } @@ -578,46 +581,22 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, struct bucket_table *new_tbl; struct bucket_table *tbl; unsigned int hash; - spinlock_t *lock; void *data; - tbl = rcu_dereference(ht->tbl); - - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. - */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, ht->p); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - - spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); - } - - data = rhashtable_lookup_one(ht, tbl, hash, key, obj); - new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); - if (PTR_ERR(new_tbl) != -EEXIST) - data = ERR_CAST(new_tbl); + new_tbl = rcu_dereference(ht->tbl); - while (!IS_ERR_OR_NULL(new_tbl)) { + do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); - spin_lock_nested(rht_bucket_lock(tbl, hash), - SINGLE_DEPTH_NESTING); + spin_lock_bh(rht_bucket_lock(tbl, hash)); data = rhashtable_lookup_one(ht, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); - spin_unlock(rht_bucket_lock(tbl, hash)); - } - - spin_unlock_bh(lock); + spin_unlock_bh(rht_bucket_lock(tbl, hash)); + } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: @@ -939,10 +918,11 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) ht = iter->ht; spin_lock(&ht->lock); - if (tbl->rehash < tbl->size) - list_add(&iter->walker.list, &tbl->walkers); - else + if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu)) + /* This bucket table is being freed, don't re-link it. */ iter->walker.tbl = NULL; + else + list_add(&iter->walker.list, &tbl->walkers); spin_unlock(&ht->lock); out: -- cgit From f7ad68bf98506f48129267438ada1255fc4edfa2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Mar 2019 14:42:40 +1100 Subject: rhashtable: rename rht_for_each*continue as *from. 
The pattern set by list.h is that for_each..continue() iterators start at the next entry after the given one, while for_each..from() iterators start at the given entry. The rht_for_each*continue() iterators are documented as though they start at the 'next' entry, but actually start at the given entry, and they are used expecting that behaviour. So fix the documentation and change the names to *from for consistency with list.h. Acked-by: Herbert Xu Acked-by: Miguel Ojeda Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 776b3a82d3a1..f65e43fb1ff8 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -490,7 +490,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, elasticity = RHT_ELASTICITY; pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, *pprev, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; -- cgit From c03a0fd0b609e2f5c669c2b7f27c8e1928e9196e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 17 Mar 2019 14:02:31 +0900 Subject: kobject: Don't trigger kobject_uevent(KOBJ_REMOVE) twice. syzbot is hitting a use-after-free bug in the uinput module [1]. This is because kobject_uevent(KOBJ_REMOVE) is called again due to commit 0f4dafc0563c6c49 ("Kobject: auto-cleanup on final unref") after memory allocation fault injection made kobject_uevent(KOBJ_REMOVE) from device_del() from input_unregister_device() fail, while uinput_destroy_device() is expecting that kobject_uevent(KOBJ_REMOVE) is not called after device_del() from input_unregister_device() completed. That commit intended to catch cases where nobody even attempted to send "remove" uevents. But there is no guarantee that an event will ultimately be sent. We are at the point of no return as far as the rest of the kernel is concerned; there are no repeats or do-overs. Also, it is not clear whether some subsystem depends on that commit. If no subsystem depends on that commit, it will be better to remove the state_{add,remove}_uevent_sent logic. But we don't want to risk a regression (in a patch which will be backported) by trying to remove that logic. Therefore, as a first step, let's avoid the use-after-free bug by making sure that kobject_uevent(KOBJ_REMOVE) won't be triggered twice. [1] https://syzkaller.appspot.com/bug?id=8b17c134fe938bbddd75a45afaa9e68af43a362d Reported-by: syzbot Analyzed-by: Dmitry Torokhov Fixes: 0f4dafc0563c6c49 ("Kobject: auto-cleanup on final unref") Cc: Kay Sievers Signed-off-by: Tetsuo Handa Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index f05802687ba4..7998affa45d4 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -466,6 +466,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, int i = 0; int retval = 0; + /* + * Mark "remove" event done regardless of result, for some subsystems + * do not want to re-trigger "remove" event via automatic cleanup.
+ */ + if (action == KOBJ_REMOVE) + kobj->state_remove_uevent_sent = 1; + pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -567,10 +574,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, kobj->state_add_uevent_sent = 1; break; - case KOBJ_REMOVE: - kobj->state_remove_uevent_sent = 1; - break; - case KOBJ_UNBIND: zap_modalias_env(env); break; -- cgit From e83b9f55448afce3fe1abcd1d10db9584f8042a6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 2 Apr 2019 09:49:50 -0700 Subject: kbuild: add ability to generate BTF type info for vmlinux This patch adds a new config option to trigger generation of BTF type information from DWARF debuginfo for vmlinux and kernel modules through pahole, which in turn relies on libbpf for the btf_dedup() algorithm. The intent is to record compact type information of all types used inside the kernel, including all the structs/unions/typedefs/etc. This enables BPF's compile-once-run-everywhere ([0]) approach, in which tracing programs that are inspecting kernel's internal data (e.g., struct task_struct) can be compiled on a system running some kernel version, but can be run on other kernel versions (and configurations) without recompilation, even if the layout of structs changed and/or some of the fields were added, removed, or renamed. This is only possible if the BPF loader can get kernel type info to adjust all the offsets correctly. This patch is a first step in this direction, making sure that BTF type info is part of the Linux kernel image in a non-loadable ELF section. The BTF deduplication ([1]) algorithm typically provides 100x savings compared to DWARF data, so the resulting .BTF section is not big; it is typically about 2MB in size. [0] http://vger.kernel.org/lpc-bpf2018.html#session-2 [1] https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html Cc: Masahiro Yamada Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: David S. Miller Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- lib/Kconfig.debug | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d9e81779e37..188fc17c2202 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -219,6 +219,14 @@ config DEBUG_INFO_DWARF4 But it significantly improves the success of resolving variables in gdb on optimized code. +config DEBUG_INFO_BTF + bool "Generate BTF typeinfo" + depends on DEBUG_INFO + help + Generate deduplicated BTF type information from DWARF debug info. + Turning this on expects presence of pahole tool, which will convert + DWARF type info into equivalent deduplicated BTF type info. + config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" depends on DEBUG_INFO -- cgit From 72deb455b5ec619ff043c30bc90025aa3de3cdda Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Apr 2019 18:08:59 +0200 Subject: block: remove CONFIG_LBDAF Currently support for 64-bit sector_t and blkcnt_t is optional on 32-bit architectures. These types are required to support block device and/or file sizes larger than 2 TiB, and have generally defaulted to on for a long time. Enabling the option only increases the i386 tinyconfig size by 145 bytes, and many data structures already always use 64-bit values for their in-core and on-disk data structures anyway, so there should not be a large change in dynamic memory usage either.
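For illustration only, and not part of this patch: a simplified sketch of the kind of conditional typedef CONFIG_LBDAF gated, loosely based on the pre-removal include/linux/types.h (details abridged):

	#ifdef CONFIG_LBDAF
	typedef u64 sector_t;			/* 64-bit sector numbers even on 32-bit */
	typedef u64 blkcnt_t;			/* 64-bit block counts even on 32-bit */
	#else
	typedef unsigned long sector_t;		/* caps devices/files at 2 TiB on 32-bit */
	typedef unsigned long blkcnt_t;
	#endif

With the option gone, the 64-bit definitions become unconditional.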
Dropping this option removes a somewhat weird non-default config that has caused various bugs or compiler warnings when actually used. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d9e81779e37..d8781786cf63 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1927,7 +1927,6 @@ config TEST_STATIC_KEYS config TEST_KMOD tristate "kmod stress tester" depends on m - depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS depends on NETDEVICES && NET_CORE && INET # for TUN select TEST_LKM select XFS_FS -- cgit From 7a41c294c1463100fdc82a356e22e36bbaa6b0f9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Apr 2019 10:07:45 +1100 Subject: rhashtable: use cmpxchg() in nested_table_alloc() nested_table_alloc() relies on the fact that there is at most one spinlock allocated for every slot in the top level nested table, so it is not possible for two threads to try to allocate the same table at the same time. This assumption is a little fragile (it is not explicit) and is unnecessary as cmpxchg() can be used instead. A future patch will replace the spinlocks by per-bucket bitlocks, and then we won't be able to protect the slot pointer with a spinlock. So replace rcu_assign_pointer() with cmpxchg() - which has equivalent barrier properties. If the cmpxchg() fails, free the table that was just allocated. Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 811d51b7cb86..6c4f5c8e9baa 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -131,9 +131,11 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } - rcu_assign_pointer(*prev, ntbl); - - return ntbl; + if (cmpxchg(prev, NULL, ntbl) == NULL) + return ntbl; + /* Raced with another thread. */ + kfree(ntbl); + return rcu_dereference(*prev); } static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, -- cgit From ff302db965b57c141297911ea647d36d11fedfbe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Apr 2019 10:07:45 +1100 Subject: rhashtable: allow rht_bucket_var to return NULL. Rather than returning a pointer to a static nulls, rht_bucket_var() now returns NULL if the bucket doesn't exist. This will make the next patch, which stores a bitlock in the bucket pointer, somewhat cleaner. This change involves introducing __rht_bucket_nested() which is like rht_bucket_nested(), but doesn't provide the static nulls, and changing rht_bucket_nested() to call this and possibly provide a static nulls - as is still needed for the non-var case. Signed-off-by: NeilBrown Signed-off-by: David S.
Miller --- lib/rhashtable.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6c4f5c8e9baa..b28fdd560ea9 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -237,8 +237,10 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) goto out; err = -ENOENT; + if (!pprev) + goto out; - rht_for_each(entry, old_tbl, old_hash) { + rht_for_each_from(entry, *pprev, old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -496,6 +498,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, elasticity = RHT_ELASTICITY; pprev = rht_bucket_var(tbl, hash); + if (!pprev) + return ERR_PTR(-ENOENT); rht_for_each_from(head, *pprev, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -1161,11 +1165,10 @@ void rhashtable_destroy(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_destroy); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); - static struct rhash_head __rcu *rhnull; unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; unsigned int subhash = hash; @@ -1183,15 +1186,23 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, subhash >>= shift; } - if (!ntbl) { - if (!rhnull) - INIT_RHT_NULLS_HEAD(rhnull); - return &rhnull; - } + if (!ntbl) + return NULL; return &ntbl[subhash].bucket; } +EXPORT_SYMBOL_GPL(__rht_bucket_nested); + +struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) +{ + static struct rhash_head __rcu *rhnull; + + if (!rhnull) + INIT_RHT_NULLS_HEAD(rhnull); + return __rht_bucket_nested(tbl, hash) ?: &rhnull; +} EXPORT_SYMBOL_GPL(rht_bucket_nested); struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, -- cgit From 8f0db018006a421956965e1149234c4e8db718ee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Apr 2019 10:07:45 +1100 Subject: rhashtable: use bit_spin_locks to protect hash bucket. This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the bucket pointer to lock the hash chain for that bucket. The benefits of a bit spin_lock are: - no need to allocate a separate array of locks. - no need to have a configuration option to guide the choice of the size of this array. - locking cost is often a single test-and-set in a cache line that will have to be loaded anyway. When inserting at, or removing from, the head of the chain, the unlock is free - writing the new address in the bucket head implicitly clears the lock bit. For __rhashtable_insert_fast() we ensure this always happens when adding a new key. - even when locking costs 2 updates (lock and unlock), they are in a cacheline that needs to be read anyway. The cost of using a bit spin_lock is a little bit of code complexity, which I think is quite manageable. Bit spin_locks are sometimes inappropriate because they are not fair - if multiple CPUs repeatedly contend on the same lock, one CPU can easily be starved. This is not a credible situation with rhashtable. Multiple CPUs may want to repeatedly add or remove objects, but they will typically do so at different buckets, so they will attempt to acquire different locks.
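For illustration only, and not part of this patch: a minimal sketch of locking a hash chain through a bit in the bucket pointer. The example_* helper names are hypothetical; the patch's real rht_lock()/rht_unlock() helpers live in the rhashtable headers and also integrate with lockdep:

	#include <linux/bit_spinlock.h>

	static inline void example_bucket_lock(struct rhash_lock_head **bkt)
	{
		local_bh_disable();	/* keep the BH exclusion the old spin_lock_bh() gave */
		bit_spin_lock(1, (unsigned long *)bkt);	/* spin on BIT(1) of the pointer word */
	}

	static inline void example_bucket_unlock(struct rhash_lock_head **bkt)
	{
		bit_spin_unlock(1, (unsigned long *)bkt);
		local_bh_enable();
	}

This is consistent with the lockdep check in the diff below, which tests bit_spin_is_locked(1, ...) on the bucket word.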
As we have more bit-locks than we previously had spinlocks (by at least a factor of two) we can expect slightly less contention to go with the slightly better cache behavior and reduced memory consumption. To enhance type checking, a new struct is introduced to represent the pointer plus lock-bit that is stored in the bucket-table. This is "struct rhash_lock_head" and is empty. A pointer to this needs to be cast to either an unsigned long, or a "struct rhash_head *" to be useful. Variables of this type are most often called "bkt". Previously "pprev" would sometimes point to a bucket, and sometimes a ->next pointer in an rhash_head. As these are now different types, pprev is NULL when it would have pointed to the bucket. In that case, 'bkt' is used, together with correct locking protocol. Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 141 +++++++++++++++++++++++++------------------------- lib/test_rhashtable.c | 2 +- 2 files changed, 71 insertions(+), 72 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b28fdd560ea9..c5d0974467ee 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -31,11 +31,10 @@ #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U -#define BUCKET_LOCKS_PER_CPU 32UL union nested_table { union nested_table __rcu *table; - struct rhash_head __rcu *bucket; + struct rhash_lock_head __rcu *bucket; }; static u32 head_hashfn(struct rhashtable *ht, @@ -56,9 +55,11 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { - spinlock_t *lock = rht_bucket_lock(tbl, hash); - - return (debug_locks) ? lockdep_is_held(lock) : 1; + if (!debug_locks) + return 1; + if (unlikely(tbl->nest)) + return 1; + return bit_spin_is_locked(1, (unsigned long *)&tbl->buckets[hash]); } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #else @@ -104,7 +105,6 @@ static void bucket_table_free(const struct bucket_table *tbl) if (tbl->nest) nested_bucket_table_free(tbl); - free_bucket_spinlocks(tbl->locks); kvfree(tbl); } @@ -171,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, gfp_t gfp) { struct bucket_table *tbl = NULL; - size_t size, max_locks; + size_t size; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); @@ -189,16 +189,6 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->size = size; - max_locks = size >> 1; - if (tbl->nest) - max_locks = min_t(size_t, max_locks, 1U << tbl->nest); - - if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks, - ht->p.locks_mul, gfp) < 0) { - bucket_table_free(tbl); - return NULL; - } - rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); @@ -223,24 +213,23 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, return new_tbl; } -static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) +static int rhashtable_rehash_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, + unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); - struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); int err = -EAGAIN; struct rhash_head *head, *next, *entry; - spinlock_t *new_bucket_lock; + struct rhash_head **pprev = NULL; unsigned int new_hash; if (new_tbl->nest) goto out; err = -ENOENT; - if (!pprev) - goto out; - rht_for_each_from(entry,
rht_ptr(*bkt), old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -255,18 +244,20 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) new_hash = head_hashfn(ht, new_tbl, entry); - new_bucket_lock = rht_bucket_lock(new_tbl, new_hash); + rht_lock(&new_tbl->buckets[new_hash]); - spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING); - head = rht_dereference_bucket(new_tbl->buckets[new_hash], - new_tbl, new_hash); + head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash], + new_tbl, new_hash)); RCU_INIT_POINTER(entry->next, head); - rcu_assign_pointer(new_tbl->buckets[new_hash], entry); - spin_unlock(new_bucket_lock); + rht_assign_unlock(&new_tbl->buckets[new_hash], entry); - rcu_assign_pointer(*pprev, next); + if (pprev) + rcu_assign_pointer(*pprev, next); + else + /* Need to preserved the bit lock. */ + rcu_assign_pointer(*bkt, rht_ptr_locked(next)); out: return err; @@ -276,19 +267,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - spinlock_t *old_bucket_lock; + struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); int err; - old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); + if (!bkt) + return 0; + rht_lock(bkt); - spin_lock_bh(old_bucket_lock); - while (!(err = rhashtable_rehash_one(ht, old_hash))) + while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; if (err == -ENOENT) err = 0; - - spin_unlock_bh(old_bucket_lock); + rht_unlock(bkt); return err; } @@ -485,6 +476,7 @@ fail: } static void *rhashtable_lookup_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, const void *key, struct rhash_head *obj) { @@ -492,15 +484,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, .ht = ht, .key = key, }; - struct rhash_head __rcu **pprev; + struct rhash_head **pprev = NULL; struct rhash_head *head; int elasticity; elasticity = RHT_ELASTICITY; - pprev = rht_bucket_var(tbl, hash); - if (!pprev) - return ERR_PTR(-ENOENT); - rht_for_each_from(head, *pprev, tbl, hash) { + rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -522,7 +511,11 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); - rcu_assign_pointer(*pprev, obj); + if (pprev) + rcu_assign_pointer(*pprev, obj); + else + /* Need to preserve the bit lock */ + rcu_assign_pointer(*bkt, rht_ptr_locked(obj)); return NULL; } @@ -534,12 +527,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, } static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, void *data) { - struct rhash_head __rcu **pprev; struct bucket_table *new_tbl; struct rhash_head *head; @@ -562,11 +555,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); - pprev = rht_bucket_insert(ht, tbl, hash); - if (!pprev) - return ERR_PTR(-ENOMEM); - - head = rht_dereference_bucket(*pprev, tbl, hash); + head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash)); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { @@ -576,7 +565,10 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable 
*ht, RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); + /* bkt is always the head of the list, so it holds + * the lock, which we need to preserve + */ + rcu_assign_pointer(*bkt, rht_ptr_locked(obj)); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -590,6 +582,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, { struct bucket_table *new_tbl; struct bucket_table *tbl; + struct rhash_lock_head __rcu **bkt; unsigned int hash; void *data; @@ -598,14 +591,25 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); - spin_lock_bh(rht_bucket_lock(tbl, hash)); - - data = rhashtable_lookup_one(ht, tbl, hash, key, obj); - new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); - if (PTR_ERR(new_tbl) != -EEXIST) - data = ERR_CAST(new_tbl); - - spin_unlock_bh(rht_bucket_lock(tbl, hash)); + if (rcu_access_pointer(tbl->future_tbl)) + /* Failure is OK */ + bkt = rht_bucket_var(tbl, hash); + else + bkt = rht_bucket_insert(ht, tbl, hash); + if (bkt == NULL) { + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); + data = ERR_PTR(-EAGAIN); + } else { + rht_lock(bkt); + data = rhashtable_lookup_one(ht, bkt, tbl, + hash, key, obj); + new_tbl = rhashtable_insert_one(ht, bkt, tbl, + hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + rht_unlock(bkt); + } } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) @@ -1032,11 +1036,6 @@ int rhashtable_init(struct rhashtable *ht, size = rounded_hashtable_size(&ht->p); - if (params->locks_mul) - ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); - else - ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; - ht->key_len = ht->p.key_len; if (!params->hashfn) { ht->p.hashfn = jhash; @@ -1138,7 +1137,7 @@ restart: struct rhash_head *pos, *next; cond_resched(); - for (pos = rht_dereference(*rht_bucket(tbl, i), ht), + for (pos = rht_ptr(rht_dereference(*rht_bucket(tbl, i), ht)), next = !rht_is_a_nulls(pos) ? 
rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); @@ -1165,8 +1164,8 @@ void rhashtable_destroy(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_destroy); -struct rhash_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); @@ -1194,10 +1193,10 @@ struct rhash_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(__rht_bucket_nested); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { - static struct rhash_head __rcu *rhnull; + static struct rhash_lock_head __rcu *rhnull; if (!rhnull) INIT_RHT_NULLS_HEAD(rhnull); @@ -1205,9 +1204,9 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(rht_bucket_nested); -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 3bd2e91bfc29..02592c2a249c 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -500,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt) struct rhash_head *pos, *next; struct test_obj_rhl *p; - pos = rht_dereference(tbl->buckets[i], ht); + pos = rht_ptr(rht_dereference(tbl->buckets[i], ht)); next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; if (!rht_is_a_nulls(pos)) { -- cgit From 149212f07856b25a9d342bfd6d736519b2ef66dc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Apr 2019 10:07:45 +1100 Subject: rhashtable: add lockdep tracking to bucket bit-spin-locks. Native bit_spin_locks are not tracked by lockdep. The bit_spin_locks used for rhashtable buckets are local to the rhashtable implementation, so there is little opportunity for the sort of misuse that lockdep might detect. However locks are held while a hash function or compare function is called, and if one of these took a lock, a misbehaviour is possible. As it is quite easy to add lockdep support this unlikely possibility seems to be enough justification. So create a lockdep class for bucket bit_spin_lock and attach through a lockdep_map in each bucket_table. Without the 'nested' annotation in rhashtable_rehash_one(), lockdep correctly reports a possible problem as this lock is taken while another bucket lock (in another table) is held. This confirms that the added support works. With the correct nested annotation in place, lockdep reports no problems. Signed-off-by: NeilBrown Signed-off-by: David S. 
Miller --- lib/rhashtable.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c5d0974467ee..a8583af43b59 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -173,6 +173,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, struct bucket_table *tbl = NULL; size_t size; int i; + static struct lock_class_key __key; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); tbl = kvzalloc(size, gfp); @@ -187,6 +188,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, if (tbl == NULL) return NULL; + lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0); + tbl->size = size; rcu_head_init(&tbl->rcu); @@ -244,14 +247,14 @@ static int rhashtable_rehash_one(struct rhashtable *ht, new_hash = head_hashfn(ht, new_tbl, entry); - rht_lock(&new_tbl->buckets[new_hash]); + rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash], new_tbl, new_hash)); RCU_INIT_POINTER(entry->next, head); - rht_assign_unlock(&new_tbl->buckets[new_hash], entry); + rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry); if (pprev) rcu_assign_pointer(*pprev, next); @@ -272,14 +275,14 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, if (!bkt) return 0; - rht_lock(bkt); + rht_lock(old_tbl, bkt); while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; if (err == -ENOENT) err = 0; - rht_unlock(bkt); + rht_unlock(old_tbl, bkt); return err; } @@ -600,7 +603,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); data = ERR_PTR(-EAGAIN); } else { - rht_lock(bkt); + rht_lock(tbl, bkt); data = rhashtable_lookup_one(ht, bkt, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, bkt, tbl, @@ -608,7 +611,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); - rht_unlock(bkt); + rht_unlock(tbl, bkt); } } while (!IS_ERR_OR_NULL(new_tbl)); -- cgit From 6989808ee7636188cc091224bc76ab8e1696088a Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:54 +1100 Subject: lib/test_printf: Add empty module_exit function Currently the test_printf module does not have an exit function, which prevents the module from being unloaded. If we cannot unload the module we cannot run the tests a second time. Add an empty exit function. Acked-by: Kees Cook Signed-off-by: Tobin C. Harding Signed-off-by: Shuah Khan --- lib/test_printf.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 659b6cc0d483..601e8519319a 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -615,5 +615,11 @@ test_printf_init(void) module_init(test_printf_init); +static void __exit test_printf_exit(void) +{ +} + +module_exit(test_printf_exit); + MODULE_AUTHOR("Rasmus Villemoes "); MODULE_LICENSE("GPL"); -- cgit From 6b1a4d5b1a26ae830d50e08d7b3ca0e8b3e6b453 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:57 +1100 Subject: lib: Use new kselftest header We just added a new C header file for use with test modules that are intended to be run with kselftest. We can reduce code duplication by using this header. Use new kselftest header to reduce code duplication in test_printf and test_bitmap test modules. Acked-by: Kees Cook Signed-off-by: Tobin C.
Harding Signed-off-by: Shuah Khan --- lib/test_bitmap.c | 20 ++++---------------- lib/test_printf.c | 23 +++++------------------ 2 files changed, 9 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6cd7d0740005..792d90608052 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -12,6 +12,8 @@ #include #include +#include "../tools/testing/selftests/kselftest_module.h" + static unsigned total_tests __initdata; static unsigned failed_tests __initdata; @@ -361,7 +363,7 @@ static void noinline __init test_mem_optimisations(void) } } -static int __init test_bitmap_init(void) +static void __init selftest(void) { test_zero_clear(); test_fill_set(); @@ -369,22 +371,8 @@ static int __init test_bitmap_init(void) test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); - - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else - pr_warn("failed %u out of %u tests\n", - failed_tests, total_tests); - - return failed_tests ? -EINVAL : 0; } -static void __exit test_bitmap_cleanup(void) -{ -} - -module_init(test_bitmap_init); -module_exit(test_bitmap_cleanup); - +KSTM_MODULE_LOADERS(test_bitmap); MODULE_AUTHOR("david decotigny "); MODULE_LICENSE("GPL"); diff --git a/lib/test_printf.c b/lib/test_printf.c index 601e8519319a..f4fcc1c43739 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -21,6 +21,8 @@ #include #include +#include "../tools/testing/selftests/kselftest_module.h" + #define BUF_SIZE 256 #define PAD_SIZE 16 #define FILL_CHAR '$' @@ -590,12 +592,11 @@ test_pointer(void) flags(); } -static int __init -test_printf_init(void) +static void __init selftest(void) { alloced_buffer = kmalloc(BUF_SIZE + 2*PAD_SIZE, GFP_KERNEL); if (!alloced_buffer) - return -ENOMEM; + return; test_buffer = alloced_buffer + PAD_SIZE; test_basic(); @@ -604,22 +605,8 @@ test_printf_init(void) test_pointer(); kfree(alloced_buffer); - - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else - pr_warn("failed %u out of %u tests\n", failed_tests, total_tests); - - return failed_tests ? -EINVAL : 0; } -module_init(test_printf_init); - -static void __exit test_printf_exit(void) -{ -} - -module_exit(test_printf_exit); - +KSTM_MODULE_LOADERS(test_printf); MODULE_AUTHOR("Rasmus Villemoes "); MODULE_LICENSE("GPL"); -- cgit From 458a3bf82df4fe1f951d0f52b1e0c1e9d5a88a3b Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:58 +1100 Subject: lib/string: Add strscpy_pad() function We have a function to copy strings safely and we have a function to copy strings and zero the tail of the destination (if source string is shorter than destination buffer) but we do not have a function to do both at once. This means developers must write this themselves if they desire this functionality. This is a chore, and also leaves us open to off by one errors unnecessarily. Add a function that calls strscpy() then memset()s the tail to zero if the source string is shorter than the destination buffer. Acked-by: Kees Cook Signed-off-by: Tobin C. 
Harding Signed-off-by: Shuah Khan --- lib/string.c | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/string.c b/lib/string.c index 3ab861c1a857..6016eb3ac73d 100644 --- a/lib/string.c +++ b/lib/string.c @@ -159,11 +159,9 @@ EXPORT_SYMBOL(strlcpy); * @src: Where to copy the string from * @count: Size of destination buffer * - * Copy the string, or as much of it as fits, into the dest buffer. - * The routine returns the number of characters copied (not including - * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. - * The behavior is undefined if the string buffers overlap. - * The destination buffer is always NUL terminated, unless it's zero-sized. + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always NUL terminated, unless it's zero-sized. * * Preferred to strlcpy() since the API doesn't require reading memory * from the src string beyond the specified "count" bytes, and since @@ -173,8 +171,10 @@ EXPORT_SYMBOL(strlcpy); * * Preferred to strncpy() since it always returns a valid string, and * doesn't unnecessarily force the tail of the destination buffer to be - * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() - * with an overflow test, then just memset() the tail of the dest buffer. + * zeroed. If zeroing is desired please use strscpy_pad(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. */ ssize_t strscpy(char *dest, const char *src, size_t count) { @@ -237,6 +237,39 @@ ssize_t strscpy(char *dest, const char *src, size_t count) EXPORT_SYMBOL(strscpy); #endif +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, zeros + * the tail of the destination buffer. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. + */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count) +{ + ssize_t written; + + written = strscpy(dest, src, count); + if (written < 0 || written == count - 1) + return written; + + memset(dest + written + 1, 0, count - written - 1); + + return written; +} +EXPORT_SYMBOL(strscpy_pad); + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another -- cgit From 0b0600c8c97abe070724140802f3b8c8aee93170 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:59 +1100 Subject: lib: Add test module for strscpy_pad Add a test module for the new strscpy_pad() function. Tie it into the kselftest infrastructure for lib/ tests. Acked-by: Kees Cook Signed-off-by: Tobin C. 
Harding Signed-off-by: Shuah Khan --- lib/Kconfig.debug | 3 ++ lib/Makefile | 1 + lib/test_strscpy.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 lib/test_strscpy.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d9e81779e37..4b644ad399dd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1769,6 +1769,9 @@ config TEST_HEXDUMP config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" +config TEST_STRSCPY + tristate "Test strscpy*() family of functions at runtime" + config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index 3b08673e8881..b4e08d6234ba 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o +obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o diff --git a/lib/test_strscpy.c b/lib/test_strscpy.c new file mode 100644 index 000000000000..a827f94601f5 --- /dev/null +++ b/lib/test_strscpy.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "../tools/testing/selftests/kselftest_module.h" + +/* + * Kernel module for testing 'strscpy' family of functions. + */ + +KSTM_MODULE_GLOBALS(); + +/* + * tc() - Run a specific test case. + * @src: Source string, argument to strscpy_pad() + * @count: Size of destination buffer, argument to strscpy_pad() + * @expected: Expected return value from call to strscpy_pad() + * @terminator: 1 if there should be a terminating null byte 0 otherwise. + * @chars: Number of characters from the src string expected to be + * written to the dst buffer. + * @pad: Number of pad characters expected (in the tail of dst buffer). + * (@pad does not include the null terminator byte.) + * + * Calls strscpy_pad() and verifies the return value and state of the + * destination buffer after the call returns. + */ +static int __init tc(char *src, int count, int expected, + int chars, int terminator, int pad) +{ + int nr_bytes_poison; + int max_expected; + int max_count; + int written; + char buf[6]; + int index, i; + const char POISON = 'z'; + + total_tests++; + + if (!src) { + pr_err("null source string not supported\n"); + return -1; + } + + memset(buf, POISON, sizeof(buf)); + /* Future proofing test suite, validate args */ + max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ + max_expected = count - 1; /* Space for the null */ + if (count > max_count) { + pr_err("count (%d) is too big (%d) ... 
aborting", count, max_count); + return -1; + } + if (expected > max_expected) { + pr_warn("expected (%d) is bigger than can possibly be returned (%d)", + expected, max_expected); + } + + written = strscpy_pad(buf, src, count); + if ((written) != (expected)) { + pr_err("%d != %d (written, expected)\n", written, expected); + goto fail; + } + + if (count && written == -E2BIG) { + if (strncmp(buf, src, count - 1) != 0) { + pr_err("buffer state invalid for -E2BIG\n"); + goto fail; + } + if (buf[count - 1] != '\0') { + pr_err("too big string is not null terminated correctly\n"); + goto fail; + } + } + + for (i = 0; i < chars; i++) { + if (buf[i] != src[i]) { + pr_err("buf[i]==%c != src[i]==%c\n", buf[i], src[i]); + goto fail; + } + } + + if (terminator) { + if (buf[count - 1] != '\0') { + pr_err("string is not null terminated correctly\n"); + goto fail; + } + } + + for (i = 0; i < pad; i++) { + index = chars + terminator + i; + if (buf[index] != '\0') { + pr_err("padding missing at index: %d\n", i); + goto fail; + } + } + + nr_bytes_poison = sizeof(buf) - chars - terminator - pad; + for (i = 0; i < nr_bytes_poison; i++) { + index = sizeof(buf) - 1 - i; /* Check from the end back */ + if (buf[index] != POISON) { + pr_err("poison value missing at index: %d\n", i); + goto fail; + } + } + + return 0; +fail: + failed_tests++; + return -1; +} + +static void __init selftest(void) +{ + /* + * tc() uses a destination buffer of size 6 and needs at + * least 2 characters spare (one for null and one to check for + * overflow). This means we should only call tc() with + * strings up to a maximum of 4 characters long and 'count' + * should not exceed 4. To test with longer strings increase + * the buffer size in tc(). + */ + + /* tc(src, count, expected, chars, terminator, pad) */ + KSTM_CHECK_ZERO(tc("a", 0, -E2BIG, 0, 0, 0)); + KSTM_CHECK_ZERO(tc("", 0, -E2BIG, 0, 0, 0)); + + KSTM_CHECK_ZERO(tc("a", 1, -E2BIG, 0, 1, 0)); + KSTM_CHECK_ZERO(tc("", 1, 0, 0, 1, 0)); + + KSTM_CHECK_ZERO(tc("ab", 2, -E2BIG, 1, 1, 0)); + KSTM_CHECK_ZERO(tc("a", 2, 1, 1, 1, 0)); + KSTM_CHECK_ZERO(tc("", 2, 0, 0, 1, 1)); + + KSTM_CHECK_ZERO(tc("abc", 3, -E2BIG, 2, 1, 0)); + KSTM_CHECK_ZERO(tc("ab", 3, 2, 2, 1, 0)); + KSTM_CHECK_ZERO(tc("a", 3, 1, 1, 1, 1)); + KSTM_CHECK_ZERO(tc("", 3, 0, 0, 1, 2)); + + KSTM_CHECK_ZERO(tc("abcd", 4, -E2BIG, 3, 1, 0)); + KSTM_CHECK_ZERO(tc("abc", 4, 3, 3, 1, 0)); + KSTM_CHECK_ZERO(tc("ab", 4, 2, 2, 1, 1)); + KSTM_CHECK_ZERO(tc("a", 4, 1, 1, 1, 2)); + KSTM_CHECK_ZERO(tc("", 4, 0, 0, 1, 3)); +} + +KSTM_MODULE_LOADERS(test_strscpy); +MODULE_AUTHOR("Tobin C. Harding "); +MODULE_LICENSE("GPL"); -- cgit From 224b44d46ffe9ad7785cc45c7a18934d492e66ec Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 24 Jan 2019 21:37:43 -0600 Subject: lib: zstd: Mark expected switch fall-throughs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. 
This patch fixes the following warnings: lib/zstd/bitstream.h:261:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/bitstream.h:262:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/bitstream.h:263:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/bitstream.h:264:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/bitstream.h:265:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/compress.c:3183:16: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/decompress.c:1770:18: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/decompress.c:2376:15: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/decompress.c:2404:15: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/decompress.c:2435:16: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/huf_compress.c: In function ‘HUF_compress1X_usingCTable’: lib/zstd/huf_compress.c:535:5: warning: this statement may fall through [-Wimplicit-fallthrough=] if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \ ^ lib/zstd/huf_compress.c:558:54: note: in expansion of macro ‘HUF_FLUSHBITS_2’ case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); ^~~~~~~~~~~~~~~ lib/zstd/huf_compress.c:559:2: note: here case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); ^~~~ lib/zstd/huf_compress.c:531:5: warning: this statement may fall through [-Wimplicit-fallthrough=] if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \ ^ lib/zstd/huf_compress.c:559:54: note: in expansion of macro ‘HUF_FLUSHBITS_1’ case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); ^~~~~~~~~~~~~~~ lib/zstd/huf_compress.c:560:2: note: here case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); ^~~~ AR lib/zstd//built-in.a Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. 
Silva --- lib/zstd/bitstream.h | 5 +++++ lib/zstd/compress.c | 1 + lib/zstd/decompress.c | 5 ++++- lib/zstd/huf_compress.c | 2 ++ 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h index a826b99e1d63..3a49784d5c61 100644 --- a/lib/zstd/bitstream.h +++ b/lib/zstd/bitstream.h @@ -259,10 +259,15 @@ ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, s bitD->bitContainer = *(const BYTE *)(bitD->start); switch (srcSize) { case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16); + /* fall through */ case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24); + /* fall through */ case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32); + /* fall through */ case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24; + /* fall through */ case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16; + /* fall through */ case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8; default:; } diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c index f9166cf4f7a9..5e0b67003e55 100644 --- a/lib/zstd/compress.c +++ b/lib/zstd/compress.c @@ -3182,6 +3182,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t * zcs->outBuffFlushedSize = 0; zcs->stage = zcss_flush; /* pass-through to flush stage */ } + /* fall through */ case zcss_flush: { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c index b17846725ca0..269ee9a796c1 100644 --- a/lib/zstd/decompress.c +++ b/lib/zstd/decompress.c @@ -1768,6 +1768,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, c return 0; } dctx->expected = 0; /* not necessary to copy more */ + /* fall through */ case ZSTDds_decodeFrameHeader: memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); @@ -2375,7 +2376,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB } zds->stage = zdss_read; } - /* pass-through */ + /* fall through */ case zdss_read: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); @@ -2404,6 +2405,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB zds->stage = zdss_load; /* pass-through */ } + /* fall through */ case zdss_load: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); @@ -2436,6 +2438,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB /* pass-through */ } } + /* fall through */ case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c index 40055a7016e6..e727812d12aa 100644 --- a/lib/zstd/huf_compress.c +++ b/lib/zstd/huf_compress.c @@ -556,7 +556,9 @@ size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, si n = srcSize & ~3; /* join to mod 4 */ switch (srcSize & 3) { case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); + /* fall through */ case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); + /* fall through */ case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); case 0: default:; -- cgit From 8a05452ca460b05c985eadc7b5a4f040f124463e Mon Sep 17 00:00:00 2001 From: 
"Gustavo A. R. Silva" Date: Fri, 25 Jan 2019 14:25:24 -0600 Subject: lib/cmdline.c: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warnings: lib/cmdline.c:137:7: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/cmdline.c:140:7: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/cmdline.c:143:7: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/cmdline.c:146:7: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/cmdline.c:149:7: warning: this statement may fall through [-Wimplicit-fallthrough=] Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Acked-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- lib/cmdline.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/cmdline.c b/lib/cmdline.c index 171c19b6888e..dc59d6216318 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -135,18 +135,23 @@ unsigned long long memparse(const char *ptr, char **retptr) case 'E': case 'e': ret <<= 10; + /* fall through */ case 'P': case 'p': ret <<= 10; + /* fall through */ case 'T': case 't': ret <<= 10; + /* fall through */ case 'G': case 'g': ret <<= 10; + /* fall through */ case 'M': case 'm': ret <<= 10; + /* fall through */ case 'K': case 'k': ret <<= 10; -- cgit From afb33e40d54e365457a4fb5eada6df55cd11a9cf Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 25 Jan 2019 14:46:46 -0600 Subject: ASN.1: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warnings: lib/asn1_decoder.c:386:6: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/asn1_decoder.c:449:6: warning: this statement may fall through [-Wimplicit-fallthrough=] Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- lib/asn1_decoder.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index dc14beae2c9a..8f3d207d2b00 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -385,6 +385,8 @@ next_op: case ASN1_OP_END_SET_ACT: if (unlikely(!(flags & FLAG_MATCHED))) goto tag_mismatch; + /* fall through */ + case ASN1_OP_END_SEQ: case ASN1_OP_END_SET_OF: case ASN1_OP_END_SEQ_OF: @@ -450,6 +452,8 @@ next_op: pc += asn1_op_lengths[op]; goto next_op; } + /* fall through */ + case ASN1_OP_ACT: ret = actions[machine[pc + 1]](context, hdr, tag, data + tdp, len); if (ret < 0) -- cgit From d75f773c86a2b8b7278e2c33343b46a4024bc002 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 25 Mar 2019 21:32:28 +0200 Subject: treewide: Switch printk users from %pf and %pF to %ps and %pS, respectively %pF and %pf are functionally equivalent to %pS and %ps conversion specifiers. The former are deprecated, therefore switch the current users to use the preferred variant. The changes have been produced by the following command: git grep -l '%p[fF]' | grep -v '^\(tools\|Documentation\)/' | \ while read i; do perl -i -pe 's/%pf/%ps/g; s/%pF/%pS/g;' $i; done And verifying the result. 
Link: http://lkml.kernel.org/r/20190325193229.23390-1-sakari.ailus@linux.intel.com Cc: Andy Shevchenko Cc: linux-arm-kernel@lists.infradead.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: xen-devel@lists.xenproject.org Cc: linux-acpi@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: drbd-dev@lists.linbit.com Cc: linux-block@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-nvdimm@lists.01.org Cc: linux-pci@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: linux-btrfs@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: linux-mm@kvack.org Cc: ceph-devel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Sakari Ailus Acked-by: David Sterba (for btrfs) Acked-by: Mike Rapoport (for mm/memblock.c) Acked-by: Bjorn Helgaas (for drivers/pci) Acked-by: Rafael J. Wysocki Signed-off-by: Petr Mladek --- lib/error-inject.c | 2 +- lib/percpu-refcount.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/error-inject.c b/lib/error-inject.c index c0d4600f4896..aa63751c916f 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -189,7 +189,7 @@ static int ei_seq_show(struct seq_file *m, void *v) { struct ei_entry *ent = list_entry(v, struct ei_entry, list); - seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr, + seq_printf(m, "%ps\t%s\n", (void *)ent->start_addr, error_type_string(ent->etype)); return 0; } diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 9877682e49c7..da54318d3b55 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -151,7 +151,7 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count); WARN_ONCE(atomic_long_read(&ref->count) <= 0, - "percpu ref (%pf) <= 0 (%ld) after switching to atomic", + "percpu ref (%ps) <= 0 (%ld) after switching to atomic", ref->release, atomic_long_read(&ref->count)); /* @ref is viewed as dead on all CPUs, send out switch confirmation */ @@ -333,7 +333,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, spin_lock_irqsave(&percpu_ref_switch_lock, flags); WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, - "%s called more than once on %pf!", __func__, ref->release); + "%s called more than once on %ps!", __func__, ref->release); ref->percpu_count_ptr |= __PERCPU_REF_DEAD; __percpu_ref_switch_mode(ref, confirm_kill); -- cgit From c252aa3e8ed3ac54060b1838f6a47f29799a133d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 11 Apr 2019 18:43:06 -0500 Subject: rhashtable: use struct_size() in kvzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kvzalloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kvzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- lib/rhashtable.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a8583af43b59..9c84f5cef69c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -175,8 +175,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, int i; static struct lock_class_key __key; - size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kvzalloc(size, gfp); + tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp); size = nbuckets; -- cgit From e4edbe3c1f44c84f319149aeb998e7e36b3b897f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Apr 2019 11:52:07 +1000 Subject: rhashtable: fix some __rcu annotation errors With these annotations, the rhashtable now gets no warnings when compiled with "C=1" for sparse checking. Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9c84f5cef69c..e387ceb00e86 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -223,7 +223,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); int err = -EAGAIN; struct rhash_head *head, *next, *entry; - struct rhash_head **pprev = NULL; + struct rhash_head __rcu **pprev = NULL; unsigned int new_hash; if (new_tbl->nest) @@ -486,7 +486,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, .ht = ht, .key = key, }; - struct rhash_head **pprev = NULL; + struct rhash_head __rcu **pprev = NULL; struct rhash_head *head; int elasticity; -- cgit From adc6a3ab192eb40fb9d8b093c87d9aa785af4513 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Apr 2019 11:52:08 +1000 Subject: rhashtable: move dereference inside rht_ptr() Rather than dereferencing a pointer to a bucket and then passing the result to rht_ptr(), we now pass in the pointer and do the dereference in rht_ptr(). This requires that we pass in the tbl and hash as well to support RCU checks, and means that the various rht_for_each functions can expect a pointer that can be dereferenced without further care. There are two places where we dereference a bucket pointer where there is no testable protection - in each case we know that we must have exclusive access without having taken a lock. The previous code used rht_dereference() to pretend that holding the mutex provided protection, but holding the mutex never provides protection for accessing buckets. So instead introduce rht_ptr_exclusive() that can be used when there is known to be exclusive access without holding any locks. Signed-off-by: NeilBrown Signed-off-by: David S.
Miller --- lib/rhashtable.c | 12 ++++++------ lib/test_rhashtable.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e387ceb00e86..237368ea98c5 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -231,7 +231,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht, err = -ENOENT; - rht_for_each_from(entry, rht_ptr(*bkt), old_tbl, old_hash) { + rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash), + old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -248,8 +249,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); - head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash], - new_tbl, new_hash)); + head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash); RCU_INIT_POINTER(entry->next, head); @@ -491,7 +491,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, int elasticity; elasticity = RHT_ELASTICITY; - rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) { + rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -557,7 +557,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); - head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash)); + head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { @@ -1139,7 +1139,7 @@ restart: struct rhash_head *pos, *next; cond_resched(); - for (pos = rht_ptr(rht_dereference(*rht_bucket(tbl, i), ht)), + for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 02592c2a249c..084fe5a6ac57 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -500,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt) struct rhash_head *pos, *next; struct test_obj_rhl *p; - pos = rht_ptr(rht_dereference(tbl->buckets[i], ht)); + pos = rht_ptr_exclusive(tbl->buckets + i); next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; if (!rht_is_a_nulls(pos)) { -- cgit From f4712b46a529ca2da078c82d5d99d367c7ebf82b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Apr 2019 11:52:08 +1000 Subject: rhashtable: replace rht_ptr_locked() with rht_assign_locked() The only times rht_ptr_locked() is used, it is to store a new value in a bucket-head. This is the only time it makes sense to use it too. So replace it by a function which does the whole task: Sets the lock bit and assigns to a bucket head. Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 237368ea98c5..ef5378efdef3 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -259,7 +259,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, rcu_assign_pointer(*pprev, next); else /* Need to preserved the bit lock. 
*/ - rcu_assign_pointer(*bkt, rht_ptr_locked(next)); + rht_assign_locked(bkt, next); out: return err; @@ -517,7 +517,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, rcu_assign_pointer(*pprev, obj); else /* Need to preserve the bit lock */ - rcu_assign_pointer(*bkt, rht_ptr_locked(obj)); + rht_assign_locked(bkt, obj); return NULL; } @@ -570,7 +570,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, /* bkt is always the head of the list, so it holds * the lock, which we need to preserve */ - rcu_assign_pointer(*bkt, rht_ptr_locked(obj)); + rht_assign_locked(bkt, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) -- cgit From ca0b709d1a07b1fe1fb356d8d58f220287f85672 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Apr 2019 11:52:08 +1000 Subject: rhashtable: use BIT(0) for locking. As reported by Guenter Roeck, the new bit-locking using BIT(1) doesn't work on the m68k architecture. m68k only requires 2-byte alignment for words and longwords, so there is only one unused bit in pointers to structs - We currently use two, one for the NULLS marker at the end of the linked list, and one for the bit-lock in the head of the list. The two uses don't need to conflict as we never need the head of the list to be a NULLS marker - the marker is only needed to check if an object has moved to a different table, and the bucket head cannot move. The NULLS marker is only needed in a ->next pointer. As we already have different types for the bucket head pointer (struct rhash_lock_head) and the ->next pointers (struct rhash_head), it is fairly easy to treat the lsb differently in each. So: Initialize bucket heads to NULL, and use the lsb for locking. When loading the pointer from the bucket head, if it is NULL (ignoring the lock bit), report as being the expected NULLS marker. When storing a value into a bucket head, if it is a NULLS marker, store NULL instead. And convert all places that used bit 1 for locking, to use bit 0. Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ef5378efdef3..6529fe1b45c1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -59,7 +59,7 @@ int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) return 1; if (unlikely(tbl->nest)) return 1; - return bit_spin_is_locked(1, (unsigned long *)&tbl->buckets[hash]); + return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]); } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #else -- cgit From 877b5691f27a1aec0d9b53095a323e45c30069e2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 14 Apr 2019 17:37:09 -0700 Subject: crypto: shash - remove shash_desc::flags The flags field in 'struct shash_desc' never actually does anything. The only ostensibly supported flag is CRYPTO_TFM_REQ_MAY_SLEEP. However, no shash algorithm ever sleeps, making this flag a no-op. With this being the case, inevitably some users who can't sleep wrongly pass MAY_SLEEP. These would all need to be fixed if any shash algorithm actually started sleeping. For example, the shash_ahash_*() functions, which wrap a shash algorithm with the ahash API, pass through MAY_SLEEP from the ahash API to the shash API.
However, the shash functions are called under kmap_atomic(), so actually they're assumed to never sleep. Even if it turns out that some users do need preemption points while hashing large buffers, we could easily provide a helper function crypto_shash_update_large() which divides the data into smaller chunks and calls crypto_shash_update() and cond_resched() for each chunk. It's not necessary to have a flag in 'struct shash_desc', nor is it necessary to make individual shash algorithms aware of this at all. Therefore, remove shash_desc::flags, and document that the crypto_shash_*() functions can be called from any context. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/crc-t10dif.c | 1 - lib/digsig.c | 1 - lib/libcrc32c.c | 1 - 3 files changed, 3 deletions(-) (limited to 'lib') diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 4d0d47c1ffbd..e89ebfdbb0fc 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -69,7 +69,6 @@ __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) rcu_read_lock(); desc.shash.tfm = rcu_dereference(crct10dif_tfm); - desc.shash.flags = 0; *(__u16 *)desc.ctx = crc; err = crypto_shash_update(&desc.shash, buffer, len); diff --git a/lib/digsig.c b/lib/digsig.c index 6ba6fcd92dd1..3b0a579bdcdf 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -240,7 +240,6 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen, goto err; desc->tfm = shash; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; crypto_shash_init(desc); crypto_shash_update(desc, data, datalen); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index f0a2934605bf..4e9829c4d64c 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -47,7 +47,6 @@ u32 crc32c(u32 crc, const void *address, unsigned int length) int err; shash->tfm = tfm; - shash->flags = 0; *ctx = crc; err = crypto_shash_update(shash, address, length); -- cgit From ba2e544075c282a5bb21df7752efad3b42d6077b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 16 Apr 2019 17:27:20 +1000 Subject: lib/siphash.c: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch aims to suppress up to 18 missing-break-in-switch false positives on some architectures. Cc: Gustavo A. R. Silva Cc: Kees Cook Signed-off-by: Stephen Rothwell Reviewed-by: Jason A. 
Donenfeld Signed-off-by: Greg Kroah-Hartman --- lib/siphash.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/siphash.c b/lib/siphash.c index 3ae58b4edad6..c47bb6ff2149 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -68,11 +68,11 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= le32_to_cpup(data); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -101,11 +101,11 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= get_unaligned_le32(end); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } @@ -268,11 +268,11 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= le32_to_cpup(data); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -301,11 +301,11 @@ u32 __hsiphash_unaligned(const void *data, size_t len, bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= get_unaligned_le32(end); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } @@ -431,7 +431,7 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) v0 ^= m; } switch (left) { - case 3: b |= ((u32)end[2]) << 16; + case 3: b |= ((u32)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -454,7 +454,7 @@ u32 __hsiphash_unaligned(const void *data, size_t len, v0 ^= m; } switch (left) { - case 3: b |= ((u32)end[2]) << 16; + case 3: b |= ((u32)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } -- cgit From aa30f47cf666111f6bbfd15f290a27e8a7b9d854 Mon Sep 17 00:00:00 2001 From: Kimberly Brown Date: Mon, 1 Apr 2019 22:51:18 -0400 Subject: kobject: Add support for default attribute groups to kobj_type MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kobj_type currently uses a list of individual attributes to store default attributes. Attribute groups are more flexible than a list of attributes because groups provide support for attribute visibility. So, add support for default attribute groups to kobj_type. In future patches, the existing uses of kobj_type’s attribute list will be converted to attribute groups. When that is complete, kobj_type’s attribute list, “default_attrs”, will be removed. Signed-off-by: Kimberly Brown Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index aa89edcd2b63..ede40005db28 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -82,6 +82,7 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { + const struct kobj_type *ktype = get_ktype(kobj); const struct kobj_ns_type_operations *ops; int error; @@ -95,6 +96,14 @@ static int create_dir(struct kobject *kobj) return error; } + if (ktype) { + error = sysfs_create_groups(kobj, ktype->default_groups); + if (error) { + sysfs_remove_dir(kobj); + return error; + } + } + /* * @kobj->sd may be deleted by an ancestor going away. Hold an * extra reference so that it stays until @kobj is gone. @@ -584,11 +593,16 @@ EXPORT_SYMBOL_GPL(kobject_move); void kobject_del(struct kobject *kobj) { struct kernfs_node *sd; + const struct kobj_type *ktype = get_ktype(kobj); if (!kobj) return; sd = kobj->sd; + + if (ktype) + sysfs_remove_groups(kobj, ktype->default_groups); + sysfs_remove_dir(kobj); sysfs_put(sd); -- cgit From 6eea242f9bcdf828bb56334d8ee5c7cb466e4bcd Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:41 +0200 Subject: vsprintf: Shuffle restricted_pointer() This is just a preparation step for further changes. The patch does not change the code. Link: http://lkml.kernel.org/r/20190417115350.20479-2-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Andy Shevchenko Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- lib/vsprintf.c | 98 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 791b6fa36905..eb7b4a06e1f0 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -717,6 +717,55 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, return pointer_string(buf, end, (const void *)hashval, spec); } +int kptr_restrict __read_mostly; + +static noinline_for_stack +char *restricted_pointer(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + switch (kptr_restrict) { + case 0: + /* Always print %pK values */ + break; + case 1: { + const struct cred *cred; + + /* + * kptr_restrict==1 cannot be used in IRQ context + * because its test for CAP_SYSLOG would be meaningless. + */ + if (in_irq() || in_serving_softirq() || in_nmi()) { + if (spec.field_width == -1) + spec.field_width = 2 * sizeof(ptr); + return string(buf, end, "pK-error", spec); + } + + /* + * Only print the real pointer value if the current + * process has CAP_SYSLOG and is running with the + * same credentials it started with. This is because + * access to files is checked at open() time, but %pK + * checks permission at read() time. 
We don't want to + * leak pointer values if a binary opens a file using + * %pK and then elevates privileges before reading it. + */ + cred = current_cred(); + if (!has_capability_noaudit(current, CAP_SYSLOG) || + !uid_eq(cred->euid, cred->uid) || + !gid_eq(cred->egid, cred->gid)) + ptr = NULL; + break; + } + case 2: + default: + /* Always print 0's for %pK */ + ptr = NULL; + break; + } + + return pointer_string(buf, end, ptr, spec); +} + static noinline_for_stack char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, const char *fmt) @@ -1476,55 +1525,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } -int kptr_restrict __read_mostly; - -static noinline_for_stack -char *restricted_pointer(char *buf, char *end, const void *ptr, - struct printf_spec spec) -{ - switch (kptr_restrict) { - case 0: - /* Always print %pK values */ - break; - case 1: { - const struct cred *cred; - - /* - * kptr_restrict==1 cannot be used in IRQ context - * because its test for CAP_SYSLOG would be meaningless. - */ - if (in_irq() || in_serving_softirq() || in_nmi()) { - if (spec.field_width == -1) - spec.field_width = 2 * sizeof(ptr); - return string(buf, end, "pK-error", spec); - } - - /* - * Only print the real pointer value if the current - * process has CAP_SYSLOG and is running with the - * same credentials it started with. This is because - * access to files is checked at open() time, but %pK - * checks permission at read() time. We don't want to - * leak pointer values if a binary opens a file using - * %pK and then elevates privileges before reading it. - */ - cred = current_cred(); - if (!has_capability_noaudit(current, CAP_SYSLOG) || - !uid_eq(cred->euid, cred->uid) || - !gid_eq(cred->egid, cred->gid)) - ptr = NULL; - break; - } - case 2: - default: - /* Always print 0's for %pK */ - ptr = NULL; - break; - } - - return pointer_string(buf, end, ptr, spec); -} - static noinline_for_stack char *netdev_bits(char *buf, char *end, const void *addr, struct printf_spec spec, const char *fmt) -- cgit From 1ac2f9789c4b76ad749870c25ffae0cbcd1f510f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:42 +0200 Subject: vsprintf: Consistent %pK handling for kptr_restrict == 0 restricted_pointer() pretends that it prints the address when kptr_restrict is set to zero. But it is never called in this situation. Instead, pointer() falls back to ptr_to_id() and hashes the pointer. This patch removes the potential confusion. kptr_restrict is checked only in restricted_pointer(). It actually fixes a small race when the address might get printed unhashed: CPU0 CPU1 pointer() if (!kptr_restrict) /* for example set to 2 */ restricted_pointer() /* echo 0 >/proc/sys/kernel/kptr_restrict */ proc_dointvec_minmax_sysadmin() kptr_restrict = 0; switch(kptr_restrict) case 0: break; number() Fixes: ef0010a30935de4e0211 ("vsprintf: don't use 'restricted_pointer()' when not restricting") Link: http://lkml.kernel.org/r/20190417115350.20479-3-pmladek@suse.com To: Andy Shevchenko To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C .
Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: Kees Cook Reviewed-by: Andy Shevchenko Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- lib/vsprintf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index eb7b4a06e1f0..2af48948a973 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -725,8 +725,8 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, { switch (kptr_restrict) { case 0: - /* Always print %pK values */ - break; + /* Handle as %p, hash and do _not_ leak addresses. */ + return ptr_to_id(buf, end, ptr, spec); case 1: { const struct cred *cred; @@ -2041,8 +2041,6 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf; } case 'K': - if (!kptr_restrict) - break; return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, spec, fmt); -- cgit From d529ac4194f2c346b2f62f0f473a578a7357039b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:43 +0200 Subject: vsprintf: Do not check address of well-known strings We are going to check the address using probe_kernel_address(). It will be more expensive and it does not make sense for well-known addresses. This patch splits the string() function. The variant without the check is then used in locations that handle string constants or strings defined as local variables. This patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20190417115350.20479-4-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek Reviewed-by: Sergey Senozhatsky --- lib/vsprintf.c | 81 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 2af48948a973..c9c9a1179870 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -593,15 +593,13 @@ char *widen_string(char *buf, int n, char *end, struct printf_spec spec) return buf; } -static noinline_for_stack -char *string(char *buf, char *end, const char *s, struct printf_spec spec) +/* Handle string from a well known address.
*/ +static char *string_nocheck(char *buf, char *end, const char *s, + struct printf_spec spec) { int len = 0; size_t lim = spec.precision; - if ((unsigned long)s < PAGE_SIZE) - s = "(null)"; - while (lim--) { char c = *s++; if (!c) @@ -615,6 +613,15 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec) } static noinline_for_stack +char *string(char *buf, char *end, const char *s, + struct printf_spec spec) +{ + if ((unsigned long)s < PAGE_SIZE) + s = "(null)"; + + return string_nocheck(buf, end, s, spec); +} + char *pointer_string(char *buf, char *end, const void *ptr, struct printf_spec spec) { @@ -701,7 +708,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ - return string(buf, end, str, spec); + return string_nocheck(buf, end, str, spec); } #ifdef CONFIG_64BIT @@ -737,7 +744,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, if (in_irq() || in_serving_softirq() || in_nmi()) { if (spec.field_width == -1) spec.field_width = 2 * sizeof(ptr); - return string(buf, end, "pK-error", spec); + return string_nocheck(buf, end, "pK-error", spec); } /* @@ -851,7 +858,7 @@ char *symbol_string(char *buf, char *end, void *ptr, else sprint_symbol_no_offset(sym, value); - return string(buf, end, sym, spec); + return string_nocheck(buf, end, sym, spec); #else return special_hex_number(buf, end, value, sizeof(void *)); #endif @@ -937,27 +944,27 @@ char *resource_string(char *buf, char *end, struct resource *res, *p++ = '['; if (res->flags & IORESOURCE_IO) { - p = string(p, pend, "io ", str_spec); + p = string_nocheck(p, pend, "io ", str_spec); specp = &io_spec; } else if (res->flags & IORESOURCE_MEM) { - p = string(p, pend, "mem ", str_spec); + p = string_nocheck(p, pend, "mem ", str_spec); specp = &mem_spec; } else if (res->flags & IORESOURCE_IRQ) { - p = string(p, pend, "irq ", str_spec); + p = string_nocheck(p, pend, "irq ", str_spec); specp = &default_dec_spec; } else if (res->flags & IORESOURCE_DMA) { - p = string(p, pend, "dma ", str_spec); + p = string_nocheck(p, pend, "dma ", str_spec); specp = &default_dec_spec; } else if (res->flags & IORESOURCE_BUS) { - p = string(p, pend, "bus ", str_spec); + p = string_nocheck(p, pend, "bus ", str_spec); specp = &bus_spec; } else { - p = string(p, pend, "??? ", str_spec); + p = string_nocheck(p, pend, "??? 
", str_spec); specp = &mem_spec; decode = 0; } if (decode && res->flags & IORESOURCE_UNSET) { - p = string(p, pend, "size ", str_spec); + p = string_nocheck(p, pend, "size ", str_spec); p = number(p, pend, resource_size(res), *specp); } else { p = number(p, pend, res->start, *specp); @@ -968,21 +975,21 @@ char *resource_string(char *buf, char *end, struct resource *res, } if (decode) { if (res->flags & IORESOURCE_MEM_64) - p = string(p, pend, " 64bit", str_spec); + p = string_nocheck(p, pend, " 64bit", str_spec); if (res->flags & IORESOURCE_PREFETCH) - p = string(p, pend, " pref", str_spec); + p = string_nocheck(p, pend, " pref", str_spec); if (res->flags & IORESOURCE_WINDOW) - p = string(p, pend, " window", str_spec); + p = string_nocheck(p, pend, " window", str_spec); if (res->flags & IORESOURCE_DISABLED) - p = string(p, pend, " disabled", str_spec); + p = string_nocheck(p, pend, " disabled", str_spec); } else { - p = string(p, pend, " flags ", str_spec); + p = string_nocheck(p, pend, " flags ", str_spec); p = number(p, pend, res->flags, default_flag_spec); } *p++ = ']'; *p = '\0'; - return string(buf, end, sym, spec); + return string_nocheck(buf, end, sym, spec); } static noinline_for_stack @@ -1150,7 +1157,7 @@ char *mac_address_string(char *buf, char *end, u8 *addr, } *p = '\0'; - return string(buf, end, mac_addr, spec); + return string_nocheck(buf, end, mac_addr, spec); } static noinline_for_stack @@ -1313,7 +1320,7 @@ char *ip6_addr_string(char *buf, char *end, const u8 *addr, else ip6_string(ip6_addr, addr, fmt); - return string(buf, end, ip6_addr, spec); + return string_nocheck(buf, end, ip6_addr, spec); } static noinline_for_stack @@ -1324,7 +1331,7 @@ char *ip4_addr_string(char *buf, char *end, const u8 *addr, ip4_string(ip4_addr, addr, fmt); - return string(buf, end, ip4_addr, spec); + return string_nocheck(buf, end, ip4_addr, spec); } static noinline_for_stack @@ -1386,7 +1393,7 @@ char *ip6_addr_string_sa(char *buf, char *end, const struct sockaddr_in6 *sa, } *p = '\0'; - return string(buf, end, ip6_addr, spec); + return string_nocheck(buf, end, ip6_addr, spec); } static noinline_for_stack @@ -1421,7 +1428,7 @@ char *ip4_addr_string_sa(char *buf, char *end, const struct sockaddr_in *sa, } *p = '\0'; - return string(buf, end, ip4_addr, spec); + return string_nocheck(buf, end, ip4_addr, spec); } static noinline_for_stack @@ -1522,7 +1529,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr, *p = 0; - return string(buf, end, uuid, spec); + return string_nocheck(buf, end, uuid, spec); } static noinline_for_stack @@ -1736,13 +1743,13 @@ char *device_node_gen_full_name(const struct device_node *np, char *buf, char *e /* special case for root node */ if (!parent) - return string(buf, end, "/", default_str_spec); + return string_nocheck(buf, end, "/", default_str_spec); for (depth = 0; parent->parent; depth++) parent = parent->parent; for ( ; depth >= 0; depth--) { - buf = string(buf, end, "/", default_str_spec); + buf = string_nocheck(buf, end, "/", default_str_spec); buf = string(buf, end, device_node_name_for_depth(np, depth), default_str_spec); } @@ -1770,10 +1777,10 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, str_spec.field_width = -1; if (!IS_ENABLED(CONFIG_OF)) - return string(buf, end, "(!OF)", spec); + return string_nocheck(buf, end, "(!OF)", spec); if ((unsigned long)dn < PAGE_SIZE) - return string(buf, end, "(null)", spec); + return string_nocheck(buf, end, "(null)", spec); /* simple case without anything any more format specifiers */ 
fmt++; @@ -1814,7 +1821,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, tbuf[2] = of_node_check_flag(dn, OF_POPULATED) ? 'P' : '-'; tbuf[3] = of_node_check_flag(dn, OF_POPULATED_BUS) ? 'B' : '-'; tbuf[4] = 0; - buf = string(buf, end, tbuf, str_spec); + buf = string_nocheck(buf, end, tbuf, str_spec); break; case 'c': /* major compatible string */ ret = of_property_read_string(dn, "compatible", &p); @@ -1825,10 +1832,10 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, has_mult = false; of_property_for_each_string(dn, "compatible", prop, p) { if (has_mult) - buf = string(buf, end, ",", str_spec); - buf = string(buf, end, "\"", str_spec); + buf = string_nocheck(buf, end, ",", str_spec); + buf = string_nocheck(buf, end, "\"", str_spec); buf = string(buf, end, p, str_spec); - buf = string(buf, end, "\"", str_spec); + buf = string_nocheck(buf, end, "\"", str_spec); has_mult = true; } @@ -1966,7 +1973,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, */ if (spec.field_width == -1) spec.field_width = default_width; - return string(buf, end, "(null)", spec); + return string_nocheck(buf, end, "(null)", spec); } switch (*fmt) { @@ -2022,7 +2029,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case AF_INET6: return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); default: - return string(buf, end, "(invalid address)", spec); + return string_nocheck(buf, end, "(invalid address)", spec); }} } break; -- cgit From f00cc102b862be688fe090aec30e08d61a8f5e63 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:44 +0200 Subject: vsprintf: Factor out %p[iI] handler as ip_addr_string() Move the non-trivial code from the long pointer() function. We are going to improve error handling that will make it even more complicated. This patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20190417115350.20479-5-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . 
Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 52 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c9c9a1179870..8ca29bc0d786 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1431,6 +1431,35 @@ char *ip4_addr_string_sa(char *buf, char *end, const struct sockaddr_in *sa, return string_nocheck(buf, end, ip4_addr, spec); } +static noinline_for_stack +char *ip_addr_string(char *buf, char *end, const void *ptr, + struct printf_spec spec, const char *fmt) +{ + switch (fmt[1]) { + case '6': + return ip6_addr_string(buf, end, ptr, spec, fmt); + case '4': + return ip4_addr_string(buf, end, ptr, spec, fmt); + case 'S': { + const union { + struct sockaddr raw; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } *sa = ptr; + + switch (sa->raw.sa_family) { + case AF_INET: + return ip4_addr_string_sa(buf, end, &sa->v4, spec, fmt); + case AF_INET6: + return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); + default: + return string_nocheck(buf, end, "(invalid address)", spec); + }} + } + + return ptr_to_id(buf, end, ptr, spec); +} + static noinline_for_stack char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) @@ -2011,28 +2040,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, * 4: 001.002.003.004 * 6: 000102...0f */ - switch (fmt[1]) { - case '6': - return ip6_addr_string(buf, end, ptr, spec, fmt); - case '4': - return ip4_addr_string(buf, end, ptr, spec, fmt); - case 'S': { - const union { - struct sockaddr raw; - struct sockaddr_in v4; - struct sockaddr_in6 v6; - } *sa = ptr; - - switch (sa->raw.sa_family) { - case AF_INET: - return ip4_addr_string_sa(buf, end, &sa->v4, spec, fmt); - case AF_INET6: - return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); - default: - return string_nocheck(buf, end, "(invalid address)", spec); - }} - } - break; + return ip_addr_string(buf, end, ptr, spec, fmt); case 'E': return escaped_string(buf, end, ptr, spec, fmt); case 'U': -- cgit From 45c3e93d751ea50861c796da3cbfc848fa6ddf55 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:45 +0200 Subject: vsprintf: Factor out %pV handler as va_format() Move the code from the long pointer() function. We are going to improve error handling that will make it more complicated. This patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20190417115350.20479-6-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 8ca29bc0d786..12b71a4d4613 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1520,6 +1520,17 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } +static char *va_format(char *buf, char *end, struct va_format *va_fmt) +{ + va_list va; + + va_copy(va, *va_fmt->va); + buf += vsnprintf(buf, end > buf ? 
end - buf : 0, va_fmt->fmt, va); + va_end(va); + + return buf; +} + static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, struct printf_spec spec, const char *fmt) @@ -2046,15 +2057,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'U': return uuid_string(buf, end, ptr, spec, fmt); case 'V': - { - va_list va; - - va_copy(va, *((struct va_format *)ptr)->va); - buf += vsnprintf(buf, end > buf ? end - buf : 0, - ((struct va_format *)ptr)->fmt, va); - va_end(va); - return buf; - } + return va_format(buf, end, ptr); case 'K': return restricted_pointer(buf, end, ptr, spec); case 'N': -- cgit From 798cc27a305e7b35b7bff3a71257e6fe57f70bc1 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:46 +0200 Subject: vsprintf: Factor out %pO handler as kobject_string() Move code from the long pointer() function. We are going to improve error handling that will make it even more complicated. This patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20190417115350.20479-7-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: Kees Cook Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 12b71a4d4613..9817d171f608 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1888,6 +1888,17 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +static char *kobject_string(char *buf, char *end, void *ptr, + struct printf_spec spec, const char *fmt) +{ + switch (fmt[1]) { + case 'F': + return device_node_string(buf, end, ptr, spec, fmt + 1); + } + + return ptr_to_id(buf, end, ptr, spec); +} + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -2082,11 +2093,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'G': return flags_string(buf, end, ptr, fmt); case 'O': - switch (fmt[1]) { - case 'F': - return device_node_string(buf, end, ptr, spec, fmt + 1); - } - break; + return kobject_string(buf, end, ptr, spec, fmt); case 'x': return pointer_string(buf, end, ptr, spec); } -- cgit From 0b74d4d763fd4ee9daa53889324300587c015338 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:47 +0200 Subject: vsprintf: Consolidate handling of unknown pointer specifiers There are a few printk formats that make sense only with two or more specifiers. Also some specifiers make sense only when a kernel feature is enabled. The handling of unknown specifiers is inconsistent and not helpful. Using WARN() looks like overkill for this type of error. pr_warn() is not good either. It would be handled via the printk_safe buffer and it might be hard to match it with the problematic string. A reasonable compromise seems to be writing the unknown format specifier into the original string with a question mark, for example (%pC?). It should be self-explanatory enough. Note that it is in brackets to follow the (null) style. Note that it introduces a warning that the test_hashed() function is unused. It is going to be used again by a later patch.
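To make the new behaviour concrete, here is a minimal sketch of what a caller now sees (kernel context assumed; demo_unknown_specifier() is a made-up name, and the "(%ptR?)" result matches the updated test_printf expectation in the diff below):

#include <linux/kernel.h>
#include <linux/rtc.h>

static void demo_unknown_specifier(void)
{
	char buf[32];
	struct rtc_time tm = { .tm_year = 118, .tm_mon = 10, .tm_mday = 26 };

	/* A known sub-specifier still formats the value as before. */
	snprintf(buf, sizeof(buf), "%ptR", &tm);

	/* An unknown one now writes a self-describing marker into the
	 * output instead of a hashed pointer value. */
	snprintf(buf, sizeof(buf), "%pt", &tm);	/* buf == "(%ptR?)" */
}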
Link: http://lkml.kernel.org/r/20190417115350.20479-8-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/test_printf.c | 3 +-- lib/vsprintf.c | 28 +++++++++++++++++----------- 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 659b6cc0d483..250ee864b8b8 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -462,8 +462,7 @@ struct_rtc_time(void) .tm_year = 118, }; - test_hashed("%pt", &tm); - + test("(%ptR?)", "%pt", &tm); test("2018-11-26T05:35:43", "%ptR", &tm); test("0118-10-26T05:35:43", "%ptRr", &tm); test("05:35:43|2018-11-26", "%ptRt|%ptRd", &tm, &tm); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 9817d171f608..f471a658422f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1435,6 +1435,8 @@ static noinline_for_stack char *ip_addr_string(char *buf, char *end, const void *ptr, struct printf_spec spec, const char *fmt) { + char *err_fmt_msg; + switch (fmt[1]) { case '6': return ip6_addr_string(buf, end, ptr, spec, fmt); @@ -1457,7 +1459,8 @@ char *ip_addr_string(char *buf, char *end, const void *ptr, }} } - return ptr_to_id(buf, end, ptr, spec); + err_fmt_msg = fmt[0] == 'i' ? "(%pi?)" : "(%pI?)"; + return string_nocheck(buf, end, err_fmt_msg, spec); } static noinline_for_stack @@ -1585,7 +1588,7 @@ char *netdev_bits(char *buf, char *end, const void *addr, size = sizeof(netdev_features_t); break; default: - return ptr_to_id(buf, end, addr, spec); + return string_nocheck(buf, end, "(%pN?)", spec); } return special_hex_number(buf, end, num, size); @@ -1689,7 +1692,7 @@ char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, case 'R': return rtc_str(buf, end, (const struct rtc_time *)ptr, fmt); default: - return ptr_to_id(buf, end, ptr, spec); + return string_nocheck(buf, end, "(%ptR?)", spec); } } @@ -1697,7 +1700,10 @@ static noinline_for_stack char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, const char *fmt) { - if (!IS_ENABLED(CONFIG_HAVE_CLK) || !clk) + if (!IS_ENABLED(CONFIG_HAVE_CLK)) + return string_nocheck(buf, end, "(%pC?)", spec); + + if (!clk) return string(buf, end, NULL, spec); switch (fmt[1]) { @@ -1706,7 +1712,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, #ifdef CONFIG_COMMON_CLK return string(buf, end, __clk_get_name(clk), spec); #else - return ptr_to_id(buf, end, clk, spec); + return string_nocheck(buf, end, "(%pC?)", spec); #endif } } @@ -1739,7 +1745,8 @@ char *format_flags(char *buf, char *end, unsigned long flags, } static noinline_for_stack -char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) +char *flags_string(char *buf, char *end, void *flags_ptr, + struct printf_spec spec, const char *fmt) { unsigned long flags; const struct trace_print_flags *names; @@ -1760,8 +1767,7 @@ char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) names = gfpflag_names; break; default: - WARN_ONCE(1, "Unsupported flags modifier: %c\n", fmt[1]); - return buf; + return string_nocheck(buf, end, "(%pG?)", spec); } return format_flags(buf, end, flags, names); @@ -1817,7 +1823,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, str_spec.field_width = -1; if (!IS_ENABLED(CONFIG_OF)) - return 
string_nocheck(buf, end, "(!OF)", spec); + return string_nocheck(buf, end, "(%pOF?)", spec); if ((unsigned long)dn < PAGE_SIZE) return string_nocheck(buf, end, "(null)", spec); @@ -1896,7 +1902,7 @@ static char *kobject_string(char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); } - return ptr_to_id(buf, end, ptr, spec); + return string_nocheck(buf, end, "(%pO?)", spec); } /* @@ -2091,7 +2097,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, #endif case 'G': - return flags_string(buf, end, ptr, fmt); + return flags_string(buf, end, ptr, spec, fmt); case 'O': return kobject_string(buf, end, ptr, spec, fmt); case 'x': -- cgit From 3e5903eb9cff707301712498aed9e34b3e2ee883 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:48 +0200 Subject: vsprintf: Prevent crash when dereferencing invalid pointers We already prevent a crash when dereferencing some obviously broken pointers. But the handling is not consistent. Sometimes we print "(null)" only for a pure NULL pointer, sometimes for pointers in the first page and sometimes also for pointers in the last page (error codes). Note that printk() calls this code under logbuf_lock. Any recursive printks are redirected to the printk_safe implementation and the messages are stored into per-CPU buffers. These buffers might be eventually flushed in printk_safe_flush_on_panic() but it is not guaranteed. This patch adds a check using probe_kernel_read(). It is not a fool-proof test. But it should help to see the error message in 99% of situations where the kernel would silently crash otherwise. Also it makes the error handling unified for "%s" and the many %p* specifiers that need to read the data from a given address. We print: + (null) when accessing data on a pure NULL address + (efault) when accessing data on an invalid address It does not affect the %p* specifiers that just print the given address in some form, namely %pF, %pf, %pS, %ps, %pB, %pK, %px, and plain %p. Note that we print (efault) for security reasons. In fact, the real address can be seen only by %px or eventually %pK. Link: http://lkml.kernel.org/r/20190417115350.20479-9-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C .
Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/test_printf.c | 22 ++++++++- lib/vsprintf.c | 136 ++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 122 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 250ee864b8b8..359ae4fb1ece 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -239,6 +239,7 @@ plain_format(void) #define PTR ((void *)0x456789ab) #define PTR_STR "456789ab" #define PTR_VAL_NO_CRNG "(ptrval)" +#define ZEROS "" static int __init plain_format(void) @@ -268,7 +269,6 @@ plain_hash_to_buffer(const void *p, char *buf, size_t len) return 0; } - static int __init plain_hash(void) { @@ -325,6 +325,24 @@ test_hashed(const char *fmt, const void *p) test(buf, fmt, p); } +static void __init +null_pointer(void) +{ + test_hashed("%p", NULL); + test(ZEROS "00000000", "%px", NULL); + test("(null)", "%pE", NULL); +} + +#define PTR_INVALID ((void *)0x000000ab) + +static void __init +invalid_pointer(void) +{ + test_hashed("%p", PTR_INVALID); + test(ZEROS "000000ab", "%px", PTR_INVALID); + test("(efault)", "%pE", PTR_INVALID); +} + static void __init symbol_ptr(void) { @@ -571,6 +589,8 @@ static void __init test_pointer(void) { plain(); + null_pointer(); + invalid_pointer(); symbol_ptr(); kernel_ptr(); struct_resource(); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f471a658422f..b989f1e8f35b 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -612,12 +612,45 @@ static char *string_nocheck(char *buf, char *end, const char *s, return widen_string(buf, len, end, spec); } +/* + * This is not a fool-proof test. 99% of the time that this will fault is + * due to a bad pointer, not one that crosses into bad memory. Just test + * the address to make sure it doesn't fault due to a poorly added printk + * during debugging. 
+ */ +static const char *check_pointer_msg(const void *ptr) +{ + char byte; + + if (!ptr) + return "(null)"; + + if (probe_kernel_address(ptr, byte)) + return "(efault)"; + + return NULL; +} + +static int check_pointer(char **buf, char *end, const void *ptr, + struct printf_spec spec) +{ + const char *err_msg; + + err_msg = check_pointer_msg(ptr); + if (err_msg) { + *buf = string_nocheck(*buf, end, err_msg, spec); + return -EFAULT; + } + + return 0; +} + static noinline_for_stack char *string(char *buf, char *end, const char *s, struct printf_spec spec) { - if ((unsigned long)s < PAGE_SIZE) - s = "(null)"; + if (check_pointer(&buf, end, s, spec)) + return buf; return string_nocheck(buf, end, s, spec); } @@ -792,6 +825,11 @@ char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_sp rcu_read_lock(); for (i = 0; i < depth; i++, d = p) { + if (check_pointer(&buf, end, d, spec)) { + rcu_read_unlock(); + return buf; + } + p = READ_ONCE(d->d_parent); array[i] = READ_ONCE(d->d_name.name); if (p == d) { @@ -822,8 +860,12 @@ static noinline_for_stack char *bdev_name(char *buf, char *end, struct block_device *bdev, struct printf_spec spec, const char *fmt) { - struct gendisk *hd = bdev->bd_disk; - + struct gendisk *hd; + + if (check_pointer(&buf, end, bdev, spec)) + return buf; + + hd = bdev->bd_disk; buf = string(buf, end, hd->disk_name, spec); if (bdev->bd_part->partno) { if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) { @@ -942,6 +984,9 @@ char *resource_string(char *buf, char *end, struct resource *res, int decode = (fmt[0] == 'R') ? 1 : 0; const struct printf_spec *specp; + if (check_pointer(&buf, end, res, spec)) + return buf; + *p++ = '['; if (res->flags & IORESOURCE_IO) { p = string_nocheck(p, pend, "io ", str_spec); @@ -1004,9 +1049,8 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, /* nothing to print */ return buf; - if (ZERO_OR_NULL_PTR(addr)) - /* NULL pointer */ - return string(buf, end, NULL, spec); + if (check_pointer(&buf, end, addr, spec)) + return buf; switch (fmt[1]) { case 'C': @@ -1053,6 +1097,9 @@ char *bitmap_string(char *buf, char *end, unsigned long *bitmap, int i, chunksz; bool first = true; + if (check_pointer(&buf, end, bitmap, spec)) + return buf; + /* reused to print numbers */ spec = (struct printf_spec){ .flags = SMALL | ZEROPAD, .base = 16 }; @@ -1094,6 +1141,9 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, int cur, rbot, rtop; bool first = true; + if (check_pointer(&buf, end, bitmap, spec)) + return buf; + rbot = cur = find_first_bit(bitmap, nr_bits); while (cur < nr_bits) { rtop = cur; @@ -1132,6 +1182,9 @@ char *mac_address_string(char *buf, char *end, u8 *addr, char separator; bool reversed = false; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'F': separator = '-'; @@ -1437,6 +1490,9 @@ char *ip_addr_string(char *buf, char *end, const void *ptr, { char *err_fmt_msg; + if (check_pointer(&buf, end, ptr, spec)) + return buf; + switch (fmt[1]) { case '6': return ip6_addr_string(buf, end, ptr, spec, fmt); @@ -1475,9 +1531,8 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, if (spec.field_width == 0) return buf; /* nothing to print */ - if (ZERO_OR_NULL_PTR(addr)) - return string(buf, end, NULL, spec); /* NULL pointer */ - + if (check_pointer(&buf, end, addr, spec)) + return buf; do { switch (fmt[count++]) { @@ -1523,10 +1578,14 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; 
} -static char *va_format(char *buf, char *end, struct va_format *va_fmt) +static char *va_format(char *buf, char *end, struct va_format *va_fmt, + struct printf_spec spec, const char *fmt) { va_list va; + if (check_pointer(&buf, end, va_fmt, spec)) + return buf; + va_copy(va, *va_fmt->va); buf += vsnprintf(buf, end > buf ? end - buf : 0, va_fmt->fmt, va); va_end(va); @@ -1544,6 +1603,9 @@ char *uuid_string(char *buf, char *end, const u8 *addr, const u8 *index = uuid_index; bool uc = false; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (*(++fmt)) { case 'L': uc = true; /* fall-through */ @@ -1582,6 +1644,9 @@ char *netdev_bits(char *buf, char *end, const void *addr, unsigned long long num; int size; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'F': num = *(const netdev_features_t *)addr; @@ -1595,11 +1660,15 @@ char *netdev_bits(char *buf, char *end, const void *addr, } static noinline_for_stack -char *address_val(char *buf, char *end, const void *addr, const char *fmt) +char *address_val(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) { unsigned long long num; int size; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'd': num = *(const dma_addr_t *)addr; @@ -1651,12 +1720,16 @@ char *time_str(char *buf, char *end, const struct rtc_time *tm, bool r) } static noinline_for_stack -char *rtc_str(char *buf, char *end, const struct rtc_time *tm, const char *fmt) +char *rtc_str(char *buf, char *end, const struct rtc_time *tm, + struct printf_spec spec, const char *fmt) { bool have_t = true, have_d = true; bool raw = false; int count = 2; + if (check_pointer(&buf, end, tm, spec)) + return buf; + switch (fmt[count]) { case 'd': have_t = false; @@ -1690,7 +1763,7 @@ char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, { switch (fmt[1]) { case 'R': - return rtc_str(buf, end, (const struct rtc_time *)ptr, fmt); + return rtc_str(buf, end, (const struct rtc_time *)ptr, spec, fmt); default: return string_nocheck(buf, end, "(%ptR?)", spec); } @@ -1703,8 +1776,8 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, if (!IS_ENABLED(CONFIG_HAVE_CLK)) return string_nocheck(buf, end, "(%pC?)", spec); - if (!clk) - return string(buf, end, NULL, spec); + if (check_pointer(&buf, end, clk, spec)) + return buf; switch (fmt[1]) { case 'n': @@ -1751,6 +1824,9 @@ char *flags_string(char *buf, char *end, void *flags_ptr, unsigned long flags; const struct trace_print_flags *names; + if (check_pointer(&buf, end, flags_ptr, spec)) + return buf; + switch (fmt[1]) { case 'p': flags = *(unsigned long *)flags_ptr; @@ -1825,8 +1901,8 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, if (!IS_ENABLED(CONFIG_OF)) return string_nocheck(buf, end, "(%pOF?)", spec); - if ((unsigned long)dn < PAGE_SIZE) - return string_nocheck(buf, end, "(null)", spec); + if (check_pointer(&buf, end, dn, spec)) + return buf; /* simple case without anything any more format specifiers */ fmt++; @@ -2021,18 +2097,6 @@ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, struct printf_spec spec) { - const int default_width = 2 * sizeof(void *); - - if (!ptr && *fmt != 'K' && *fmt != 'x') { - /* - * Print (null) with the same width as a pointer so it makes - * tabular output look nice. 
- */ - if (spec.field_width == -1) - spec.field_width = default_width; - return string_nocheck(buf, end, "(null)", spec); - } - switch (*fmt) { case 'F': case 'f': @@ -2074,13 +2138,13 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'U': return uuid_string(buf, end, ptr, spec, fmt); case 'V': - return va_format(buf, end, ptr); + return va_format(buf, end, ptr, spec, fmt); case 'K': return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, spec, fmt); case 'a': - return address_val(buf, end, ptr, fmt); + return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 't': @@ -2714,11 +2778,13 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_STR: { const char *save_str = va_arg(args, char *); + const char *err_msg; size_t len; - if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE - || (unsigned long)save_str < PAGE_SIZE) - save_str = "(null)"; + err_msg = check_pointer_msg(save_str); + if (err_msg) + save_str = err_msg; + len = strlen(save_str) + 1; if (str + len < end) memcpy(str, save_str, len); -- cgit From 635720ac75a51092b456bed517ff170047883252 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:49 +0200 Subject: vsprintf: Avoid confusion between invalid address and value We are able to detect invalid values handled by %p[iI] printk specifier. The current error message is "invalid address". It might cause confusion against "(efault)" reported by the generic valid_pointer_address() check. Let's unify the style and use the more appropriate error code description "(einval)". Link: http://lkml.kernel.org/r/20190417115350.20479-10-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b989f1e8f35b..4e5666035b74 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1511,7 +1511,7 @@ char *ip_addr_string(char *buf, char *end, const void *ptr, case AF_INET6: return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); default: - return string_nocheck(buf, end, "(invalid address)", spec); + return string_nocheck(buf, end, "(einval)", spec); }} } -- cgit From c8c3b584343cb7522fc00322769a9f288305743f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:50 +0200 Subject: vsprintf: Limit the length of inlined error messages The inlined error messages must be used carefully because they need to fit into the given buffer. Handle them using a custom wrapper that makes people aware of the problem. Also define a reasonable hard limit to avoid a completely insane usage. Suggested-by: Sergey Senozhatsky Link: http://lkml.kernel.org/r/20190417115350.20479-11-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . 
Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 4e5666035b74..1f367f3a7e2b 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -612,6 +612,21 @@ static char *string_nocheck(char *buf, char *end, const char *s, return widen_string(buf, len, end, spec); } +/* Be careful: error messages must fit into the given buffer. */ +static char *error_string(char *buf, char *end, const char *s, + struct printf_spec spec) +{ + /* + * Hard limit to avoid a completely insane messages. It actually + * works pretty well because most error messages are in + * the many pointer format modifiers. + */ + if (spec.precision == -1) + spec.precision = 2 * sizeof(void *); + + return string_nocheck(buf, end, s, spec); +} + /* * This is not a fool-proof test. 99% of the time that this will fault is * due to a bad pointer, not one that crosses into bad memory. Just test @@ -638,7 +653,7 @@ static int check_pointer(char **buf, char *end, const void *ptr, err_msg = check_pointer_msg(ptr); if (err_msg) { - *buf = string_nocheck(*buf, end, err_msg, spec); + *buf = error_string(*buf, end, err_msg, spec); return -EFAULT; } @@ -741,7 +756,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ - return string_nocheck(buf, end, str, spec); + return error_string(buf, end, str, spec); } #ifdef CONFIG_64BIT @@ -777,7 +792,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, if (in_irq() || in_serving_softirq() || in_nmi()) { if (spec.field_width == -1) spec.field_width = 2 * sizeof(ptr); - return string_nocheck(buf, end, "pK-error", spec); + return error_string(buf, end, "pK-error", spec); } /* @@ -1511,12 +1526,12 @@ char *ip_addr_string(char *buf, char *end, const void *ptr, case AF_INET6: return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); default: - return string_nocheck(buf, end, "(einval)", spec); + return error_string(buf, end, "(einval)", spec); }} } err_fmt_msg = fmt[0] == 'i' ? 
"(%pi?)" : "(%pI?)"; - return string_nocheck(buf, end, err_fmt_msg, spec); + return error_string(buf, end, err_fmt_msg, spec); } static noinline_for_stack @@ -1653,7 +1668,7 @@ char *netdev_bits(char *buf, char *end, const void *addr, size = sizeof(netdev_features_t); break; default: - return string_nocheck(buf, end, "(%pN?)", spec); + return error_string(buf, end, "(%pN?)", spec); } return special_hex_number(buf, end, num, size); @@ -1765,7 +1780,7 @@ char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, case 'R': return rtc_str(buf, end, (const struct rtc_time *)ptr, spec, fmt); default: - return string_nocheck(buf, end, "(%ptR?)", spec); + return error_string(buf, end, "(%ptR?)", spec); } } @@ -1774,7 +1789,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, const char *fmt) { if (!IS_ENABLED(CONFIG_HAVE_CLK)) - return string_nocheck(buf, end, "(%pC?)", spec); + return error_string(buf, end, "(%pC?)", spec); if (check_pointer(&buf, end, clk, spec)) return buf; @@ -1785,7 +1800,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, #ifdef CONFIG_COMMON_CLK return string(buf, end, __clk_get_name(clk), spec); #else - return string_nocheck(buf, end, "(%pC?)", spec); + return error_string(buf, end, "(%pC?)", spec); #endif } } @@ -1843,7 +1858,7 @@ char *flags_string(char *buf, char *end, void *flags_ptr, names = gfpflag_names; break; default: - return string_nocheck(buf, end, "(%pG?)", spec); + return error_string(buf, end, "(%pG?)", spec); } return format_flags(buf, end, flags, names); @@ -1899,7 +1914,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, str_spec.field_width = -1; if (!IS_ENABLED(CONFIG_OF)) - return string_nocheck(buf, end, "(%pOF?)", spec); + return error_string(buf, end, "(%pOF?)", spec); if (check_pointer(&buf, end, dn, spec)) return buf; @@ -1978,7 +1993,7 @@ static char *kobject_string(char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); } - return string_nocheck(buf, end, "(%pO?)", spec); + return error_string(buf, end, "(%pO?)", spec); } /* -- cgit From 6f455f5f4e9c28aefaefbe18ce7304b499645d75 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Apr 2019 14:07:27 +0200 Subject: netlink: add NLA_MIN_LEN Rather than using NLA_UNSPEC for this type of thing, use NLA_MIN_LEN so we can make NLA_UNSPEC be NLA_REJECT under certain conditions for future attributes. While at it, also use NLA_EXACT_LEN for the struct example. Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- lib/nlattr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index d26de6156b97..465c9e8ef8a5 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -278,10 +278,17 @@ static int validate_nla(const struct nlattr *nla, int maxtype, } } break; + + case NLA_UNSPEC: + case NLA_MIN_LEN: + if (attrlen < pt->len) + goto out_err; + break; + default: if (pt->len) minlen = pt->len; - else if (pt->type != NLA_UNSPEC) + else minlen = nla_attr_minlen[pt->type]; if (attrlen < minlen) -- cgit From 8cb081746c031fb164089322e2336a0bf5b3070c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Apr 2019 14:07:28 +0200 Subject: netlink: make validation more configurable for future strictness We currently have two levels of strict validation: 1) liberal (default) - undefined (type >= max) & NLA_UNSPEC attributes accepted - attribute length >= expected accepted - garbage at end of message accepted 2) strict (opt-in) - NLA_UNSPEC attributes accepted - attribute length >= expected accepted Split out parsing strictness into four different options: * TRAILING - check that there's no trailing data after parsing attributes (in message or nested) * MAXTYPE - reject attrs > max known type * UNSPEC - reject attributes with NLA_UNSPEC policy entries * STRICT_ATTRS - strictly validate attribute size The default for future things should be *everything*. The current *_strict() is a combination of TRAILING and MAXTYPE, and is renamed to _deprecated_strict(). The current regular parsing has none of this, and is renamed to *_parse_deprecated(). Additionally it allows us to selectively set one of the new flags even on old policies. Notably, the UNSPEC flag could be useful in this case, since it can be arranged (by filling in the policy) to not be an incompatible userspace ABI change, but would then going forward prevent forgetting attribute entries. Similar can apply to the POLICY flag. We end up with the following renames: * nla_parse -> nla_parse_deprecated * nla_parse_strict -> nla_parse_deprecated_strict * nlmsg_parse -> nlmsg_parse_deprecated * nlmsg_parse_strict -> nlmsg_parse_deprecated_strict * nla_parse_nested -> nla_parse_nested_deprecated * nla_validate_nested -> nla_validate_nested_deprecated Using spatch, of course: @@ expression TB, MAX, HEAD, LEN, POL, EXT; @@ -nla_parse(TB, MAX, HEAD, LEN, POL, EXT) +nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression TB, MAX, NLA, POL, EXT; @@ -nla_parse_nested(TB, MAX, NLA, POL, EXT) +nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT) @@ expression START, MAX, POL, EXT; @@ -nla_validate_nested(START, MAX, POL, EXT) +nla_validate_nested_deprecated(START, MAX, POL, EXT) @@ expression NLH, HDRLEN, MAX, POL, EXT; @@ -nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT) +nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT) For this patch, don't actually add the strict, non-renamed versions yet so that it breaks compile if I get it wrong. Also, while at it, make nla_validate and nla_parse go down to a common __nla_validate_parse() function to avoid code duplication. 
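For reference, the four strictness options split out above end up as validation flag bits along these lines (a sketch only: the enum netlink_validation named in the kernel-doc below is defined in include/net/netlink.h, outside this lib-only diff, so the exact bit assignments shown here are illustrative):

	enum netlink_validation {
		NL_VALIDATE_LIBERAL	 = 0,		/* the old default behaviour */
		NL_VALIDATE_TRAILING	 = BIT(0),	/* no trailing data after attrs */
		NL_VALIDATE_MAXTYPE	 = BIT(1),	/* reject attrs > max known type */
		NL_VALIDATE_UNSPEC	 = BIT(2),	/* reject NLA_UNSPEC policy entries */
		NL_VALIDATE_STRICT_ATTRS = BIT(3),	/* strictly validate attr length */
	};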
Ultimately, this allows us to have very strict validation for every new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the next patch, while existing things will continue to work as is. In effect then, this adds fully strict validation for any new command. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- lib/nlattr.c | 171 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 88 insertions(+), 83 deletions(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index 465c9e8ef8a5..af0f8b0309c6 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -69,7 +69,8 @@ static int validate_nla_bitfield32(const struct nlattr *nla, static int nla_validate_array(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + unsigned int validate) { const struct nlattr *entry; int rem; @@ -86,8 +87,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype, return -ERANGE; } - ret = nla_validate(nla_data(entry), nla_len(entry), - maxtype, policy, extack); + ret = __nla_validate(nla_data(entry), nla_len(entry), + maxtype, policy, validate, extack); if (ret < 0) return ret; } @@ -154,7 +155,7 @@ static int nla_validate_int_range(const struct nla_policy *pt, } static int validate_nla(const struct nlattr *nla, int maxtype, - const struct nla_policy *policy, + const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { const struct nla_policy *pt; @@ -172,6 +173,11 @@ static int validate_nla(const struct nlattr *nla, int maxtype, (pt->type == NLA_EXACT_LEN_WARN && attrlen != pt->len)) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); + if (validate & NL_VALIDATE_STRICT_ATTRS) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "invalid attribute length"); + return -EINVAL; + } } switch (pt->type) { @@ -244,8 +250,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (attrlen < NLA_HDRLEN) goto out_err; if (pt->validation_data) { - err = nla_validate(nla_data(nla), nla_len(nla), pt->len, - pt->validation_data, extack); + err = __nla_validate(nla_data(nla), nla_len(nla), pt->len, + pt->validation_data, validate, + extack); if (err < 0) { /* * return directly to preserve the inner @@ -268,7 +275,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, err = nla_validate_array(nla_data(nla), nla_len(nla), pt->len, pt->validation_data, - extack); + extack, validate); if (err < 0) { /* * return directly to preserve the inner @@ -280,6 +287,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype, break; case NLA_UNSPEC: + if (validate & NL_VALIDATE_UNSPEC) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Unsupported attribute"); + return -EINVAL; + } + /* fall through */ case NLA_MIN_LEN: if (attrlen < pt->len) goto out_err; @@ -322,37 +335,75 @@ out_err: return err; } +static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack, + struct nlattr **tb) +{ + const struct nlattr *nla; + int rem; + + if (tb) + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla_type(nla); + + if (type == 0 || type > maxtype) { + if (validate & NL_VALIDATE_MAXTYPE) { + NL_SET_ERR_MSG(extack, "Unknown attribute type"); + return -EINVAL; + } + continue; + } + if (policy) { + int err = 
validate_nla(nla, maxtype, policy, + validate, extack); + + if (err < 0) + return err; + } + + if (tb) + tb[type] = (struct nlattr *)nla; + } + + if (unlikely(rem > 0)) { + pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", + rem, current->comm); + NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); + if (validate & NL_VALIDATE_TRAILING) + return -EINVAL; + } + + return 0; +} + /** - * nla_validate - Validate a stream of attributes + * __nla_validate - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the - * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. + * specified policy. Validation depends on the validate flags passed, see + * &enum netlink_validation for more details on that. + * See documenation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ -int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +int __nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack) { - const struct nlattr *nla; - int rem; - - nla_for_each_attr(nla, head, len, rem) { - int err = validate_nla(nla, maxtype, policy, extack); - - if (err < 0) - return err; - } - - return 0; + return __nla_validate_parse(head, len, maxtype, policy, validate, + extack, NULL); } -EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(__nla_validate); /** * nla_policy_len - Determin the max. length of a policy @@ -384,76 +435,30 @@ nla_policy_len(const struct nla_policy *p, int n) EXPORT_SYMBOL(nla_policy_len); /** - * nla_parse - Parse a stream of attributes into a tb buffer + * __nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in - * the tb array accessible via the attribute type. Attributes with a type - * exceeding maxtype will be silently ignored for backwards compatibility - * reasons. policy may be set to NULL if no validation is required. + * the tb array accessible via the attribute type. + * Validation is controlled by the @validate parameter. * * Returns 0 on success or a negative error code. 
*/ -static int __nla_parse(struct nlattr **tb, int maxtype, - const struct nlattr *head, int len, - bool strict, const struct nla_policy *policy, - struct netlink_ext_ack *extack) -{ - const struct nlattr *nla; - int rem; - - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - - nla_for_each_attr(nla, head, len, rem) { - u16 type = nla_type(nla); - - if (type == 0 || type > maxtype) { - if (strict) { - NL_SET_ERR_MSG(extack, "Unknown attribute type"); - return -EINVAL; - } - continue; - } - if (policy) { - int err = validate_nla(nla, maxtype, policy, extack); - - if (err < 0) - return err; - } - - tb[type] = (struct nlattr *)nla; - } - - if (unlikely(rem > 0)) { - pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", - rem, current->comm); - NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); - if (strict) - return -EINVAL; - } - - return 0; -} - -int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack) -{ - return __nla_parse(tb, maxtype, head, len, false, policy, extack); -} -EXPORT_SYMBOL(nla_parse); - -int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack) +int __nla_parse(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack) { - return __nla_parse(tb, maxtype, head, len, true, policy, extack); + return __nla_validate_parse(head, len, maxtype, policy, validate, + extack, tb); } -EXPORT_SYMBOL(nla_parse_strict); +EXPORT_SYMBOL(__nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes -- cgit From 56738f460841761abc70347c919d5c45f6f05a42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Apr 2019 14:07:30 +0200 Subject: netlink: add strict parsing for future attributes Unfortunately, we cannot add strict parsing for all attributes, as that would break existing userspace. We currently warn about it, but that's about all we can do. For new attributes, however, the story is better: nobody is using them, so we can reject bad sizes. Also, for new attributes, we need not accept them when the policy doesn't declare their usage. David Ahern and I went back and forth on how to best encode this, and the best way we found was to have a "boundary type", from which point on new attributes have all possible validation applied, and NLA_UNSPEC is rejected. As we didn't want to add another argument to all functions that get a netlink policy, the workaround is to encode that boundary in the first entry of the policy array (which is for type 0 and thus probably not really valid anyway). I put it into the validation union for the rare possibility that somebody is actually using attribute 0, which would continue to work fine unless they tried to use the extended validation, which isn't likely. We also didn't find any in-tree users with type 0. The reason for setting the "start strict here" attribute is that we never really need to start strict from 0, which is invalid anyway (or in legacy families where that isn't true, it cannot be set to strict), so we can thus reserve the value 0 for "don't do this check" and don't have to add the tag to all policies right now. Thus, policies can now opt in to this validation, which we should do for all existing policies, at least when adding new attributes. 
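As a minimal sketch of opting in (the HYP_* attribute names are hypothetical, not from any in-tree family):

	static const struct nla_policy hypothetical_policy[HYP_ATTR_MAX + 1] = {
		/* entry 0 carries the boundary: attributes >= HYP_ATTR_NEW get
		 * full strict validation, older attributes keep the liberal rules
		 */
		[0]		= { .strict_start_type = HYP_ATTR_NEW },
		[HYP_ATTR_OLD]	= { .type = NLA_U32 },
		[HYP_ATTR_NEW]	= { .type = NLA_U32 },
	};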
Note that entirely *new* policies won't need to set it, as the use of that should be using nla_parse()/nlmsg_parse() etc. which anyway do fully strict validation now, regardless of this. So in effect, this patch only covers the "existing command with new attribute" case. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- lib/nlattr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index af0f8b0309c6..29f6336e2422 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -158,10 +158,14 @@ static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { + u16 strict_start_type = policy[0].strict_start_type; const struct nla_policy *pt; int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); int err = -ERANGE; + if (strict_start_type && type >= strict_start_type) + validate |= NL_VALIDATE_STRICT; + if (type <= 0 || type > maxtype) return 0; -- cgit From 92067f843854be0eef1e41ff00cb465247a83c42 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Sun, 28 Apr 2019 10:48:10 +1000 Subject: kobject: Improve docs for kobject_add/del There is currently some confusion on how to wind back kobject_init_and_add() during the error paths in code that uses this function. Add documentation to kobject_add() and kobject_del() to help clarify the usage. Signed-off-by: Tobin C. Harding Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index ede40005db28..c97b5729f94d 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -406,15 +406,19 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * - * If this function returns an error, kobject_put() must be called to - * properly clean up the memory associated with the object. - * Under no instance should the kobject that is passed to this function - * be directly freed with a call to kfree(), that can leak memory. - * * Note, no "add" uevent will be created with this call, the caller should set * up all of the necessary sysfs files for the object and then call * kobject_uevent() with the UEVENT_ADD parameter to ensure that * userspace is properly notified of this kobject's creation. + * + * Return: If this function returns an error, kobject_put() must be + * called to properly clean up the memory associated with the + * object. Under no instance should the kobject that is passed + * to this function be directly freed with a call to kfree(), + * that can leak memory. + * + * If this call returns successfully and you later need to unwind + * kobject_add() for the error path you should call kobject_del(). */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) @@ -589,6 +593,9 @@ EXPORT_SYMBOL_GPL(kobject_move); /** * kobject_del - unlink kobject from hierarchy. * @kobj: object. + * + * This is the function that should be called to delete an object + * successfully added via kobject_add(). */ void kobject_del(struct kobject *kobj) { -- cgit From 1fd7c3b438a2e4741435ed4d45546c03abf045b2 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Sun, 28 Apr 2019 09:56:52 +1000 Subject: kobject: Improve doc clarity kobject_init_and_add() Function kobject_init_and_add() is currently misused in a number of places in the kernel. 
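A typical buggy pattern looks roughly like this (illustrative only, not a specific in-tree call site; foo and foo_ktype are made-up names):

	err = kobject_init_and_add(&foo->kobj, &foo_ktype, parent, "%s", name);
	if (err)
		return err;	/* BUG: leaks the object; kobject_put(&foo->kobj) is required */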
On error return kobject_put() must be called but is at times not. Make the function documentation more explicit about calling kobject_put() in the error path. Signed-off-by: Tobin C. Harding Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index c97b5729f94d..a30ee0467942 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -450,9 +450,12 @@ EXPORT_SYMBOL(kobject_add); * @parent: pointer to the parent of this kobject. * @fmt: the name of the kobject. * - * This function combines the call to kobject_init() and - * kobject_add(). The same type of error handling after a call to - * kobject_add() and kobject lifetime rules are the same here. + * This function combines the call to kobject_init() and kobject_add(). + * + * If this function returns an error, kobject_put() must be called to + * properly clean up the memory associated with the object. This is the + * same type of error handling after a call to kobject_add() and kobject + * lifetime rules are the same here. */ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) -- cgit From ce9d3eceb7ffb74445a8d892ca0685395a93a7e2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 27 Apr 2019 00:46:30 +0800 Subject: lib/vsprintf: Make function pointer_string static Fix sparse warning: lib/vsprintf.c:673:6: warning: symbol 'pointer_string' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20190426164630.22104-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Signed-off-by: Petr Mladek --- lib/vsprintf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 1f367f3a7e2b..7b0a6140bfad 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -670,8 +670,9 @@ char *string(char *buf, char *end, const char *s, return string_nocheck(buf, end, s, spec); } -char *pointer_string(char *buf, char *end, const void *ptr, - struct printf_spec spec) +static char *pointer_string(char *buf, char *end, + const void *ptr, + struct printf_spec spec) { spec.base = 16; spec.flags |= SMALL; -- cgit From 3d378dc713f3ff4951d9a50c6a815f011e59da10 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 May 2019 13:43:17 +0100 Subject: kobject: fix dereference before null check on kobj The kobj pointer is being null-checked so potentially it could be null; however, the ktype declaration before the null check is dereferencing kobj, hence we have a potential null pointer dereference. Fix this by moving the assignment of ktype after kobj has been null checked.
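In miniature, the hazard and the fix look like this (illustrative):

	const struct kobj_type *ktype = get_ktype(kobj);	/* dereferences kobj */

	if (!kobj)
		return;						/* too late if kobj is NULL */

versus

	const struct kobj_type *ktype;

	if (!kobj)
		return;
	ktype = get_ktype(kobj);				/* safe: kobj is non-NULL here */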
Addresses-Coverity: ("Dereference before null check") Fixes: aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index a30ee0467942..095bcb55c2ba 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -603,12 +603,13 @@ EXPORT_SYMBOL_GPL(kobject_move); void kobject_del(struct kobject *kobj) { struct kernfs_node *sd; - const struct kobj_type *ktype = get_ktype(kobj); + const struct kobj_type *ktype; if (!kobj) return; sd = kobj->sd; + ktype = get_ktype(kobj); if (ktype) sysfs_remove_groups(kobj, ktype->default_groups); -- cgit From 923abb9d797ba078f4e9eb3734dd71be5f567a2a Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 1 May 2019 13:48:13 +0300 Subject: RDMA/core: Introduce RDMA subsystem ibdev_* print functions Similarly to dev/netdev/etc printk helpers, add standard printk helpers for the RDMA subsystem. Example output: efa 0000:00:06.0 efa_0: Hello World! efa_0: Hello World! (no parent device set) (NULL ib_device): Hello World! (ibdev is NULL) Cc: Jason Baron Suggested-by: Jason Gunthorpe Suggested-by: Leon Romanovsky Signed-off-by: Gal Pressman Reviewed-by: Leon Romanovsky Reviewed-by: Shiraz Saleem Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- lib/dynamic_debug.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 7bdf98c37e91..8a16c2d498e9 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -37,6 +37,8 @@ #include #include +#include + extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; @@ -636,6 +638,41 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg); #endif +#if IS_ENABLED(CONFIG_INFINIBAND) + +void __dynamic_ibdev_dbg(struct _ddebug *descriptor, + const struct ib_device *ibdev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (ibdev && ibdev->dev.parent) { + char buf[PREFIX_SIZE]; + + dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent, + "%s%s %s %s: %pV", + dynamic_emit_prefix(descriptor, buf), + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + &vaf); + } else if (ibdev) { + printk(KERN_DEBUG "%s: %pV", dev_name(&ibdev->dev), &vaf); + } else { + printk(KERN_DEBUG "(NULL ib_device): %pV", &vaf); + } + + va_end(args); +} +EXPORT_SYMBOL(__dynamic_ibdev_dbg); + +#endif + #define DDEBUG_STRING_SIZE 1024 static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE]; -- cgit From 8fd7c302b37099670b5d793375da10a40da7edf5 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 2 May 2019 12:31:39 +1000 Subject: kobject: Remove docstring reference to kset Currently the docstring for kobject_get_path() mentions 'kset'. The kset is not used in the function callchain starting from this function. Remove docstring reference to kset from the function kobject_get_path(). Signed-off-by: Tobin C. 
Harding Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 095bcb55c2ba..09e3ac329e92 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -162,12 +162,11 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) } /** - * kobject_get_path - generate and return the path associated with a given kobj and kset pair. - * + * kobject_get_path() - Allocate memory and fill in the path for @kobj. * @kobj: kobject in question, with which to build the path * @gfp_mask: the allocation type used to allocate the path * - * The result must be freed by the caller with kfree(). + * Return: The newly allocated memory, caller must free with kfree(). */ char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) { -- cgit From ed856349dc0886a97e91fd4ce6ba5ff5312fc0f1 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 2 May 2019 12:31:40 +1000 Subject: kobject: Fix kernel-doc comment first line kernel-doc comments have a prescribed format. This includes parenthesis on the function name. To be _particularly_ correct we should also capitalise the brief description and terminate it with a period. In preparation for adding/updating kernel-doc function comments clean up the ones currently present. Signed-off-by: Tobin C. Harding Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 09e3ac329e92..3f4b7e95b0c2 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -18,7 +18,7 @@ #include /** - * kobject_namespace - return @kobj's namespace tag + * kobject_namespace() - Return @kobj's namespace tag. * @kobj: kobject in question * * Returns namespace tag of @kobj if its parent has namespace ops enabled @@ -36,7 +36,7 @@ const void *kobject_namespace(struct kobject *kobj) } /** - * kobject_get_ownership - get sysfs ownership data for @kobj + * kobject_get_ownership() - Get sysfs ownership data for @kobj. * @kobj: kobject in question * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects @@ -273,7 +273,7 @@ static int kobject_add_internal(struct kobject *kobj) } /** - * kobject_set_name_vargs - Set the name of an kobject + * kobject_set_name_vargs() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @vargs: vargs to format the string. @@ -313,7 +313,7 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, } /** - * kobject_set_name - Set the name of a kobject + * kobject_set_name() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @@ -335,7 +335,7 @@ int kobject_set_name(struct kobject *kobj, const char *fmt, ...) EXPORT_SYMBOL(kobject_set_name); /** - * kobject_init - initialize a kobject structure + * kobject_init() - Initialize a kobject structure. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @@ -391,7 +391,7 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, } /** - * kobject_add - the main kobject add function + * kobject_add() - The main kobject add function. * @kobj: the kobject to add * @parent: pointer to the parent of the kobject. * @fmt: format to name the kobject with. 
@@ -443,7 +443,8 @@ int kobject_add(struct kobject *kobj, struct kobject *parent, EXPORT_SYMBOL(kobject_add); /** - * kobject_init_and_add - initialize a kobject structure and add it to the kobject hierarchy + * kobject_init_and_add() - Initialize a kobject structure and add it to + * the kobject hierarchy. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @parent: pointer to the parent of this kobject. @@ -473,7 +474,7 @@ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, EXPORT_SYMBOL_GPL(kobject_init_and_add); /** - * kobject_rename - change the name of an object + * kobject_rename() - Change the name of an object. * @kobj: object in question. * @new_name: object's new name * @@ -540,7 +541,7 @@ out: EXPORT_SYMBOL_GPL(kobject_rename); /** - * kobject_move - move object to another parent + * kobject_move() - Move object to another parent. * @kobj: object in question. * @new_parent: object's new parent (can be NULL) */ @@ -593,7 +594,7 @@ out: EXPORT_SYMBOL_GPL(kobject_move); /** - * kobject_del - unlink kobject from hierarchy. + * kobject_del() - Unlink kobject from hierarchy. * @kobj: object. * * This is the function that should be called to delete an object @@ -624,7 +625,7 @@ void kobject_del(struct kobject *kobj) EXPORT_SYMBOL(kobject_del); /** - * kobject_get - increment refcount for object. + * kobject_get() - Increment refcount for object. * @kobj: object. */ struct kobject *kobject_get(struct kobject *kobj) @@ -717,7 +718,7 @@ static void kobject_release(struct kref *kref) } /** - * kobject_put - decrement refcount for object. + * kobject_put() - Decrement refcount for object. * @kobj: object. * * Decrement the refcount, and if 0, call kobject_cleanup(). @@ -746,7 +747,7 @@ static struct kobj_type dynamic_kobj_ktype = { }; /** - * kobject_create - create a struct kobject dynamically + * kobject_create() - Create a struct kobject dynamically. * * This function creates a kobject structure dynamically and sets it up * to be a "dynamic" kobject with a default release function set up. @@ -769,8 +770,8 @@ struct kobject *kobject_create(void) } /** - * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs - * + * kobject_create_and_add() - Create a struct kobject dynamically and + * register it with sysfs. * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * @@ -801,7 +802,7 @@ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) EXPORT_SYMBOL_GPL(kobject_create_and_add); /** - * kset_init - initialize a kset for use + * kset_init() - Initialize a kset for use. * @k: kset */ void kset_init(struct kset *k) @@ -843,7 +844,7 @@ const struct sysfs_ops kobj_sysfs_ops = { EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** - * kset_register - initialize and add a kset. + * kset_register() - Initialize and add a kset. * @k: kset. */ int kset_register(struct kset *k) @@ -863,7 +864,7 @@ int kset_register(struct kset *k) EXPORT_SYMBOL(kset_register); /** - * kset_unregister - remove a kset. + * kset_unregister() - Remove a kset. * @k: kset. */ void kset_unregister(struct kset *k) @@ -876,7 +877,7 @@ void kset_unregister(struct kset *k) EXPORT_SYMBOL(kset_unregister); /** - * kset_find_obj - search for object in kset. + * kset_find_obj() - Search for object in kset. * @kset: kset we're looking in. * @name: object's name. 
* @@ -924,7 +925,7 @@ static struct kobj_type kset_ktype = { }; /** - * kset_create - create a struct kset dynamically + * kset_create() - Create a struct kset dynamically. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset @@ -968,7 +969,7 @@ static struct kset *kset_create(const char *name, } /** - * kset_create_and_add - create a struct kset dynamically and add it to sysfs + * kset_create_and_add() - Create a struct kset dynamically and add it to sysfs. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset -- cgit From 70e16a620e075cb916644e06012766639b58b2fb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2019 12:22:24 +0200 Subject: kobject: clean up the kobject add documentation a bit more Commit 1fd7c3b438a2 ("kobject: Improve doc clarity kobject_init_and_add()") tried to provide more clarity, but the reference to kobject_del() was incorrect. Fix that up by removing that line, and hopefully be more explicit as to exactly what needs to happen here once you register a kobject with the kobject core. Acked-by: Tobin C. Harding Fixes: 1fd7c3b438a2 ("kobject: Improve doc clarity kobject_init_and_add()") Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 3f4b7e95b0c2..f2ccdbac8ed9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -416,8 +416,12 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, * to this function be directly freed with a call to kfree(), * that can leak memory. * - * If this call returns successfully and you later need to unwind - * kobject_add() for the error path you should call kobject_del(). + * If this function returns success, kobject_put() must also be called + * in order to properly clean up the memory associated with the object. + * + * In short, once this function is called, kobject_put() MUST be called + * when the use of the object is finished in order to properly free + * everything. */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) -- cgit From 554aae35007e49f533d3d10e788295f7141725bc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 2 May 2019 23:23:29 +0300 Subject: lib: Add support for generic packing operations This provides an unified API for accessing register bit fields regardless of memory layout. The basic unit of data for these API functions is the u64. The process of transforming an u64 from native CPU encoding into the peripheral's encoding is called 'pack', and transforming it from peripheral to native CPU encoding is 'unpack'. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- lib/Kconfig | 17 +++++ lib/Makefile | 1 + lib/packing.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 lib/packing.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index a9e56539bd11..ac1fcf06d8ea 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -18,6 +18,23 @@ config RAID6_PQ_BENCHMARK Benchmark all available RAID6 PQ functions on init and choose the fastest one. 
+config PACKING + bool "Generic bitfield packing and unpacking" + default n + help + This option provides the packing() helper function, which permits + converting bitfields between a CPU-usable representation and a + memory representation that can have any combination of these quirks: + - Is little endian (bytes are reversed within a 32-bit group) + - The least-significant 32-bit word comes first (within a 64-bit + group) + - The most significant bit of a byte is at its right (bit 0 of a + register description is numerically 2^7). + Drivers may use these helpers to match the bit indices as described + in the data sheets of the peripherals they are in control of. + + When in doubt, say N. + config BITREVERSE tristate diff --git a/lib/Makefile b/lib/Makefile index 3b08673e8881..7d4db18fabf1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -108,6 +108,7 @@ obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o +obj-$(CONFIG_PACKING) += packing.o obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o diff --git a/lib/packing.c b/lib/packing.c new file mode 100644 index 000000000000..50d1e9f2f5a7 --- /dev/null +++ b/lib/packing.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2018, NXP Semiconductors + * Copyright (c) 2018-2019, Vladimir Oltean + */ +#include +#include +#include +#include +#include + +static int get_le_offset(int offset) +{ + int closest_multiple_of_4; + + closest_multiple_of_4 = (offset / 4) * 4; + offset -= closest_multiple_of_4; + return closest_multiple_of_4 + (3 - offset); +} + +static int get_reverse_lsw32_offset(int offset, size_t len) +{ + int closest_multiple_of_4; + int word_index; + + word_index = offset / 4; + closest_multiple_of_4 = word_index * 4; + offset -= closest_multiple_of_4; + word_index = (len / 4) - word_index - 1; + return word_index * 4 + offset; +} + +static u64 bit_reverse(u64 val, unsigned int width) +{ + u64 new_val = 0; + unsigned int bit; + unsigned int i; + + for (i = 0; i < width; i++) { + bit = (val & (1 << i)) != 0; + new_val |= (bit << (width - i - 1)); + } + return new_val; +} + +static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, + int *box_end_bit, u8 *box_mask) +{ + int box_bit_width = *box_start_bit - *box_end_bit + 1; + int new_box_start_bit, new_box_end_bit; + + *to_write >>= *box_end_bit; + *to_write = bit_reverse(*to_write, box_bit_width); + *to_write <<= *box_end_bit; + + new_box_end_bit = box_bit_width - *box_start_bit - 1; + new_box_start_bit = box_bit_width - *box_end_bit - 1; + *box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit); + *box_start_bit = new_box_start_bit; + *box_end_bit = new_box_end_bit; +} + +/** + * packing - Convert numbers (currently u64) between a packed and an unpacked + * format. Unpacked means laid out in memory in the CPU's native + * understanding of integers, while packed means anything else that + * requires translation. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. 
+ * @op: If PACK, then uval will be treated as const pointer and copied (packed) + * into pbuf, between startbit and endbit. + * If UNPACK, then pbuf will be treated as const pointer and the logical + * value between startbit and endbit will be copied (unpacked) to uval. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. + * If op is PACK, pbuf is modified. + * If op is UNPACK, uval is modified. + */ +int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, + enum packing_op op, u8 quirks) +{ + /* Number of bits for storing "uval" + * also width of the field to access in the pbuf + */ + u64 value_width; + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8, plogical_last_u8, box; + + /* startbit is expected to be larger than endbit */ + if (startbit < endbit) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (value_width > 64) + return -ERANGE; + + /* Check if "uval" fits in "value_width" bits. + * If value_width is 64, the check will fail, but any + * 64-bit uval will surely fit. + */ + if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width))) + /* Cannot store "uval" inside "value_width" bits. + * Truncating "uval" is most certainly not desirable, + * so simply erroring out is appropriate. + */ + return -ERANGE; + + /* Initialize parameter */ + if (op == UNPACK) + *uval = 0; + + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. + */ + plogical_first_u8 = startbit / 8; + plogical_last_u8 = endbit / 8; + + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + int box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + int proj_start_bit, proj_end_bit; + u64 proj_mask; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % 8; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % 8; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. + */ + proj_start_bit = ((box * 8) + box_start_bit) - endbit; + proj_end_bit = ((box * 8) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK_ULL(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). 
+ */ + box_addr = pbuflen - box - 1; + if (quirks & QUIRK_LITTLE_ENDIAN) + box_addr = get_le_offset(box_addr); + if (quirks & QUIRK_LSW32_IS_FIRST) + box_addr = get_reverse_lsw32_offset(box_addr, + pbuflen); + + if (op == UNPACK) { + u64 pval; + + /* Read from pbuf, write to uval */ + pval = ((u8 *)pbuf)[box_addr] & box_mask; + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + adjust_for_msb_right_quirk(&pval, + &box_start_bit, + &box_end_bit, + &box_mask); + + pval >>= box_end_bit; + pval <<= proj_end_bit; + *uval &= ~proj_mask; + *uval |= pval; + } else { + u64 pval; + + /* Write to pbuf, read from uval */ + pval = (*uval) & proj_mask; + pval >>= proj_end_bit; + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + adjust_for_msb_right_quirk(&pval, + &box_start_bit, + &box_end_bit, + &box_mask); + + pval <<= box_end_bit; + ((u8 *)pbuf)[box_addr] &= ~box_mask; + ((u8 *)pbuf)[box_addr] |= pval; + } + } + return 0; +} +EXPORT_SYMBOL(packing); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); -- cgit From d54a16b20157ce300298eb4a1169bf9acfda3d08 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 2 May 2019 16:15:10 +0200 Subject: netlink: set bad attribute also on maxtype check The check that attribute type is within 0...maxtype range in __nla_validate_parse() sets only error message but not bad_attr in extack. Set also bad_attr to tell userspace which attribute failed validation. Signed-off-by: Michal Kubecek Reviewed-by: Johannes Berg Reviewed-by: David Ahern Signed-off-by: David S. Miller --- lib/nlattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index 29f6336e2422..adc919b32bf9 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -356,7 +356,8 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, if (type == 0 || type > maxtype) { if (validate & NL_VALIDATE_MAXTYPE) { - NL_SET_ERR_MSG(extack, "Unknown attribute type"); + NL_SET_ERR_MSG_ATTR(extack, nla, + "Unknown attribute type"); return -EINVAL; } continue; -- cgit From b424e432e770d6dd572765459d5b6a96a19c5286 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 2 May 2019 16:15:10 +0200 Subject: netlink: add validation of NLA_F_NESTED flag Add new validation flag NL_VALIDATE_NESTED which adds three consistency checks of NLA_F_NESTED_FLAG: - the flag is set on attributes with NLA_NESTED{,_ARRAY} policy - the flag is not set on attributes with other policies except NLA_UNSPEC - the flag is set on attribute passed to nla_parse_nested() Signed-off-by: Michal Kubecek v2: change error messages to mention NLA_F_NESTED explicitly Reviewed-by: Johannes Berg Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- lib/nlattr.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index adc919b32bf9..cace9b307781 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -184,6 +184,21 @@ static int validate_nla(const struct nlattr *nla, int maxtype, } } + if (validate & NL_VALIDATE_NESTED) { + if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) && + !(nla->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "NLA_F_NESTED is missing"); + return -EINVAL; + } + if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY && + pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "NLA_F_NESTED not expected"); + return -EINVAL; + } + } + switch (pt->type) { case NLA_EXACT_LEN: if (attrlen != pt->len) -- cgit From 2ac5a3bf7042a1c4abbcce1b6f0ec61e5d3786c2 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 10 May 2019 10:42:13 +0200 Subject: vsprintf: Do not break early boot with probing addresses The commit 3e5903eb9cff70730 ("vsprintf: Prevent crash when dereferencing invalid pointers") broke boot on several architectures. The common pattern is that probe_kernel_read() is not working during early boot because the userspace access framework is not ready. It is a generic problem. We have to avoid any complex external functions in vsprintf() code, especially in the common path. They might break printk() easily and are hard to debug. Replace probe_kernel_read() with some simple checks for obvious problems. Details: 1. Report on Power: Kernel crashes very early during boot with CONFIG_PPC_KUAP and CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG The problem is the combination of some new code called via printk(), check_pointer() which calls probe_kernel_read(). That then calls allow_user_access() (PPC_KUAP) and that uses mmu_has_feature() too early (before we've patched features). With the JUMP_LABEL debug enabled that causes us to call printk() & dump_stack() and we end up recursing and overflowing the stack. Because it happens so early you don't get any output, just an apparently dead system. The stack trace (which you don't see) is something like: ... dump_stack+0xdc probe_kernel_read+0x1a4 check_pointer+0x58 string+0x3c vsnprintf+0x1bc vscnprintf+0x20 printk_safe_log_store+0x7c printk+0x40 dump_stack_print_info+0xbc dump_stack+0x8 probe_kernel_read+0x1a4 probe_kernel_read+0x19c check_pointer+0x58 string+0x3c vsnprintf+0x1bc vscnprintf+0x20 vprintk_store+0x6c vprintk_emit+0xec vprintk_func+0xd4 printk+0x40 cpufeatures_process_feature+0xc8 scan_cpufeatures_subnodes+0x380 of_scan_flat_dt_subnodes+0xb4 dt_cpu_ftrs_scan_callback+0x158 of_scan_flat_dt+0xf0 dt_cpu_ftrs_scan+0x3c early_init_devtree+0x360 early_setup+0x9c 2. Report on s390: vsnprintf invocations are broken on s390. For example, the early boot output now looks like this where the first (efault) should be the linux_banner: [ 0.099985] (efault) [ 0.099985] setup: Linux is running as a z/VM guest operating system in 64-bit mode [ 0.100066] setup: The maximum memory size is 8192MB [ 0.100070] cma: Reserved 4 MiB at (efault) [ 0.100100] numa: NUMA mode: (efault) The reason for this is that the code assumes that probe_kernel_address() works very early. This however is not true on at least s390. Uaccess on KERNEL_DS works only after page tables have been setup on s390, which happens with setup_arch()->paging_init(). Any probe_kernel_address() invocation before that will return -EFAULT.
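The effect of the simple checks, in miniature (an illustration, not part of the patch):

	printk("%s\n", (char *)NULL);			/* prints "(null)" */
	printk("%s\n", (char *)16);			/* below PAGE_SIZE: prints "(efault)" */
	printk("%s\n", (char *)ERR_PTR(-ENOMEM));	/* IS_ERR_VALUE: prints "(efault)" */

A pointer into ordinary unmapped kernel memory is no longer caught, but as the removed comment below notes, that was always the rare case compared to NULL, small offsets from NULL, and ERR_PTR values.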
Fixes: 3e5903eb9cff70730 ("vsprintf: Prevent crash when dereferencing invalid pointers") Link: http://lkml.kernel.org/r/20190510084213.22149-1-pmladek@suse.com Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: "Tobin C . Harding" Cc: Michal Hocko Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Russell Currey Cc: Christophe Leroy Cc: Stephen Rothwell Cc: Heiko Carstens Cc: linux-arch@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: Martin Schwidefsky Cc: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- lib/vsprintf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7b0a6140bfad..2f003cfe340e 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -628,19 +628,16 @@ static char *error_string(char *buf, char *end, const char *s, } /* - * This is not a fool-proof test. 99% of the time that this will fault is - * due to a bad pointer, not one that crosses into bad memory. Just test - * the address to make sure it doesn't fault due to a poorly added printk - * during debugging. + * Do not call any complex external code here. Nested printk()/vsprintf() + * might cause infinite loops. Failures might break printk() and would + * be hard to debug. */ static const char *check_pointer_msg(const void *ptr) { - char byte; - if (!ptr) return "(null)"; - if (probe_kernel_address(ptr, byte)) + if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr)) return "(efault)"; return NULL; -- cgit From 73b0140bf0fe9df90fb267c00673c4b9bf285430 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 13 May 2019 17:17:11 -0700 Subject: mm/gup: change GUP fast to use flags rather than a write 'bool' To facilitate additional options to get_user_pages_fast() change the singular write parameter to be gup_flags. This patch does not change any functionality. New functionality will follow in subsequent patches. Some of the get_user_pages_fast() call sites were unchanged because they already passed FOLL_WRITE or 0 for the write parameter. NOTE: It was suggested to change the ordering of the get_user_pages_fast() arguments to ensure that callers were converted. This breaks the current GUP call site convention of having the returned pages be the final parameter. So the suggestion was rejected. Link: http://lkml.kernel.org/r/20190328084422.29911-4-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190317183438.2057-4-ira.weiny@intel.com Signed-off-by: Ira Weiny Reviewed-by: Mike Marshall Cc: Aneesh Kumar K.V Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dan Williams Cc: "David S. Miller" Cc: Heiko Carstens Cc: Ingo Molnar Cc: James Hogan Cc: Jason Gunthorpe Cc: John Hubbard Cc: "Kirill A. Shutemov" Cc: Martin Schwidefsky Cc: Michal Hocko Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/iov_iter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index b396d328a764..f74fa832f3aa 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1293,7 +1293,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, len = maxpages * PAGE_SIZE; addr &= ~(PAGE_SIZE - 1); n = DIV_ROUND_UP(len, PAGE_SIZE); - res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages); + res = get_user_pages_fast(addr, n, + iov_iter_rw(i) != WRITE ? 
FOLL_WRITE : 0, + pages); if (unlikely(res < 0)) return res; return (res == n ? len : res * PAGE_SIZE) - *start; @@ -1374,7 +1376,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, p = get_pages_array(n); if (!p) return -ENOMEM; - res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p); + res = get_user_pages_fast(addr, n, + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); if (unlikely(res < 0)) { kvfree(p); return res; -- cgit From 9012d011660ea5cf2a623e1de207a2bc0ca6936d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 14 May 2019 15:42:25 -0700 Subject: compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING Commit 60a3cdd06394 ("x86: add optimized inlining") introduced CONFIG_OPTIMIZE_INLINING, but it has been available only for x86. The idea is obviously arch-agnostic. This commit moves the config entry from arch/x86/Kconfig.debug to lib/Kconfig.debug so that all architectures can benefit from it. This can make a huge difference in kernel image size especially when CONFIG_OPTIMIZE_FOR_SIZE is enabled. For example, I got 3.5% smaller arm64 kernel for v5.1-rc1. dec file 18983424 arch/arm64/boot/Image.before 18321920 arch/arm64/boot/Image.after This also slightly improves the "Kernel hacking" Kconfig menu as e61aca5158a8 ("Merge branch 'kconfig-diet' from Dave Hansen') suggested; this config option would be a good fit in the "compiler option" menu. Link: http://lkml.kernel.org/r/20190423034959.13525-12-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Borislav Petkov Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boris Brezillon Cc: Brian Norris Cc: Christophe Leroy Cc: David Woodhouse Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Marek Vasut Cc: Mark Rutland Cc: Mathieu Malaterre Cc: Miquel Raynal Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Weinberger Cc: Russell King Cc: Stefan Agner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d695ec1477f3..d5411a7484f6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -318,6 +318,20 @@ config HEADERS_CHECK exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in your build tree), to make sure they're suitable. +config OPTIMIZE_INLINING + bool "Allow compiler to uninline functions marked 'inline'" + help + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of + compilers. The gcc 4.x series have a rewritten inlining algorithm and + enabling this option will generate a smaller kernel there. Hopefully + this algorithm is so good that allowing gcc 4.x and above to make the + decision will become the default in the future. Until then this option + is there to test gcc for this. + + If unsure, say N. + config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" help -- cgit From 5f239f655a7e67a972ee1fa17045a08e640d28da Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 14 May 2019 15:42:40 -0700 Subject: lib/bitmap.c: remove unused EXPORT_SYMBOLs AFAICT, there have never been any callers of these functions outside mm/mempolicy.c (via their nodemask.h wrappers). In particular, no modular code has ever used them, and given their somewhat exotic semantics, I highly doubt they will ever find such a use. 
In any case, no need to export them currently. Link: http://lkml.kernel.org/r/20190329205353.6010-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Andy Shevchenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 98872e9025da..66421f304f7d 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -757,7 +757,6 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src, set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst); } } -EXPORT_SYMBOL(bitmap_remap); /** * bitmap_bitremap - Apply map defined by a pair of bitmaps to a single bit @@ -795,7 +794,6 @@ int bitmap_bitremap(int oldbit, const unsigned long *old, else return bitmap_ord_to_pos(new, n % w, bits); } -EXPORT_SYMBOL(bitmap_bitremap); /** * bitmap_onto - translate one bitmap relative to another @@ -930,7 +928,6 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig, m++; } } -EXPORT_SYMBOL(bitmap_onto); /** * bitmap_fold - fold larger bitmap into smaller, modulo specified size @@ -955,7 +952,6 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, for_each_set_bit(oldbit, orig, nbits) set_bit(oldbit % sz, dst); } -EXPORT_SYMBOL(bitmap_fold); /* * Common code for bitmap_*_region() routines. -- cgit

From cdc90a1871d6e64080f4506e900c6ef88e6fb39f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 14 May 2019 15:42:43 -0700 Subject: lib/bitmap.c: guard exotic bitmap functions by CONFIG_NUMA

The bitmap_remap, _bitremap, _onto and _fold functions are only used, via their node_ wrappers, in mm/mempolicy.c, which is only built for CONFIG_NUMA. The helper bitmap_ord_to_pos used by these functions is global, but its only external caller is node_random() in lib/nodemask.c, which is also guarded by CONFIG_NUMA.

For !CONFIG_NUMA:
add/remove: 0/6 grow/shrink: 0/0 up/down: 0/-621 (-621)
Function            old   new   delta
bitmap_pos_to_ord    20     -     -20
bitmap_ord_to_pos    70     -     -70
bitmap_bitremap      81     -     -81
bitmap_fold         113     -    -113
bitmap_onto         123     -    -123
bitmap_remap        214     -    -214
Total: Before=4776, After=4155, chg -13.00%

Link: http://lkml.kernel.org/r/20190329205353.6010-2-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Andy Shevchenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 66421f304f7d..3f3b8051f342 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -649,6 +649,7 @@ int bitmap_parselist_user(const char __user *ubuf, EXPORT_SYMBOL(bitmap_parselist_user); +#ifdef CONFIG_NUMA /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap @@ -952,6 +953,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, for_each_set_bit(oldbit, orig, nbits) set_bit(oldbit % sz, dst); } +#endif /* CONFIG_NUMA */ /* * Common code for bitmap_*_region() routines. -- cgit

From 8e18faeac3e4d4b3ff3d705cd46d0fcb710b09d0 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 14 May 2019 15:42:46 -0700 Subject: lib/plist: rename DEBUG_PI_LIST to DEBUG_PLIST

This is a lot more appropriate than PI_LIST: in the kernel, that name suggests priority inheritance, which this code is not specific to. Furthermore, futexes make use of plists, so the old name can be even more confusing, even though this is only a debug config option.
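For readers who have not met plists: a plist is a descending-priority-sorted doubly-linked list (include/linux/plist.h), used by futexes and other core code, and with CONFIG_DEBUG_PLIST enabled the add/del operations run plist_check_head() consistency checks. A minimal usage sketch follows; the struct waiter wrapper and both helpers are hypothetical, invented purely for illustration, but the plist_* calls are the real API:

    #include <linux/plist.h>

    struct waiter {
            struct plist_node node; /* sorted by node.prio, lowest value first */
            int id;
    };

    static struct plist_head wait_list = PLIST_HEAD_INIT(wait_list);

    /* Queue @w behind all waiters of higher or equal priority. */
    static void enqueue_waiter(struct waiter *w, int prio)
    {
            plist_node_init(&w->node, prio);
            plist_add(&w->node, &wait_list);
    }

    /* Highest-priority waiter (lowest prio value), or NULL if none. */
    static struct waiter *pick_waiter(void)
    {
            if (plist_head_empty(&wait_list))
                    return NULL;
            return container_of(plist_first(&wait_list), struct waiter, node);
    }

Nothing in this is specific to priority inheritance, which is exactly why DEBUG_PLIST is the better name for the debug knob.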
Link: http://lkml.kernel.org/r/20190317185434.1626-1-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- lib/plist.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d5411a7484f6..181bd56238b0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1372,7 +1372,7 @@ config DEBUG_LIST If unsure, say N. -config DEBUG_PI_LIST +config DEBUG_PLIST bool "Debug priority linked list manipulation" depends on DEBUG_KERNEL help diff --git a/lib/plist.c b/lib/plist.c index 199408f91057..d3bd8827186f 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -26,7 +26,7 @@ #include #include -#ifdef CONFIG_DEBUG_PI_LIST +#ifdef CONFIG_DEBUG_PLIST static struct plist_head test_head; @@ -173,7 +173,7 @@ void plist_requeue(struct plist_node *node, struct plist_head *head) plist_check_head(head); } -#ifdef CONFIG_DEBUG_PI_LIST +#ifdef CONFIG_DEBUG_PLIST #include #include #include -- cgit

From 37d0ec34d111acfdb82b24e3de00d926c0aece4d Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:42:49 -0700 Subject: lib/sort: make swap functions more generic

Patch series "lib/sort & lib/list_sort: faster and smaller", v2.

Because CONFIG_RETPOLINE has made indirect calls much more expensive, I thought I'd try to reduce the number made by the library sort functions.

The first three patches apply to lib/sort.c.

Patch #1 is a simple optimization. The built-in swap has special cases for aligned 4- and 8-byte objects. But those are almost never used; most calls to sort() work on larger structures, which fall back to the byte-at-a-time loop. This generalizes them to aligned *multiples* of 4 and 8 bytes. (If nothing else, it saves an awful lot of energy by not thrashing the store buffers as much.)

Patch #2 grabs a juicy piece of low-hanging fruit. I agree that nice simple solid heapsort is preferable to more complex algorithms (sorry, Andrey), but it's possible to implement heapsort with far fewer comparisons (50% asymptotically, 25-40% reduction for realistic sizes) than the way it's been done up to now. And with some care, the code ends up smaller, as well. This is the "big win" patch.

Patch #3 adds the same sort of indirect call bypass that has been added to the net code of late. The great majority of the callers use the builtin swap functions, so replace the indirect call to sort_func with a (highly predictable) series of if() statements. Rather surprisingly, this decreased code size, as the swap functions were inlined and their prologue & epilogue code eliminated.

lib/list_sort.c is a bit trickier, as merge sort is already close to optimal, and we don't want to introduce triumphs of theory over practicality like the Ford-Johnson merge-insertion sort.

Patch #4, without changing the algorithm, chops 32% off the code size and removes the part[MAX_LIST_LENGTH_BITS+1] pointer array (and the corresponding upper limit on efficiently sortable input size).

Patch #5 improves the algorithm. The previous code is already optimal for power-of-two (or slightly smaller) size inputs, but when the input size is just over a power of 2, there's a very unbalanced final merge. There are, in the literature, several algorithms which solve this, but they all depend on the "breadth-first" merge order which was replaced by commit 835cc0c8477f with a more cache-friendly "depth-first" order.
Some hard thinking came up with a depth-first algorithm which defers merges as little as possible while avoiding bad merges. This saves 0.2*n compares, averaged over all sizes. The code size increase is minimal (64 bytes on x86-64, reducing the net savings to 26%), but the comments expanded significantly to document the clever algorithm. TESTING NOTES: I have some ugly user-space benchmarking code which I used for testing before moving this code into the kernel. Shout if you want a copy. I'm running this code right now, with CONFIG_TEST_SORT and CONFIG_TEST_LIST_SORT, but I confess I haven't rebooted since the last round of minor edits to quell checkpatch. I figure there will be at least one round of comments and final testing. This patch (of 5): Rather than having special-case swap functions for 4- and 8-byte objects, special-case aligned multiples of 4 or 8 bytes. This speeds up most users of sort() by avoiding fallback to the byte copy loop. Despite what ca96ab859ab4 ("lib/sort: Add 64 bit swap function") claims, very few users of sort() sort pointers (or pointer-sized objects); most sort structures containing at least two words. (E.g. drivers/acpi/fan.c:acpi_fan_get_fps() sorts an array of 40-byte struct acpi_fan_fps.) The functions also got renamed to reflect the fact that they support multiple words. In the great tradition of bikeshedding, the names were by far the most contentious issue during review of this patch series. x86-64 code size 872 -> 886 bytes (+14) With feedback from Andy Shevchenko, Rasmus Villemoes and Geert Uytterhoeven. Link: http://lkml.kernel.org/r/f24f932df3a7fa1973c1084154f1cea596bcf341.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Geert Uytterhoeven Cc: Daniel Wagner Cc: Don Mullis Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/sort.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 99 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/sort.c b/lib/sort.c index d6b7a202b0b6..ec79eac85e21 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -11,35 +11,108 @@ #include #include -static int alignment_ok(const void *base, int align) +/** + * is_aligned - is this pointer & size okay for word-wide copying? + * @base: pointer to data + * @size: size of each element + * @align: required aignment (typically 4 or 8) + * + * Returns true if elements can be copied using word loads and stores. + * The size must be a multiple of the alignment, and the base address must + * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + * + * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)" + * to "if ((a | b) & mask)", so we do that by hand. + */ +__attribute_const__ __always_inline +static bool is_aligned(const void *base, size_t size, unsigned char align) { - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || - ((unsigned long)base & (align - 1)) == 0; + unsigned char lsbits = (unsigned char)size; + + (void)base; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + lsbits |= (unsigned char)(uintptr_t)base; +#endif + return (lsbits & (align - 1)) == 0; } -static void u32_swap(void *a, void *b, int size) +/** + * swap_words_32 - swap two elements in 32-bit chunks + * @a, @b: pointers to the elements + * @size: element size (must be a multiple of 4) + * + * Exchange the two objects in memory. 
This exploits base+index addressing, + * which basically all CPUs have, to minimize loop overhead computations. + * + * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the + * bottom of the loop, even though the zero flag is stil valid from the + * subtract (since the intervening mov instructions don't alter the flags). + * Gcc 8.1.0 doesn't have that problem. + */ +static void swap_words_32(void *a, void *b, int size) { - u32 t = *(u32 *)a; - *(u32 *)a = *(u32 *)b; - *(u32 *)b = t; + size_t n = (unsigned int)size; + + do { + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + } while (n); } -static void u64_swap(void *a, void *b, int size) +/** + * swap_words_64 - swap two elements in 64-bit chunks + * @a, @b: pointers to the elements + * @size: element size (must be a multiple of 8) + * + * Exchange the two objects in memory. This exploits base+index + * addressing, which basically all CPUs have, to minimize loop overhead + * computations. + * + * We'd like to use 64-bit loads if possible. If they're not, emulating + * one requires base+index+4 addressing which x86 has but most other + * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads, + * but it's possible to have 64-bit loads without 64-bit pointers (e.g. + * x32 ABI). Are there any cases the kernel needs to worry about? + */ +static void swap_words_64(void *a, void *b, int size) { - u64 t = *(u64 *)a; - *(u64 *)a = *(u64 *)b; - *(u64 *)b = t; + size_t n = (unsigned int)size; + + do { +#ifdef CONFIG_64BIT + u64 t = *(u64 *)(a + (n -= 8)); + *(u64 *)(a + n) = *(u64 *)(b + n); + *(u64 *)(b + n) = t; +#else + /* Use two 32-bit transfers to avoid base+index+4 addressing */ + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + + t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; +#endif + } while (n); } -static void generic_swap(void *a, void *b, int size) +/** + * swap_bytes - swap two elements a byte at a time + * @a, @b: pointers to the elements + * @size: element size + * + * This is the fallback if alignment doesn't allow using larger chunks. + */ +static void swap_bytes(void *a, void *b, int size) { - char t; + size_t n = (unsigned int)size; do { - t = *(char *)a; - *(char *)a++ = *(char *)b; - *(char *)b++ = t; - } while (--size > 0); + char t = ((char *)a)[--n]; + ((char *)a)[n] = ((char *)b)[n]; + ((char *)b)[n] = t; + } while (n); } /** @@ -50,8 +123,10 @@ static void generic_swap(void *a, void *b, int size) * @cmp_func: pointer to comparison function * @swap_func: pointer to swap function or NULL * - * This function does a heapsort on the given array. You may provide a - * swap_func function optimized to your element type. + * This function does a heapsort on the given array. You may provide + * a swap_func function if you need to do something more than a memory + * copy (e.g. fix up pointers or auxiliary data), but the built-in swap + * isn't usually a bottleneck. * * Sorting time is O(n log n) both on average and worst-case. 
While * qsort is about 20% faster on average, it suffers from exploitable @@ -67,12 +142,12 @@ void sort(void *base, size_t num, size_t size, int i = (num/2 - 1) * size, n = num * size, c, r; if (!swap_func) { - if (size == 4 && alignment_ok(base, 4)) - swap_func = u32_swap; - else if (size == 8 && alignment_ok(base, 8)) - swap_func = u64_swap; + if (is_aligned(base, size, 8)) + swap_func = swap_words_64; + else if (is_aligned(base, size, 4)) + swap_func = swap_words_32; else - swap_func = generic_swap; + swap_func = swap_bytes; } /* heapify */ -- cgit From 22a241ccb2c19962a0fb02c98154aa93d3fc1862 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:42:52 -0700 Subject: lib/sort: use more efficient bottom-up heapsort variant This uses fewer comparisons than the previous code (approaching half as many for large random inputs), but produces identical results; it actually performs the exact same series of swap operations. Specifically, it reduces the average number of compares from 2*n*log2(n) - 3*n + o(n) to n*log2(n) + 0.37*n + o(n). This is still 1.63*n worse than glibc qsort() which manages n*log2(n) - 1.26*n, but at least the leading coefficient is correct. Standard heapsort, when sifting down, performs two comparisons per level: one to find the greater child, and a second to see if the current node should be exchanged with that child. Bottom-up heapsort observes that it's better to postpone the second comparison and search for the leaf where -infinity would be sent to, then search back *up* for the current node's destination. Since sifting down usually proceeds to the leaf level (that's where half the nodes are), this does O(1) second comparisons rather than log2(n). That saves a lot of (expensive since Spectre) indirect function calls. The one time it's worse than the previous code is if there are large numbers of duplicate keys, when the top-down algorithm is O(n) and bottom-up is O(n log n). For distinct keys, it's provably always better, doing 1.5*n*log2(n) + O(n) in the worst case. (The code is not significantly more complex. This patch also merges the heap-building and -extracting sift-down loops, resulting in a net code size savings.) x86-64 code size 885 -> 767 bytes (-118) (I see the checkpatch complaint about "else if (n -= size)". The alternative is significantly uglier.) Link: http://lkml.kernel.org/r/2de8348635a1a421a72620677898c7fd5bd4b19d.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Daniel Wagner Cc: Dave Chinner Cc: Don Mullis Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/sort.c | 112 ++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 81 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/sort.c b/lib/sort.c index ec79eac85e21..0d24d0c5c0fc 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -1,8 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * A fast, small, non-recursive O(nlog n) sort for the Linux kernel + * A fast, small, non-recursive O(n log n) sort for the Linux kernel * - * Jan 23 2005 Matt Mackall + * This performs n*log2(n) + 0.37*n + o(n) comparisons on average, + * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case. + * + * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n + * better) at the expense of stack usage and much larger code to avoid + * quicksort's O(n^2) worst case. 
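+ * (At n = 1024, for example, that average is about 10240 + 379 = 10619
+ * compares, where the previous implementation's 2*n*log2(n) - 3*n came
+ * to roughly 17408.)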
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -15,7 +20,7 @@ * is_aligned - is this pointer & size okay for word-wide copying? * @base: pointer to data * @size: size of each element - * @align: required aignment (typically 4 or 8) + * @align: required alignment (typically 4 or 8) * * Returns true if elements can be copied using word loads and stores. * The size must be a multiple of the alignment, and the base address must @@ -115,6 +120,32 @@ static void swap_bytes(void *a, void *b, int size) } while (n); } +/** + * parent - given the offset of the child, find the offset of the parent. + * @i: the offset of the heap element whose parent is sought. Non-zero. + * @lsbit: a precomputed 1-bit mask, equal to "size & -size" + * @size: size of each element + * + * In terms of array indexes, the parent of element j = @i/@size is simply + * (j-1)/2. But when working in byte offsets, we can't use implicit + * truncation of integer divides. + * + * Fortunately, we only need one bit of the quotient, not the full divide. + * @size has a least significant bit. That bit will be clear if @i is + * an even multiple of @size, and set if it's an odd multiple. + * + * Logically, we're doing "if (i & lsbit) i -= size;", but since the + * branch is unpredictable, it's done with a bit of clever branch-free + * code instead. + */ +__attribute_const__ __always_inline +static size_t parent(size_t i, unsigned int lsbit, size_t size) +{ + i -= size; + i -= size & -(i & lsbit); + return i / 2; +} + /** * sort - sort an array of elements * @base: pointer to data to sort @@ -129,17 +160,20 @@ static void swap_bytes(void *a, void *b, int size) * isn't usually a bottleneck. * * Sorting time is O(n log n) both on average and worst-case. While - * qsort is about 20% faster on average, it suffers from exploitable + * quicksort is slightly faster on average, it suffers from exploitable * O(n*n) worst-case behavior and extra memory requirements that make * it less suitable for kernel use. */ - void sort(void *base, size_t num, size_t size, int (*cmp_func)(const void *, const void *), void (*swap_func)(void *, void *, int size)) { /* pre-scale counters for performance */ - int i = (num/2 - 1) * size, n = num * size, c, r; + size_t n = num * size, a = (num/2) * size; + const unsigned int lsbit = size & -size; /* Used to find parent */ + + if (!a) /* num < 2 || size == 0 */ + return; if (!swap_func) { if (is_aligned(base, size, 8)) @@ -150,32 +184,48 @@ void sort(void *base, size_t num, size_t size, swap_func = swap_bytes; } - /* heapify */ - for ( ; i >= 0; i -= size) { - for (r = i; r * 2 + size < n; r = c) { - c = r * 2 + size; - if (c < n - size && - cmp_func(base + c, base + c + size) < 0) - c += size; - if (cmp_func(base + r, base + c) >= 0) - break; - swap_func(base + r, base + c, size); - } - } - - /* sort */ - for (i = n - size; i > 0; i -= size) { - swap_func(base, base + i, size); - for (r = 0; r * 2 + size < i; r = c) { - c = r * 2 + size; - if (c < i - size && - cmp_func(base + c, base + c + size) < 0) - c += size; - if (cmp_func(base + r, base + c) >= 0) - break; - swap_func(base + r, base + c, size); + /* + * Loop invariants: + * 1. elements [a,n) satisfy the heap property (compare greater than + * all of their children), + * 2. elements [n,num*size) are sorted, and + * 3. a <= b <= c <= d <= n (whenever they are valid). 
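+	 * (Here "a" is the offset of the element currently being sifted
+	 * down, "b" walks the sift-down path with "c" and "d" as the
+	 * offsets of b's two children, and "n" bounds the not-yet-sorted
+	 * region.)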
+ */ + for (;;) { + size_t b, c, d; + + if (a) /* Building heap: sift down --a */ + a -= size; + else if (n -= size) /* Sorting: Extract root to --n */ + swap_func(base, base + n, size); + else /* Sort complete */ + break; + + /* + * Sift element at "a" down into heap. This is the + * "bottom-up" variant, which significantly reduces + * calls to cmp_func(): we find the sift-down path all + * the way to the leaves (one compare per level), then + * backtrack to find where to insert the target element. + * + * Because elements tend to sift down close to the leaves, + * this uses fewer compares than doing two per level + * on the way down. (A bit more than half as many on + * average, 3/4 worst-case.) + */ + for (b = a; c = 2*b + size, (d = c + size) < n;) + b = cmp_func(base + c, base + d) >= 0 ? c : d; + if (d == n) /* Special case last leaf with no sibling */ + b = c; + + /* Now backtrack from "b" to the correct location for "a" */ + while (b != a && cmp_func(base + a, base + b) >= 0) + b = parent(b, lsbit, size); + c = b; /* Where "a" belongs */ + while (b != a) { /* Shift it into place */ + b = parent(b, lsbit, size); + swap_func(base + b, base + c, size); } } } - EXPORT_SYMBOL(sort); -- cgit From 8fb583c4258d08f0aff105aa2ae5157b7d414ea2 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:42:55 -0700 Subject: lib/sort: avoid indirect calls to built-in swap Similar to what's being done in the net code, this takes advantage of the fact that most invocations use only a few common swap functions, and replaces indirect calls to them with (highly predictable) conditional branches. (The downside, of course, is that if you *do* use a custom swap function, there are a few extra predicted branches on the code path.) This actually *shrinks* the x86-64 code, because it inlines the various swap functions inside do_swap, eliding function prologues & epilogues. x86-64 code size 767 -> 703 bytes (-64) Link: http://lkml.kernel.org/r/d10c5d4b393a1847f32f5b26f4bbaa2857140e1e.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Daniel Wagner Cc: Dave Chinner Cc: Don Mullis Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/sort.c | 51 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/sort.c b/lib/sort.c index 0d24d0c5c0fc..50855ea8c262 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -54,10 +54,8 @@ static bool is_aligned(const void *base, size_t size, unsigned char align) * subtract (since the intervening mov instructions don't alter the flags). * Gcc 8.1.0 doesn't have that problem. */ -static void swap_words_32(void *a, void *b, int size) +static void swap_words_32(void *a, void *b, size_t n) { - size_t n = (unsigned int)size; - do { u32 t = *(u32 *)(a + (n -= 4)); *(u32 *)(a + n) = *(u32 *)(b + n); @@ -80,10 +78,8 @@ static void swap_words_32(void *a, void *b, int size) * but it's possible to have 64-bit loads without 64-bit pointers (e.g. * x32 ABI). Are there any cases the kernel needs to worry about? */ -static void swap_words_64(void *a, void *b, int size) +static void swap_words_64(void *a, void *b, size_t n) { - size_t n = (unsigned int)size; - do { #ifdef CONFIG_64BIT u64 t = *(u64 *)(a + (n -= 8)); @@ -109,10 +105,8 @@ static void swap_words_64(void *a, void *b, int size) * * This is the fallback if alignment doesn't allow using larger chunks. 
*/ -static void swap_bytes(void *a, void *b, int size) +static void swap_bytes(void *a, void *b, size_t n) { - size_t n = (unsigned int)size; - do { char t = ((char *)a)[--n]; ((char *)a)[n] = ((char *)b)[n]; @@ -120,6 +114,33 @@ static void swap_bytes(void *a, void *b, int size) } while (n); } +typedef void (*swap_func_t)(void *a, void *b, int size); + +/* + * The values are arbitrary as long as they can't be confused with + * a pointer, but small integers make for the smallest compare + * instructions. + */ +#define SWAP_WORDS_64 (swap_func_t)0 +#define SWAP_WORDS_32 (swap_func_t)1 +#define SWAP_BYTES (swap_func_t)2 + +/* + * The function pointer is last to make tail calls most efficient if the + * compiler decides not to inline this function. + */ +static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func) +{ + if (swap_func == SWAP_WORDS_64) + swap_words_64(a, b, size); + else if (swap_func == SWAP_WORDS_32) + swap_words_32(a, b, size); + else if (swap_func == SWAP_BYTES) + swap_bytes(a, b, size); + else + swap_func(a, b, (int)size); +} + /** * parent - given the offset of the child, find the offset of the parent. * @i: the offset of the heap element whose parent is sought. Non-zero. @@ -157,7 +178,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size) * This function does a heapsort on the given array. You may provide * a swap_func function if you need to do something more than a memory * copy (e.g. fix up pointers or auxiliary data), but the built-in swap - * isn't usually a bottleneck. + * avoids a slow retpoline and so is significantly faster. * * Sorting time is O(n log n) both on average and worst-case. While * quicksort is slightly faster on average, it suffers from exploitable @@ -177,11 +198,11 @@ void sort(void *base, size_t num, size_t size, if (!swap_func) { if (is_aligned(base, size, 8)) - swap_func = swap_words_64; + swap_func = SWAP_WORDS_64; else if (is_aligned(base, size, 4)) - swap_func = swap_words_32; + swap_func = SWAP_WORDS_32; else - swap_func = swap_bytes; + swap_func = SWAP_BYTES; } /* @@ -197,7 +218,7 @@ void sort(void *base, size_t num, size_t size, if (a) /* Building heap: sift down --a */ a -= size; else if (n -= size) /* Sorting: Extract root to --n */ - swap_func(base, base + n, size); + do_swap(base, base + n, size, swap_func); else /* Sort complete */ break; @@ -224,7 +245,7 @@ void sort(void *base, size_t num, size_t size, c = b; /* Where "a" belongs */ while (b != a) { /* Shift it into place */ b = parent(b, lsbit, size); - swap_func(base + b, base + c, size); + do_swap(base + b, base + c, size, swap_func); } } } -- cgit From 043b3f7b6388fca6be86ca82979f66c5723a0d10 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:42:58 -0700 Subject: lib/list_sort: simplify and remove MAX_LIST_LENGTH_BITS Rather than a fixed-size array of pending sorted runs, use the ->prev links to keep track of things. This reduces stack usage, eliminates some ugly overflow handling, and reduces the code size. Also: * merge() no longer needs to handle NULL inputs, so simplify. * The same applies to merge_and_restore_back_links(), which is renamed to the less ponderous merge_final(). (It's a static helper function, so we don't need a super-descriptive name; comments will do.) * Document the actual return value requirements on the (*cmp)() function; some callers are already using this feature. 
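To make the newly documented contract concrete, a comparison callback written against the relaxed rules could look like the sketch below; struct item and its fields are hypothetical, and the multi-word pattern deliberately mirrors the example added to the kernel-doc in this patch:

    #include <linux/list.h>
    #include <linux/list_sort.h>

    struct item {
            struct list_head list;
            u64 high, low;
    };

    /* Return > 0 iff @a sorts after @b; <= 0 keeps the existing order. */
    static int item_cmp(void *priv, struct list_head *a, struct list_head *b)
    {
            const struct item *x = list_entry(a, struct item, list);
            const struct item *y = list_entry(b, struct item, list);

            if (x->high != y->high)
                    return x->high > y->high;
            return x->low > y->low;
    }

A caller then just does list_sort(NULL, &item_list, item_cmp) on some hypothetical item_list; because the comparison returns a 0/1 boolean rather than a full three-way -1/0/+1 result, the callback can save a few cycles per call.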
x86-64 code size 1086 -> 739 bytes (-347) (Yes, I see checkpatch complaining about no space after comma in "__attribute__((nonnull(2,3,4,5)))". Checkpatch is wrong.) Feedback from Rasmus Villemoes, Andy Shevchenko and Geert Uytterhoeven. [akpm@linux-foundation.org: remove __pure usage due to mysterious warning] Link: http://lkml.kernel.org/r/f63c410e0ff76009c9b58e01027e751ff7fdb749.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Geert Uytterhoeven Cc: Daniel Wagner Cc: Dave Chinner Cc: Don Mullis Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/list_sort.c | 165 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 103 insertions(+), 62 deletions(-) (limited to 'lib') diff --git a/lib/list_sort.c b/lib/list_sort.c index 85759928215b..ba9431bcac0b 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -7,33 +7,41 @@ #include #include -#define MAX_LIST_LENGTH_BITS 20 +typedef int __attribute__((nonnull(2,3))) (*cmp_func)(void *, + struct list_head const *, struct list_head const *); /* * Returns a list organized in an intermediate format suited * to chaining of merge() calls: null-terminated, no reserved or * sentinel head node, "prev" links not maintained. */ -static struct list_head *merge(void *priv, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b), +__attribute__((nonnull(2,3,4))) +static struct list_head *merge(void *priv, cmp_func cmp, struct list_head *a, struct list_head *b) { - struct list_head head, *tail = &head; + struct list_head *head, **tail = &head; - while (a && b) { + for (;;) { /* if equal, take 'a' -- important for sort stability */ - if ((*cmp)(priv, a, b) <= 0) { - tail->next = a; + if (cmp(priv, a, b) <= 0) { + *tail = a; + tail = &a->next; a = a->next; + if (!a) { + *tail = b; + break; + } } else { - tail->next = b; + *tail = b; + tail = &b->next; b = b->next; + if (!b) { + *tail = a; + break; + } } - tail = tail->next; } - tail->next = a?:b; - return head.next; + return head; } /* @@ -43,44 +51,52 @@ static struct list_head *merge(void *priv, * prev-link restoration pass, or maintaining the prev links * throughout. */ -static void merge_and_restore_back_links(void *priv, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b), - struct list_head *head, - struct list_head *a, struct list_head *b) +__attribute__((nonnull(2,3,4,5))) +static void merge_final(void *priv, cmp_func cmp, struct list_head *head, + struct list_head *a, struct list_head *b) { struct list_head *tail = head; u8 count = 0; - while (a && b) { + for (;;) { /* if equal, take 'a' -- important for sort stability */ - if ((*cmp)(priv, a, b) <= 0) { + if (cmp(priv, a, b) <= 0) { tail->next = a; a->prev = tail; + tail = a; a = a->next; + if (!a) + break; } else { tail->next = b; b->prev = tail; + tail = b; b = b->next; + if (!b) { + b = a; + break; + } } - tail = tail->next; } - tail->next = a ? : b; + /* Finish linking remainder of list b on to tail */ + tail->next = b; do { /* - * In worst cases this loop may run many iterations. + * If the merge is highly unbalanced (e.g. the input is + * already sorted), this loop may run many iterations. * Continue callbacks to the client even though no * element comparison is needed, so the client's cmp() * routine can invoke cond_resched() periodically. 
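	 * (count is a u8, so the dummy cmp() call below fires once every
	 * 256 elements, which bounds the stretch between cond_resched()
	 * opportunities.)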
*/ - if (unlikely(!(++count))) - (*cmp)(priv, tail->next, tail->next); - - tail->next->prev = tail; - tail = tail->next; - } while (tail->next); - + if (unlikely(!++count)) + cmp(priv, b, b); + b->prev = tail; + tail = b; + b = b->next; + } while (b); + + /* And the final links to make a circular doubly-linked list */ tail->next = head; head->prev = tail; } @@ -91,55 +107,80 @@ static void merge_and_restore_back_links(void *priv, * @head: the list to sort * @cmp: the elements comparison function * - * This function implements "merge sort", which has O(nlog(n)) - * complexity. + * This function implements a bottom-up merge sort, which has O(nlog(n)) + * complexity. We use depth-first order to take advantage of cacheing. + * (E.g. when we get to the fourth element, we immediately merge the + * first two 2-element lists.) + * + * The comparison funtion @cmp must return > 0 if @a should sort after + * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should + * sort before @b *or* their original order should be preserved. It is + * always called with the element that came first in the input in @a, + * and list_sort is a stable sort, so it is not necessary to distinguish + * the @a < @b and @a == @b cases. * - * The comparison function @cmp must return a negative value if @a - * should sort before @b, and a positive value if @a should sort after - * @b. If @a and @b are equivalent, and their original relative - * ordering is to be preserved, @cmp must return 0. + * This is compatible with two styles of @cmp function: + * - The traditional style which returns <0 / =0 / >0, or + * - Returning a boolean 0/1. + * The latter offers a chance to save a few cycles in the comparison + * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c). + * + * A good way to write a multi-word comparison is + * if (a->high != b->high) + * return a->high > b->high; + * if (a->middle != b->middle) + * return a->middle > b->middle; + * return a->low > b->low; */ +__attribute__((nonnull(2,3))) void list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv, struct list_head *a, struct list_head *b)) { - struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists - -- last slot is a sentinel */ - int lev; /* index into part[] */ - int max_lev = 0; - struct list_head *list; + struct list_head *list = head->next, *pending = NULL; + size_t count = 0; /* Count of pending */ - if (list_empty(head)) + if (list == head->prev) /* Zero or one elements */ return; - memset(part, 0, sizeof(part)); - + /* Convert to a null-terminated singly-linked list. */ head->prev->next = NULL; - list = head->next; - while (list) { + /* + * Data structure invariants: + * - All lists are singly linked and null-terminated; prev + * pointers are not maintained. + * - pending is a prev-linked "list of lists" of sorted + * sublists awaiting further merging. + * - Each of the sorted sublists is power-of-two in size, + * corresponding to bits set in "count". + * - Sublists are sorted by size and age, smallest & newest at front. 
+ */ + do { + size_t bits; struct list_head *cur = list; + + /* Extract the head of "list" as a single-element list "cur" */ list = list->next; cur->next = NULL; - for (lev = 0; part[lev]; lev++) { - cur = merge(priv, cmp, part[lev], cur); - part[lev] = NULL; + /* Do merges corresponding to set lsbits in count */ + for (bits = count; bits & 1; bits >>= 1) { + cur = merge(priv, (cmp_func)cmp, pending, cur); + pending = pending->prev; /* Untouched by merge() */ } - if (lev > max_lev) { - if (unlikely(lev >= ARRAY_SIZE(part)-1)) { - printk_once(KERN_DEBUG "list too long for efficiency\n"); - lev--; - } - max_lev = lev; - } - part[lev] = cur; + /* And place the result at the head of "pending" */ + cur->prev = pending; + pending = cur; + count++; + } while (list->next); + + /* Now merge together last element with all pending lists */ + while (pending->prev) { + list = merge(priv, (cmp_func)cmp, pending, list); + pending = pending->prev; } - - for (lev = 0; lev < max_lev; lev++) - if (part[lev]) - list = merge(priv, cmp, part[lev], list); - - merge_and_restore_back_links(priv, cmp, head, part[max_lev], list); + /* The final merge, rebuilding prev links */ + merge_final(priv, (cmp_func)cmp, head, pending, list); } EXPORT_SYMBOL(list_sort); -- cgit From b5c56e0cdd62979dd538e5363b06be5bdf735a09 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:43:02 -0700 Subject: lib/list_sort: optimize number of calls to comparison function CONFIG_RETPOLINE has severely degraded indirect function call performance, so it's worth putting some effort into reducing the number of times cmp() is called. This patch avoids badly unbalanced merges on unlucky input sizes. It slightly increases the code size, but saves an average of 0.2*n calls to cmp(). x86-64 code size 739 -> 803 bytes (+64) Unfortunately, there's not a lot of low-hanging fruit in a merge sort; it already performs only n*log2(n) - K*n + O(1) compares. The leading coefficient is already at the theoretical limit (log2(n!) corresponds to K=1.4427), so we're fighting over the linear term, and the best mergesort can do is K=1.2645, achieved when n is a power of 2. The differences between mergesort variants appear when n is *not* a power of 2; K is a function of the fractional part of log2(n). Top-down mergesort does best of all, achieving a minimum K=1.2408, and an average (over all sizes) K=1.248. However, that requires knowing the number of entries to be sorted ahead of time, and making a full pass over the input to count it conflicts with a second performance goal, which is cache blocking. Obviously, we have to read the entire list into L1 cache at some point, and performance is best if it fits. But if it doesn't fit, each full pass over the input causes a cache miss per element, which is undesirable. While textbooks explain bottom-up mergesort as a succession of merging passes, practical implementations do merging in depth-first order: as soon as two lists of the same size are available, they are merged. This allows as many merge passes as possible to fit into L1; only the final few merges force cache misses. This cache-friendly depth-first merge order depends on us merging the beginning of the input as much as possible before we've even seen the end of the input (and thus know its size). The simple eager merge pattern causes bad performance when n is just over a power of 2. If n=1028, the final merge is between 1024- and 4-element lists, which is wasteful of comparisons. 
(This is actually worse on average than n=1025, because a 1024:1 merge will, on average, end after 512 compares, while 1024:4 will walk 4/5 of the list.) Because of this, bottom-up mergesort achieves K < 0.5 for such sizes, and has an average (over all sizes) K of around 1. (My experiments show K=1.01, while theory predicts K=0.965.)

There are "worst-case optimal" variants of bottom-up mergesort which avoid this bad performance, but the algorithms given in the literature, such as queue-mergesort and boustrophedonic mergesort, depend on the breadth-first multi-pass structure that we are trying to avoid.

This implementation is as eager as possible while ensuring that all merge passes are at worst 1:2 unbalanced. This achieves the same average K=1.207 as queue-mergesort, which is 0.2*n better than bottom-up, and only 0.04*n behind top-down mergesort.

Specifically, it defers merging two lists of size 2^k until it is known that there are 2^k additional inputs following. This ensures that the final uneven merges triggered by reaching the end of the input will be at worst 2:1. This will avoid cache misses as long as 3*2^k elements fit into the cache.

(I confess to being more than a little bit proud of how clean this code turned out. It took a lot of thinking, but the resultant inner loop is very simple and efficient.)

Refs:
  Bottom-up Mergesort: A Detailed Analysis
  Wolfgang Panny, Helmut Prodinger
  Algorithmica 14(4):340--354, October 1995
  https://doi.org/10.1007/BF01294131
  https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.6.5260

  The cost distribution of queue-mergesort, optimal mergesorts, and power-of-two rules
  Wei-Mei Chen, Hsien-Kuei Hwang, Gen-Huey Chen
  Journal of Algorithms 30(2); Pages 423--448, February 1999
  https://doi.org/10.1006/jagm.1998.0986
  https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.5380

  Queue-Mergesort
  Mordecai J. Golin, Robert Sedgewick
  Information Processing Letters, 48(5):253--259, 10 December 1993
  https://doi.org/10.1016/0020-0190(93)90088-q
  https://sci-hub.tw/10.1016/0020-0190(93)90088-Q

Feedback from Rasmus Villemoes.

Link: http://lkml.kernel.org/r/fd560853cc4dca0d0f02184ffa888b4c1be89abc.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Daniel Wagner Cc: Dave Chinner Cc: Don Mullis Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/list_sort.c | 113 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/list_sort.c b/lib/list_sort.c index ba9431bcac0b..06e900c5587b 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -107,11 +107,6 @@ static void merge_final(void *priv, cmp_func cmp, struct list_head *head, * @head: the list to sort * @cmp: the elements comparison function * - * This function implements a bottom-up merge sort, which has O(nlog(n)) - * complexity. We use depth-first order to take advantage of cacheing. - * (E.g. when we get to the fourth element, we immediately merge the - * first two 2-element lists.) - * * The comparison funtion @cmp must return > 0 if @a should sort after * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should * sort before @b *or* their original order should be preserved.
It is @@ -131,6 +126,60 @@ static void merge_final(void *priv, cmp_func cmp, struct list_head *head, * if (a->middle != b->middle) * return a->middle > b->middle; * return a->low > b->low; + * + * + * This mergesort is as eager as possible while always performing at least + * 2:1 balanced merges. Given two pending sublists of size 2^k, they are + * merged to a size-2^(k+1) list as soon as we have 2^k following elements. + * + * Thus, it will avoid cache thrashing as long as 3*2^k elements can + * fit into the cache. Not quite as good as a fully-eager bottom-up + * mergesort, but it does use 0.2*n fewer comparisons, so is faster in + * the common case that everything fits into L1. + * + * + * The merging is controlled by "count", the number of elements in the + * pending lists. This is beautiully simple code, but rather subtle. + * + * Each time we increment "count", we set one bit (bit k) and clear + * bits k-1 .. 0. Each time this happens (except the very first time + * for each bit, when count increments to 2^k), we merge two lists of + * size 2^k into one list of size 2^(k+1). + * + * This merge happens exactly when the count reaches an odd multiple of + * 2^k, which is when we have 2^k elements pending in smaller lists, + * so it's safe to merge away two lists of size 2^k. + * + * After this happens twice, we have created two lists of size 2^(k+1), + * which will be merged into a list of size 2^(k+2) before we create + * a third list of size 2^(k+1), so there are never more than two pending. + * + * The number of pending lists of size 2^k is determined by the + * state of bit k of "count" plus two extra pieces of information: + * - The state of bit k-1 (when k == 0, consider bit -1 always set), and + * - Whether the higher-order bits are zero or non-zero (i.e. + * is count >= 2^(k+1)). + * There are six states we distinguish. "x" represents some arbitrary + * bits, and "y" represents some arbitrary non-zero bits: + * 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k + * 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k + * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k + * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * (merge and loop back to state 2) + * + * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because + * bit k-1 is set while the more significant bits are non-zero) and + * merge them away in the 5->2 transition. Note in particular that just + * before the 5->2 transition, all lower-order bits are 11 (state 3), + * so there is one list of each smaller size. + * + * When we reach the end of the input, we merge all the pending + * lists, from smallest to largest. If you work through cases 2 to + * 5 above, you can see that the number of elements we merge with a list + * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to + * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1). */ __attribute__((nonnull(2,3))) void list_sort(void *priv, struct list_head *head, @@ -152,33 +201,53 @@ void list_sort(void *priv, struct list_head *head, * pointers are not maintained. * - pending is a prev-linked "list of lists" of sorted * sublists awaiting further merging. - * - Each of the sorted sublists is power-of-two in size, - * corresponding to bits set in "count". + * - Each of the sorted sublists is power-of-two in size. 
* - Sublists are sorted by size and age, smallest & newest at front. + * - There are zero to two sublists of each size. + * - A pair of pending sublists are merged as soon as the number + * of following pending elements equals their size (i.e. + * each time count reaches an odd multiple of that size). + * That ensures each later final merge will be at worst 2:1. + * - Each round consists of: + * - Merging the two sublists selected by the highest bit + * which flips when count is incremented, and + * - Adding an element from the input as a size-1 sublist. */ do { size_t bits; - struct list_head *cur = list; + struct list_head **tail = &pending; - /* Extract the head of "list" as a single-element list "cur" */ - list = list->next; - cur->next = NULL; + /* Find the least-significant clear bit in count */ + for (bits = count; bits & 1; bits >>= 1) + tail = &(*tail)->prev; + /* Do the indicated merge */ + if (likely(bits)) { + struct list_head *a = *tail, *b = a->prev; - /* Do merges corresponding to set lsbits in count */ - for (bits = count; bits & 1; bits >>= 1) { - cur = merge(priv, (cmp_func)cmp, pending, cur); - pending = pending->prev; /* Untouched by merge() */ + a = merge(priv, (cmp_func)cmp, b, a); + /* Install the merged result in place of the inputs */ + a->prev = b->prev; + *tail = a; } - /* And place the result at the head of "pending" */ - cur->prev = pending; - pending = cur; + + /* Move one element from input list to pending */ + list->prev = pending; + pending = list; + list = list->next; + pending->next = NULL; count++; - } while (list->next); + } while (list); + + /* End of input; merge together all the pending lists. */ + list = pending; + pending = pending->prev; + for (;;) { + struct list_head *next = pending->prev; - /* Now merge together last element with all pending lists */ - while (pending->prev) { + if (!next) + break; list = merge(priv, (cmp_func)cmp, pending, list); - pending = pending->prev; + pending = next; } /* The final merge, rebuilding prev links */ merge_final(priv, (cmp_func)cmp, head, pending, list); -- cgit From 2c64e9cb0b6b858901e9a386860d7d929d1cbaeb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 May 2019 15:43:05 -0700 Subject: lib: Move mathematic helpers to separate folder For better maintenance and expansion move the mathematic helpers to the separate folder. No functional change intended. Note, the int_sqrt() is not used as a part of lib, so, moved to regular obj. 
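Since the move promises no functional change, the implementations travel verbatim (their full bodies are visible in the diff below). One quick way to convince yourself a helper survived the move intact is to lift it into userspace and brute-force it against its documented contract; the harness below is hypothetical and not part of the patch, with the kernel's __fls() open-coded via a GCC builtin:

    #include <assert.h>
    #include <stdio.h>

    /* Same shift-and-subtract algorithm as lib/math/int_sqrt.c. */
    static unsigned long int_sqrt(unsigned long x)
    {
            unsigned long b, m, y = 0;

            if (x <= 1)
                    return x;

            /* __fls(x) & ~1UL: highest even bit position at or below the MSB */
            m = 1UL << ((8 * sizeof(long) - 1 - __builtin_clzl(x)) & ~1UL);
            while (m != 0) {
                    b = y + m;
                    y >>= 1;
                    if (x >= b) {
                            x -= b;
                            y += m;
                    }
                    m >>= 2;
            }
            return y;
    }

    int main(void)
    {
            unsigned long x;

            for (x = 0; x < 1UL << 20; x++) {
                    unsigned long r = int_sqrt(x);

                    /* Contract: r == floor(sqrt(x)), i.e. r^2 <= x < (r+1)^2 */
                    assert(r * r <= x && x < (r + 1) * (r + 1));
            }
            printf("int_sqrt: ok over [0, 2^20)\n");
            return 0;
    }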
Link: http://lkml.kernel.org/r/20190323172531.80025-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Mauro Carvalho Chehab Cc: Randy Dunlap Cc: Thierry Reding Cc: Lee Jones Cc: Daniel Thompson Cc: Ray Jui [mchehab+samsung@kernel.org: fix broken doc references for div64.c and gcd.c] Link: http://lkml.kernel.org/r/734f49bae5d4052b3c25691dfefad59bea2e5843.1555580999.git.mchehab+samsung@kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 14 +-- lib/Makefile | 15 +-- lib/cordic.c | 92 -------------- lib/div64.c | 192 ---------------------------- lib/gcd.c | 84 ------------- lib/int_sqrt.c | 70 ----------- lib/lcm.c | 25 ---- lib/math/Kconfig | 11 ++ lib/math/Makefile | 5 + lib/math/cordic.c | 92 ++++++++++++++ lib/math/div64.c | 192 ++++++++++++++++++++++++++++ lib/math/gcd.c | 84 +++++++++++++ lib/math/int_sqrt.c | 70 +++++++++++ lib/math/lcm.c | 25 ++++ lib/math/prime_numbers.c | 315 ++++++++++++++++++++++++++++++++++++++++++++++ lib/math/rational.c | 65 ++++++++++ lib/math/reciprocal_div.c | 69 ++++++++++ lib/prime_numbers.c | 315 ---------------------------------------------- lib/rational.c | 65 ---------- lib/reciprocal_div.c | 69 ---------- 20 files changed, 936 insertions(+), 933 deletions(-) delete mode 100644 lib/cordic.c delete mode 100644 lib/div64.c delete mode 100644 lib/gcd.c delete mode 100644 lib/int_sqrt.c delete mode 100644 lib/lcm.c create mode 100644 lib/math/Kconfig create mode 100644 lib/math/Makefile create mode 100644 lib/math/cordic.c create mode 100644 lib/math/div64.c create mode 100644 lib/math/gcd.c create mode 100644 lib/math/int_sqrt.c create mode 100644 lib/math/lcm.c create mode 100644 lib/math/prime_numbers.c create mode 100644 lib/math/rational.c create mode 100644 lib/math/reciprocal_div.c delete mode 100644 lib/prime_numbers.c delete mode 100644 lib/rational.c delete mode 100644 lib/reciprocal_div.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index f323b85ad11c..3577609b61be 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -46,9 +46,6 @@ config HAVE_ARCH_BITREVERSE This option enables the use of hardware bit-reversal instructions on architectures which support such operations. -config RATIONAL - bool - config GENERIC_STRNCPY_FROM_USER bool @@ -61,6 +58,8 @@ config GENERIC_NET_UTILS config GENERIC_FIND_FIRST_BIT bool +source "lib/math/Kconfig" + config NO_GENERIC_PCI_IOPORT_MAP bool @@ -531,12 +530,6 @@ config LRU_CACHE config CLZ_TAB bool -config CORDIC - tristate "CORDIC algorithm" - help - This option provides an implementation of the CORDIC algorithm; - calculations are in fixed point. Module will be called cordic. 
- config DDR bool "JEDEC DDR data" help @@ -628,9 +621,6 @@ config SBITMAP config PARMAN tristate "parman" if COMPILE_TEST -config PRIME_NUMBERS - tristate - config STRING_SELFTEST tristate "Test string functions" diff --git a/lib/Makefile b/lib/Makefile index 83d7df2661ff..fb7697031a79 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,7 +30,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ - idr.o int_sqrt.o extable.o \ + idr.o extable.o \ sha1.o chacha.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ @@ -44,11 +44,11 @@ lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o obj-y += lockref.o -obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ +obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o iov_iter.o clz_ctz.o \ + list_sort.o uuid.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o rhashtable.o reciprocal_div.o \ + percpu-refcount.o rhashtable.o \ once.o refcount.o usercopy.o errseq.o bucket_locks.o \ generic-radix-tree.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o @@ -102,6 +102,8 @@ endif obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any) +obj-y += math/ + obj-$(CONFIG_GENERIC_IOMAP) += iomap.o obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o @@ -121,7 +123,6 @@ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_PACKING) += packing.o -obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o @@ -195,8 +196,6 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o -obj-$(CONFIG_CORDIC) += cordic.o - obj-$(CONFIG_DQL) += dynamic_queue_limits.o obj-$(CONFIG_GLOB) += glob.o @@ -238,8 +237,6 @@ obj-$(CONFIG_ASN1) += asn1_decoder.o obj-$(CONFIG_FONT_SUPPORT) += fonts/ -obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o - hostprogs-y := gen_crc32table hostprogs-y += gen_crc64table clean-files := crc32table.h diff --git a/lib/cordic.c b/lib/cordic.c deleted file mode 100644 index 8ef27c12956f..000000000000 --- a/lib/cordic.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2011 Broadcom Corporation - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ -#include -#include - -static const s32 arctan_table[] = { - 2949120, - 1740967, - 919879, - 466945, - 234379, - 117304, - 58666, - 29335, - 14668, - 7334, - 3667, - 1833, - 917, - 458, - 229, - 115, - 57, - 29 -}; - -/* - * cordic_calc_iq() - calculates the i/q coordinate for given angle - * - * theta: angle in degrees for which i/q coordinate is to be calculated - * coord: function output parameter holding the i/q coordinate - */ -struct cordic_iq cordic_calc_iq(s32 theta) -{ - struct cordic_iq coord; - s32 angle, valtmp; - unsigned iter; - int signx = 1; - int signtheta; - - coord.i = CORDIC_ANGLE_GEN; - coord.q = 0; - angle = 0; - - theta = CORDIC_FIXED(theta); - signtheta = (theta < 0) ? -1 : 1; - theta = ((theta + CORDIC_FIXED(180) * signtheta) % CORDIC_FIXED(360)) - - CORDIC_FIXED(180) * signtheta; - - if (CORDIC_FLOAT(theta) > 90) { - theta -= CORDIC_FIXED(180); - signx = -1; - } else if (CORDIC_FLOAT(theta) < -90) { - theta += CORDIC_FIXED(180); - signx = -1; - } - - for (iter = 0; iter < CORDIC_NUM_ITER; iter++) { - if (theta > angle) { - valtmp = coord.i - (coord.q >> iter); - coord.q += (coord.i >> iter); - angle += arctan_table[iter]; - } else { - valtmp = coord.i + (coord.q >> iter); - coord.q -= (coord.i >> iter); - angle -= arctan_table[iter]; - } - coord.i = valtmp; - } - - coord.i *= signx; - coord.q *= signx; - return coord; -} -EXPORT_SYMBOL(cordic_calc_iq); - -MODULE_DESCRIPTION("CORDIC algorithm"); -MODULE_AUTHOR("Broadcom Corporation"); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/div64.c b/lib/div64.c deleted file mode 100644 index ee146bb4c558..000000000000 --- a/lib/div64.c +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2003 Bernardo Innocenti - * - * Based on former do_div() implementation from asm-parisc/div64.h: - * Copyright (C) 1999 Hewlett-Packard Co - * Copyright (C) 1999 David Mosberger-Tang - * - * - * Generic C version of 64bit/32bit division and modulo, with - * 64bit result and 32bit remainder. - * - * The fast case for (n>>32 == 0) is handled inline by do_div(). - * - * Code generated for this function might be very inefficient - * for some CPUs. __div64_32() can be overridden by linking arch-specific - * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S - * or by defining a preprocessor macro in arch/include/asm/div64.h. 
- */ - -#include -#include -#include - -/* Not needed on 64bit architectures */ -#if BITS_PER_LONG == 32 - -#ifndef __div64_32 -uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) -{ - uint64_t rem = *n; - uint64_t b = base; - uint64_t res, d = 1; - uint32_t high = rem >> 32; - - /* Reduce the thing a bit first */ - res = 0; - if (high >= base) { - high /= base; - res = (uint64_t) high << 32; - rem -= (uint64_t) (high*base) << 32; - } - - while ((int64_t)b > 0 && b < rem) { - b = b+b; - d = d+d; - } - - do { - if (rem >= b) { - rem -= b; - res += d; - } - b >>= 1; - d >>= 1; - } while (d); - - *n = res; - return rem; -} -EXPORT_SYMBOL(__div64_32); -#endif - -/** - * div_s64_rem - signed 64bit divide with 64bit divisor and remainder - * @dividend: 64bit dividend - * @divisor: 64bit divisor - * @remainder: 64bit remainder - */ -#ifndef div_s64_rem -s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) -{ - u64 quotient; - - if (dividend < 0) { - quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); - *remainder = -*remainder; - if (divisor > 0) - quotient = -quotient; - } else { - quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); - if (divisor < 0) - quotient = -quotient; - } - return quotient; -} -EXPORT_SYMBOL(div_s64_rem); -#endif - -/** - * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder - * @dividend: 64bit dividend - * @divisor: 64bit divisor - * @remainder: 64bit remainder - * - * This implementation is a comparable to algorithm used by div64_u64. - * But this operation, which includes math for calculating the remainder, - * is kept distinct to avoid slowing down the div64_u64 operation on 32bit - * systems. - */ -#ifndef div64_u64_rem -u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) -{ - u32 high = divisor >> 32; - u64 quot; - - if (high == 0) { - u32 rem32; - quot = div_u64_rem(dividend, divisor, &rem32); - *remainder = rem32; - } else { - int n = fls(high); - quot = div_u64(dividend >> n, divisor >> n); - - if (quot != 0) - quot--; - - *remainder = dividend - quot * divisor; - if (*remainder >= divisor) { - quot++; - *remainder -= divisor; - } - } - - return quot; -} -EXPORT_SYMBOL(div64_u64_rem); -#endif - -/** - * div64_u64 - unsigned 64bit divide with 64bit divisor - * @dividend: 64bit dividend - * @divisor: 64bit divisor - * - * This implementation is a modified version of the algorithm proposed - * by the book 'Hacker's Delight'. The original source and full proof - * can be found here and is available for use without restriction. - * - * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' - */ -#ifndef div64_u64 -u64 div64_u64(u64 dividend, u64 divisor) -{ - u32 high = divisor >> 32; - u64 quot; - - if (high == 0) { - quot = div_u64(dividend, divisor); - } else { - int n = fls(high); - quot = div_u64(dividend >> n, divisor >> n); - - if (quot != 0) - quot--; - if ((dividend - quot * divisor) >= divisor) - quot++; - } - - return quot; -} -EXPORT_SYMBOL(div64_u64); -#endif - -/** - * div64_s64 - signed 64bit divide with 64bit divisor - * @dividend: 64bit dividend - * @divisor: 64bit divisor - */ -#ifndef div64_s64 -s64 div64_s64(s64 dividend, s64 divisor) -{ - s64 quot, t; - - quot = div64_u64(abs(dividend), abs(divisor)); - t = (dividend ^ divisor) >> 63; - - return (quot ^ t) - t; -} -EXPORT_SYMBOL(div64_s64); -#endif - -#endif /* BITS_PER_LONG == 32 */ - -/* - * Iterative div/mod for use when dividend is not expected to be much - * bigger than divisor. 
- */ -u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) -{ - return __iter_div_u64_rem(dividend, divisor, remainder); -} -EXPORT_SYMBOL(iter_div_u64_rem); diff --git a/lib/gcd.c b/lib/gcd.c deleted file mode 100644 index 7948ab27f0a4..000000000000 --- a/lib/gcd.c +++ /dev/null @@ -1,84 +0,0 @@ -#include -#include -#include - -/* - * This implements the binary GCD algorithm. (Often attributed to Stein, - * but as Knuth has noted, appears in a first-century Chinese math text.) - * - * This is faster than the division-based algorithm even on x86, which - * has decent hardware division. - */ - -#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) - -/* If __ffs is available, the even/odd algorithm benchmarks slower. */ - -/** - * gcd - calculate and return the greatest common divisor of 2 unsigned longs - * @a: first value - * @b: second value - */ -unsigned long gcd(unsigned long a, unsigned long b) -{ - unsigned long r = a | b; - - if (!a || !b) - return r; - - b >>= __ffs(b); - if (b == 1) - return r & -r; - - for (;;) { - a >>= __ffs(a); - if (a == 1) - return r & -r; - if (a == b) - return a << __ffs(r); - - if (a < b) - swap(a, b); - a -= b; - } -} - -#else - -/* If normalization is done by loops, the even/odd algorithm is a win. */ -unsigned long gcd(unsigned long a, unsigned long b) -{ - unsigned long r = a | b; - - if (!a || !b) - return r; - - /* Isolate lsbit of r */ - r &= -r; - - while (!(b & r)) - b >>= 1; - if (b == r) - return r; - - for (;;) { - while (!(a & r)) - a >>= 1; - if (a == r) - return r; - if (a == b) - return a; - - if (a < b) - swap(a, b); - a -= b; - a >>= 1; - if (a & r) - a += b; - a >>= 1; - } -} - -#endif - -EXPORT_SYMBOL_GPL(gcd); diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c deleted file mode 100644 index 30e0f9770f88..000000000000 --- a/lib/int_sqrt.c +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2013 Davidlohr Bueso - * - * Based on the shift-and-subtract algorithm for computing integer - * square root from Guy L. Steele. - */ - -#include -#include -#include - -/** - * int_sqrt - computes the integer square root - * @x: integer of which to calculate the sqrt - * - * Computes: floor(sqrt(x)) - */ -unsigned long int_sqrt(unsigned long x) -{ - unsigned long b, m, y = 0; - - if (x <= 1) - return x; - - m = 1UL << (__fls(x) & ~1UL); - while (m != 0) { - b = y + m; - y >>= 1; - - if (x >= b) { - x -= b; - y += m; - } - m >>= 2; - } - - return y; -} -EXPORT_SYMBOL(int_sqrt); - -#if BITS_PER_LONG < 64 -/** - * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input - * is expected. - * @x: 64bit integer of which to calculate the sqrt - */ -u32 int_sqrt64(u64 x) -{ - u64 b, m, y = 0; - - if (x <= ULONG_MAX) - return int_sqrt((unsigned long) x); - - m = 1ULL << ((fls64(x) - 1) & ~1ULL); - while (m != 0) { - b = y + m; - y >>= 1; - - if (x >= b) { - x -= b; - y += m; - } - m >>= 2; - } - - return y; -} -EXPORT_SYMBOL(int_sqrt64); -#endif diff --git a/lib/lcm.c b/lib/lcm.c deleted file mode 100644 index 03d7fcb420b5..000000000000 --- a/lib/lcm.c +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#include -#include - -/* Lowest common multiple */ -unsigned long lcm(unsigned long a, unsigned long b) -{ - if (a && b) - return (a / gcd(a, b)) * b; - else - return 0; -} -EXPORT_SYMBOL_GPL(lcm); - -unsigned long lcm_not_zero(unsigned long a, unsigned long b) -{ - unsigned long l = lcm(a, b); - - if (l) - return l; - - return (b ? 
: a); -} -EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/math/Kconfig b/lib/math/Kconfig new file mode 100644 index 000000000000..73bdf37178d1 --- /dev/null +++ b/lib/math/Kconfig @@ -0,0 +1,11 @@ +config CORDIC + tristate "CORDIC algorithm" + help + This option provides an implementation of the CORDIC algorithm; + calculations are in fixed point. Module will be called cordic. + +config PRIME_NUMBERS + tristate + +config RATIONAL + bool diff --git a/lib/math/Makefile b/lib/math/Makefile new file mode 100644 index 000000000000..b75878420da6 --- /dev/null +++ b/lib/math/Makefile @@ -0,0 +1,5 @@ +obj-y += div64.o gcd.o lcm.o int_sqrt.o reciprocal_div.o + +obj-$(CONFIG_CORDIC) += cordic.o +obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o +obj-$(CONFIG_RATIONAL) += rational.o diff --git a/lib/math/cordic.c b/lib/math/cordic.c new file mode 100644 index 000000000000..8ef27c12956f --- /dev/null +++ b/lib/math/cordic.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include +#include + +static const s32 arctan_table[] = { + 2949120, + 1740967, + 919879, + 466945, + 234379, + 117304, + 58666, + 29335, + 14668, + 7334, + 3667, + 1833, + 917, + 458, + 229, + 115, + 57, + 29 +}; + +/* + * cordic_calc_iq() - calculates the i/q coordinate for given angle + * + * theta: angle in degrees for which i/q coordinate is to be calculated + * coord: function output parameter holding the i/q coordinate + */ +struct cordic_iq cordic_calc_iq(s32 theta) +{ + struct cordic_iq coord; + s32 angle, valtmp; + unsigned iter; + int signx = 1; + int signtheta; + + coord.i = CORDIC_ANGLE_GEN; + coord.q = 0; + angle = 0; + + theta = CORDIC_FIXED(theta); + signtheta = (theta < 0) ? 
-1 : 1; + theta = ((theta + CORDIC_FIXED(180) * signtheta) % CORDIC_FIXED(360)) - + CORDIC_FIXED(180) * signtheta; + + if (CORDIC_FLOAT(theta) > 90) { + theta -= CORDIC_FIXED(180); + signx = -1; + } else if (CORDIC_FLOAT(theta) < -90) { + theta += CORDIC_FIXED(180); + signx = -1; + } + + for (iter = 0; iter < CORDIC_NUM_ITER; iter++) { + if (theta > angle) { + valtmp = coord.i - (coord.q >> iter); + coord.q += (coord.i >> iter); + angle += arctan_table[iter]; + } else { + valtmp = coord.i + (coord.q >> iter); + coord.q -= (coord.i >> iter); + angle -= arctan_table[iter]; + } + coord.i = valtmp; + } + + coord.i *= signx; + coord.q *= signx; + return coord; +} +EXPORT_SYMBOL(cordic_calc_iq); + +MODULE_DESCRIPTION("CORDIC algorithm"); +MODULE_AUTHOR("Broadcom Corporation"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/math/div64.c b/lib/math/div64.c new file mode 100644 index 000000000000..368ca7fd0d82 --- /dev/null +++ b/lib/math/div64.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2003 Bernardo Innocenti + * + * Based on former do_div() implementation from asm-parisc/div64.h: + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang + * + * + * Generic C version of 64bit/32bit division and modulo, with + * 64bit result and 32bit remainder. + * + * The fast case for (n>>32 == 0) is handled inline by do_div(). + * + * Code generated for this function might be very inefficient + * for some CPUs. __div64_32() can be overridden by linking arch-specific + * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S + * or by defining a preprocessor macro in arch/include/asm/div64.h. + */ + +#include +#include +#include + +/* Not needed on 64bit architectures */ +#if BITS_PER_LONG == 32 + +#ifndef __div64_32 +uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) +{ + uint64_t rem = *n; + uint64_t b = base; + uint64_t res, d = 1; + uint32_t high = rem >> 32; + + /* Reduce the thing a bit first */ + res = 0; + if (high >= base) { + high /= base; + res = (uint64_t) high << 32; + rem -= (uint64_t) (high*base) << 32; + } + + while ((int64_t)b > 0 && b < rem) { + b = b+b; + d = d+d; + } + + do { + if (rem >= b) { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + *n = res; + return rem; +} +EXPORT_SYMBOL(__div64_32); +#endif + +/** + * div_s64_rem - signed 64bit divide with 64bit divisor and remainder + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * @remainder: 64bit remainder + */ +#ifndef div_s64_rem +s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) +{ + u64 quotient; + + if (dividend < 0) { + quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); + *remainder = -*remainder; + if (divisor > 0) + quotient = -quotient; + } else { + quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); + if (divisor < 0) + quotient = -quotient; + } + return quotient; +} +EXPORT_SYMBOL(div_s64_rem); +#endif + +/** + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * @remainder: 64bit remainder + * + * This implementation is a comparable to algorithm used by div64_u64. + * But this operation, which includes math for calculating the remainder, + * is kept distinct to avoid slowing down the div64_u64 operation on 32bit + * systems. 
+ */ +#ifndef div64_u64_rem +u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) +{ + u32 high = divisor >> 32; + u64 quot; + + if (high == 0) { + u32 rem32; + quot = div_u64_rem(dividend, divisor, &rem32); + *remainder = rem32; + } else { + int n = fls(high); + quot = div_u64(dividend >> n, divisor >> n); + + if (quot != 0) + quot--; + + *remainder = dividend - quot * divisor; + if (*remainder >= divisor) { + quot++; + *remainder -= divisor; + } + } + + return quot; +} +EXPORT_SYMBOL(div64_u64_rem); +#endif + +/** + * div64_u64 - unsigned 64bit divide with 64bit divisor + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * + * This implementation is a modified version of the algorithm proposed + * by the book 'Hacker's Delight'. The original source and full proof + * can be found here and is available for use without restriction. + * + * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' + */ +#ifndef div64_u64 +u64 div64_u64(u64 dividend, u64 divisor) +{ + u32 high = divisor >> 32; + u64 quot; + + if (high == 0) { + quot = div_u64(dividend, divisor); + } else { + int n = fls(high); + quot = div_u64(dividend >> n, divisor >> n); + + if (quot != 0) + quot--; + if ((dividend - quot * divisor) >= divisor) + quot++; + } + + return quot; +} +EXPORT_SYMBOL(div64_u64); +#endif + +/** + * div64_s64 - signed 64bit divide with 64bit divisor + * @dividend: 64bit dividend + * @divisor: 64bit divisor + */ +#ifndef div64_s64 +s64 div64_s64(s64 dividend, s64 divisor) +{ + s64 quot, t; + + quot = div64_u64(abs(dividend), abs(divisor)); + t = (dividend ^ divisor) >> 63; + + return (quot ^ t) - t; +} +EXPORT_SYMBOL(div64_s64); +#endif + +#endif /* BITS_PER_LONG == 32 */ + +/* + * Iterative div/mod for use when dividend is not expected to be much + * bigger than divisor. + */ +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + return __iter_div_u64_rem(dividend, divisor, remainder); +} +EXPORT_SYMBOL(iter_div_u64_rem); diff --git a/lib/math/gcd.c b/lib/math/gcd.c new file mode 100644 index 000000000000..7948ab27f0a4 --- /dev/null +++ b/lib/math/gcd.c @@ -0,0 +1,84 @@ +#include +#include +#include + +/* + * This implements the binary GCD algorithm. (Often attributed to Stein, + * but as Knuth has noted, appears in a first-century Chinese math text.) + * + * This is faster than the division-based algorithm even on x86, which + * has decent hardware division. + */ + +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + +/* If __ffs is available, the even/odd algorithm benchmarks slower. */ + +/** + * gcd - calculate and return the greatest common divisor of 2 unsigned longs + * @a: first value + * @b: second value + */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + b >>= __ffs(b); + if (b == 1) + return r & -r; + + for (;;) { + a >>= __ffs(a); + if (a == 1) + return r & -r; + if (a == b) + return a << __ffs(r); + + if (a < b) + swap(a, b); + a -= b; + } +} + +#else + +/* If normalization is done by loops, the even/odd algorithm is a win. 
*/ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + /* Isolate lsbit of r */ + r &= -r; + + while (!(b & r)) + b >>= 1; + if (b == r) + return r; + + for (;;) { + while (!(a & r)) + a >>= 1; + if (a == r) + return r; + if (a == b) + return a; + + if (a < b) + swap(a, b); + a -= b; + a >>= 1; + if (a & r) + a += b; + a >>= 1; + } +} + +#endif + +EXPORT_SYMBOL_GPL(gcd); diff --git a/lib/math/int_sqrt.c b/lib/math/int_sqrt.c new file mode 100644 index 000000000000..30e0f9770f88 --- /dev/null +++ b/lib/math/int_sqrt.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * Based on the shift-and-subtract algorithm for computing integer + * square root from Guy L. Steele. + */ + +#include +#include +#include + +/** + * int_sqrt - computes the integer square root + * @x: integer of which to calculate the sqrt + * + * Computes: floor(sqrt(x)) + */ +unsigned long int_sqrt(unsigned long x) +{ + unsigned long b, m, y = 0; + + if (x <= 1) + return x; + + m = 1UL << (__fls(x) & ~1UL); + while (m != 0) { + b = y + m; + y >>= 1; + + if (x >= b) { + x -= b; + y += m; + } + m >>= 2; + } + + return y; +} +EXPORT_SYMBOL(int_sqrt); + +#if BITS_PER_LONG < 64 +/** + * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input + * is expected. + * @x: 64bit integer of which to calculate the sqrt + */ +u32 int_sqrt64(u64 x) +{ + u64 b, m, y = 0; + + if (x <= ULONG_MAX) + return int_sqrt((unsigned long) x); + + m = 1ULL << ((fls64(x) - 1) & ~1ULL); + while (m != 0) { + b = y + m; + y >>= 1; + + if (x >= b) { + x -= b; + y += m; + } + m >>= 2; + } + + return y; +} +EXPORT_SYMBOL(int_sqrt64); +#endif diff --git a/lib/math/lcm.c b/lib/math/lcm.c new file mode 100644 index 000000000000..03d7fcb420b5 --- /dev/null +++ b/lib/math/lcm.c @@ -0,0 +1,25 @@ +#include +#include +#include +#include + +/* Lowest common multiple */ +unsigned long lcm(unsigned long a, unsigned long b) +{ + if (a && b) + return (a / gcd(a, b)) * b; + else + return 0; +} +EXPORT_SYMBOL_GPL(lcm); + +unsigned long lcm_not_zero(unsigned long a, unsigned long b) +{ + unsigned long l = lcm(a, b); + + if (l) + return l; + + return (b ? 
: a); +} +EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/math/prime_numbers.c b/lib/math/prime_numbers.c new file mode 100644 index 000000000000..550eec457c2e --- /dev/null +++ b/lib/math/prime_numbers.c @@ -0,0 +1,315 @@ +#define pr_fmt(fmt) "prime numbers: " fmt "\n" + +#include +#include +#include +#include + +#define bitmap_size(nbits) (BITS_TO_LONGS(nbits) * sizeof(unsigned long)) + +struct primes { + struct rcu_head rcu; + unsigned long last, sz; + unsigned long primes[]; +}; + +#if BITS_PER_LONG == 64 +static const struct primes small_primes = { + .last = 61, + .sz = 64, + .primes = { + BIT(2) | + BIT(3) | + BIT(5) | + BIT(7) | + BIT(11) | + BIT(13) | + BIT(17) | + BIT(19) | + BIT(23) | + BIT(29) | + BIT(31) | + BIT(37) | + BIT(41) | + BIT(43) | + BIT(47) | + BIT(53) | + BIT(59) | + BIT(61) + } +}; +#elif BITS_PER_LONG == 32 +static const struct primes small_primes = { + .last = 31, + .sz = 32, + .primes = { + BIT(2) | + BIT(3) | + BIT(5) | + BIT(7) | + BIT(11) | + BIT(13) | + BIT(17) | + BIT(19) | + BIT(23) | + BIT(29) | + BIT(31) + } +}; +#else +#error "unhandled BITS_PER_LONG" +#endif + +static DEFINE_MUTEX(lock); +static const struct primes __rcu *primes = RCU_INITIALIZER(&small_primes); + +static unsigned long selftest_max; + +static bool slow_is_prime_number(unsigned long x) +{ + unsigned long y = int_sqrt(x); + + while (y > 1) { + if ((x % y) == 0) + break; + y--; + } + + return y == 1; +} + +static unsigned long slow_next_prime_number(unsigned long x) +{ + while (x < ULONG_MAX && !slow_is_prime_number(++x)) + ; + + return x; +} + +static unsigned long clear_multiples(unsigned long x, + unsigned long *p, + unsigned long start, + unsigned long end) +{ + unsigned long m; + + m = 2 * x; + if (m < start) + m = roundup(start, x); + + while (m < end) { + __clear_bit(m, p); + m += x; + } + + return x; +} + +static bool expand_to_next_prime(unsigned long x) +{ + const struct primes *p; + struct primes *new; + unsigned long sz, y; + + /* Betrand's Postulate (or Chebyshev's theorem) states that if n > 3, + * there is always at least one prime p between n and 2n - 2. + * Equivalently, if n > 1, then there is always at least one prime p + * such that n < p < 2n. + * + * http://mathworld.wolfram.com/BertrandsPostulate.html + * https://en.wikipedia.org/wiki/Bertrand's_postulate + */ + sz = 2 * x; + if (sz < x) + return false; + + sz = round_up(sz, BITS_PER_LONG); + new = kmalloc(sizeof(*new) + bitmap_size(sz), + GFP_KERNEL | __GFP_NOWARN); + if (!new) + return false; + + mutex_lock(&lock); + p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); + if (x < p->last) { + kfree(new); + goto unlock; + } + + /* Where memory permits, track the primes using the + * Sieve of Eratosthenes. The sieve is to remove all multiples of known + * primes from the set, what remains in the set is therefore prime. 
+ */ + bitmap_fill(new->primes, sz); + bitmap_copy(new->primes, p->primes, p->sz); + for (y = 2UL; y < sz; y = find_next_bit(new->primes, sz, y + 1)) + new->last = clear_multiples(y, new->primes, p->sz, sz); + new->sz = sz; + + BUG_ON(new->last <= x); + + rcu_assign_pointer(primes, new); + if (p != &small_primes) + kfree_rcu((struct primes *)p, rcu); + +unlock: + mutex_unlock(&lock); + return true; +} + +static void free_primes(void) +{ + const struct primes *p; + + mutex_lock(&lock); + p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); + if (p != &small_primes) { + rcu_assign_pointer(primes, &small_primes); + kfree_rcu((struct primes *)p, rcu); + } + mutex_unlock(&lock); +} + +/** + * next_prime_number - return the next prime number + * @x: the starting point for searching to test + * + * A prime number is an integer greater than 1 that is only divisible by + * itself and 1. The set of prime numbers is computed using the Sieve of + * Eratoshenes (on finding a prime, all multiples of that prime are removed + * from the set) enabling a fast lookup of the next prime number larger than + * @x. If the sieve fails (memory limitation), the search falls back to using + * slow trial-divison, up to the value of ULONG_MAX (which is reported as the + * final prime as a sentinel). + * + * Returns: the next prime number larger than @x + */ +unsigned long next_prime_number(unsigned long x) +{ + const struct primes *p; + + rcu_read_lock(); + p = rcu_dereference(primes); + while (x >= p->last) { + rcu_read_unlock(); + + if (!expand_to_next_prime(x)) + return slow_next_prime_number(x); + + rcu_read_lock(); + p = rcu_dereference(primes); + } + x = find_next_bit(p->primes, p->last, x + 1); + rcu_read_unlock(); + + return x; +} +EXPORT_SYMBOL(next_prime_number); + +/** + * is_prime_number - test whether the given number is prime + * @x: the number to test + * + * A prime number is an integer greater than 1 that is only divisible by + * itself and 1. Internally a cache of prime numbers is kept (to speed up + * searching for sequential primes, see next_prime_number()), but if the number + * falls outside of that cache, its primality is tested using trial-divison. + * + * Returns: true if @x is prime, false for composite numbers. + */ +bool is_prime_number(unsigned long x) +{ + const struct primes *p; + bool result; + + rcu_read_lock(); + p = rcu_dereference(primes); + while (x >= p->sz) { + rcu_read_unlock(); + + if (!expand_to_next_prime(x)) + return slow_is_prime_number(x); + + rcu_read_lock(); + p = rcu_dereference(primes); + } + result = test_bit(x, p->primes); + rcu_read_unlock(); + + return result; +} +EXPORT_SYMBOL(is_prime_number); + +static void dump_primes(void) +{ + const struct primes *p; + char *buf; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + + rcu_read_lock(); + p = rcu_dereference(primes); + + if (buf) + bitmap_print_to_pagebuf(true, buf, p->primes, p->sz); + pr_info("primes.{last=%lu, .sz=%lu, .primes[]=...x%lx} = %s", + p->last, p->sz, p->primes[BITS_TO_LONGS(p->sz) - 1], buf); + + rcu_read_unlock(); + + kfree(buf); +} + +static int selftest(unsigned long max) +{ + unsigned long x, last; + + if (!max) + return 0; + + for (last = 0, x = 2; x < max; x++) { + bool slow = slow_is_prime_number(x); + bool fast = is_prime_number(x); + + if (slow != fast) { + pr_err("inconsistent result for is-prime(%lu): slow=%s, fast=%s!", + x, slow ? "yes" : "no", fast ? 
"yes" : "no"); + goto err; + } + + if (!slow) + continue; + + if (next_prime_number(last) != x) { + pr_err("incorrect result for next-prime(%lu): expected %lu, got %lu", + last, x, next_prime_number(last)); + goto err; + } + last = x; + } + + pr_info("selftest(%lu) passed, last prime was %lu", x, last); + return 0; + +err: + dump_primes(); + return -EINVAL; +} + +static int __init primes_init(void) +{ + return selftest(selftest_max); +} + +static void __exit primes_exit(void) +{ + free_primes(); +} + +module_init(primes_init); +module_exit(primes_exit); + +module_param_named(selftest, selftest_max, ulong, 0400); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/lib/math/rational.c b/lib/math/rational.c new file mode 100644 index 000000000000..ba7443677c90 --- /dev/null +++ b/lib/math/rational.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer + * + * helper functions when coping with rational numbers + */ + +#include +#include +#include + +/* + * calculate best rational approximation for a given fraction + * taking into account restricted register size, e.g. to find + * appropriate values for a pll with 5 bit denominator and + * 8 bit numerator register fields, trying to set up with a + * frequency ratio of 3.1415, one would say: + * + * rational_best_approximation(31415, 10000, + * (1 << 8) - 1, (1 << 5) - 1, &n, &d); + * + * you may look at given_numerator as a fixed point number, + * with the fractional part size described in given_denominator. + * + * for theoretical background, see: + * http://en.wikipedia.org/wiki/Continued_fraction + */ + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator) +{ + unsigned long n, d, n0, d0, n1, d1; + n = given_numerator; + d = given_denominator; + n0 = d1 = 0; + n1 = d0 = 1; + for (;;) { + unsigned long t, a; + if ((n1 > max_numerator) || (d1 > max_denominator)) { + n1 = n0; + d1 = d0; + break; + } + if (d == 0) + break; + t = d; + a = n / d; + d = n % d; + n = t; + t = n0 + a * n1; + n0 = n1; + n1 = t; + t = d0 + a * d1; + d0 = d1; + d1 = t; + } + *best_numerator = n1; + *best_denominator = d1; +} + +EXPORT_SYMBOL(rational_best_approximation); diff --git a/lib/math/reciprocal_div.c b/lib/math/reciprocal_div.c new file mode 100644 index 000000000000..bf043258fa00 --- /dev/null +++ b/lib/math/reciprocal_div.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) +{ + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; +} +EXPORT_SYMBOL(reciprocal_value); + +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) +{ + struct reciprocal_value_adv R; + u32 l, post_shift; + u64 mhigh, mlow; + + /* ceil(log2(d)) */ + l = fls(d - 1); + /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to + * be handled before calling "reciprocal_value_adv", please see the + * comment at include/linux/reciprocal_div.h. 
+ */ + WARN(l == 32, + "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor", + d, __func__); + post_shift = l; + mlow = 1ULL << (32 + l); + do_div(mlow, d); + mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec)); + do_div(mhigh, d); + + for (; post_shift > 0; post_shift--) { + u64 lo = mlow >> 1, hi = mhigh >> 1; + + if (lo >= hi) + break; + + mlow = lo; + mhigh = hi; + } + + R.m = (u32)mhigh; + R.sh = post_shift; + R.exp = l; + R.is_wide_m = mhigh > U32_MAX; + + return R; +} +EXPORT_SYMBOL(reciprocal_value_adv); diff --git a/lib/prime_numbers.c b/lib/prime_numbers.c deleted file mode 100644 index 550eec457c2e..000000000000 --- a/lib/prime_numbers.c +++ /dev/null @@ -1,315 +0,0 @@ -#define pr_fmt(fmt) "prime numbers: " fmt "\n" - -#include -#include -#include -#include - -#define bitmap_size(nbits) (BITS_TO_LONGS(nbits) * sizeof(unsigned long)) - -struct primes { - struct rcu_head rcu; - unsigned long last, sz; - unsigned long primes[]; -}; - -#if BITS_PER_LONG == 64 -static const struct primes small_primes = { - .last = 61, - .sz = 64, - .primes = { - BIT(2) | - BIT(3) | - BIT(5) | - BIT(7) | - BIT(11) | - BIT(13) | - BIT(17) | - BIT(19) | - BIT(23) | - BIT(29) | - BIT(31) | - BIT(37) | - BIT(41) | - BIT(43) | - BIT(47) | - BIT(53) | - BIT(59) | - BIT(61) - } -}; -#elif BITS_PER_LONG == 32 -static const struct primes small_primes = { - .last = 31, - .sz = 32, - .primes = { - BIT(2) | - BIT(3) | - BIT(5) | - BIT(7) | - BIT(11) | - BIT(13) | - BIT(17) | - BIT(19) | - BIT(23) | - BIT(29) | - BIT(31) - } -}; -#else -#error "unhandled BITS_PER_LONG" -#endif - -static DEFINE_MUTEX(lock); -static const struct primes __rcu *primes = RCU_INITIALIZER(&small_primes); - -static unsigned long selftest_max; - -static bool slow_is_prime_number(unsigned long x) -{ - unsigned long y = int_sqrt(x); - - while (y > 1) { - if ((x % y) == 0) - break; - y--; - } - - return y == 1; -} - -static unsigned long slow_next_prime_number(unsigned long x) -{ - while (x < ULONG_MAX && !slow_is_prime_number(++x)) - ; - - return x; -} - -static unsigned long clear_multiples(unsigned long x, - unsigned long *p, - unsigned long start, - unsigned long end) -{ - unsigned long m; - - m = 2 * x; - if (m < start) - m = roundup(start, x); - - while (m < end) { - __clear_bit(m, p); - m += x; - } - - return x; -} - -static bool expand_to_next_prime(unsigned long x) -{ - const struct primes *p; - struct primes *new; - unsigned long sz, y; - - /* Betrand's Postulate (or Chebyshev's theorem) states that if n > 3, - * there is always at least one prime p between n and 2n - 2. - * Equivalently, if n > 1, then there is always at least one prime p - * such that n < p < 2n. - * - * http://mathworld.wolfram.com/BertrandsPostulate.html - * https://en.wikipedia.org/wiki/Bertrand's_postulate - */ - sz = 2 * x; - if (sz < x) - return false; - - sz = round_up(sz, BITS_PER_LONG); - new = kmalloc(sizeof(*new) + bitmap_size(sz), - GFP_KERNEL | __GFP_NOWARN); - if (!new) - return false; - - mutex_lock(&lock); - p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); - if (x < p->last) { - kfree(new); - goto unlock; - } - - /* Where memory permits, track the primes using the - * Sieve of Eratosthenes. The sieve is to remove all multiples of known - * primes from the set, what remains in the set is therefore prime. 
- */ - bitmap_fill(new->primes, sz); - bitmap_copy(new->primes, p->primes, p->sz); - for (y = 2UL; y < sz; y = find_next_bit(new->primes, sz, y + 1)) - new->last = clear_multiples(y, new->primes, p->sz, sz); - new->sz = sz; - - BUG_ON(new->last <= x); - - rcu_assign_pointer(primes, new); - if (p != &small_primes) - kfree_rcu((struct primes *)p, rcu); - -unlock: - mutex_unlock(&lock); - return true; -} - -static void free_primes(void) -{ - const struct primes *p; - - mutex_lock(&lock); - p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); - if (p != &small_primes) { - rcu_assign_pointer(primes, &small_primes); - kfree_rcu((struct primes *)p, rcu); - } - mutex_unlock(&lock); -} - -/** - * next_prime_number - return the next prime number - * @x: the starting point for searching to test - * - * A prime number is an integer greater than 1 that is only divisible by - * itself and 1. The set of prime numbers is computed using the Sieve of - * Eratoshenes (on finding a prime, all multiples of that prime are removed - * from the set) enabling a fast lookup of the next prime number larger than - * @x. If the sieve fails (memory limitation), the search falls back to using - * slow trial-divison, up to the value of ULONG_MAX (which is reported as the - * final prime as a sentinel). - * - * Returns: the next prime number larger than @x - */ -unsigned long next_prime_number(unsigned long x) -{ - const struct primes *p; - - rcu_read_lock(); - p = rcu_dereference(primes); - while (x >= p->last) { - rcu_read_unlock(); - - if (!expand_to_next_prime(x)) - return slow_next_prime_number(x); - - rcu_read_lock(); - p = rcu_dereference(primes); - } - x = find_next_bit(p->primes, p->last, x + 1); - rcu_read_unlock(); - - return x; -} -EXPORT_SYMBOL(next_prime_number); - -/** - * is_prime_number - test whether the given number is prime - * @x: the number to test - * - * A prime number is an integer greater than 1 that is only divisible by - * itself and 1. Internally a cache of prime numbers is kept (to speed up - * searching for sequential primes, see next_prime_number()), but if the number - * falls outside of that cache, its primality is tested using trial-divison. - * - * Returns: true if @x is prime, false for composite numbers. - */ -bool is_prime_number(unsigned long x) -{ - const struct primes *p; - bool result; - - rcu_read_lock(); - p = rcu_dereference(primes); - while (x >= p->sz) { - rcu_read_unlock(); - - if (!expand_to_next_prime(x)) - return slow_is_prime_number(x); - - rcu_read_lock(); - p = rcu_dereference(primes); - } - result = test_bit(x, p->primes); - rcu_read_unlock(); - - return result; -} -EXPORT_SYMBOL(is_prime_number); - -static void dump_primes(void) -{ - const struct primes *p; - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - - rcu_read_lock(); - p = rcu_dereference(primes); - - if (buf) - bitmap_print_to_pagebuf(true, buf, p->primes, p->sz); - pr_info("primes.{last=%lu, .sz=%lu, .primes[]=...x%lx} = %s", - p->last, p->sz, p->primes[BITS_TO_LONGS(p->sz) - 1], buf); - - rcu_read_unlock(); - - kfree(buf); -} - -static int selftest(unsigned long max) -{ - unsigned long x, last; - - if (!max) - return 0; - - for (last = 0, x = 2; x < max; x++) { - bool slow = slow_is_prime_number(x); - bool fast = is_prime_number(x); - - if (slow != fast) { - pr_err("inconsistent result for is-prime(%lu): slow=%s, fast=%s!", - x, slow ? "yes" : "no", fast ? 
"yes" : "no"); - goto err; - } - - if (!slow) - continue; - - if (next_prime_number(last) != x) { - pr_err("incorrect result for next-prime(%lu): expected %lu, got %lu", - last, x, next_prime_number(last)); - goto err; - } - last = x; - } - - pr_info("selftest(%lu) passed, last prime was %lu", x, last); - return 0; - -err: - dump_primes(); - return -EINVAL; -} - -static int __init primes_init(void) -{ - return selftest(selftest_max); -} - -static void __exit primes_exit(void) -{ - free_primes(); -} - -module_init(primes_init); -module_exit(primes_exit); - -module_param_named(selftest, selftest_max, ulong, 0400); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); diff --git a/lib/rational.c b/lib/rational.c deleted file mode 100644 index ba7443677c90..000000000000 --- a/lib/rational.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * rational fractions - * - * Copyright (C) 2009 emlix GmbH, Oskar Schirmer - * - * helper functions when coping with rational numbers - */ - -#include -#include -#include - -/* - * calculate best rational approximation for a given fraction - * taking into account restricted register size, e.g. to find - * appropriate values for a pll with 5 bit denominator and - * 8 bit numerator register fields, trying to set up with a - * frequency ratio of 3.1415, one would say: - * - * rational_best_approximation(31415, 10000, - * (1 << 8) - 1, (1 << 5) - 1, &n, &d); - * - * you may look at given_numerator as a fixed point number, - * with the fractional part size described in given_denominator. - * - * for theoretical background, see: - * http://en.wikipedia.org/wiki/Continued_fraction - */ - -void rational_best_approximation( - unsigned long given_numerator, unsigned long given_denominator, - unsigned long max_numerator, unsigned long max_denominator, - unsigned long *best_numerator, unsigned long *best_denominator) -{ - unsigned long n, d, n0, d0, n1, d1; - n = given_numerator; - d = given_denominator; - n0 = d1 = 0; - n1 = d0 = 1; - for (;;) { - unsigned long t, a; - if ((n1 > max_numerator) || (d1 > max_denominator)) { - n1 = n0; - d1 = d0; - break; - } - if (d == 0) - break; - t = d; - a = n / d; - d = n % d; - n = t; - t = n0 + a * n1; - n0 = n1; - n1 = t; - t = d0 + a * d1; - d0 = d1; - d1 = t; - } - *best_numerator = n1; - *best_denominator = d1; -} - -EXPORT_SYMBOL(rational_best_approximation); diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c deleted file mode 100644 index bf043258fa00..000000000000 --- a/lib/reciprocal_div.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include - -/* - * For a description of the algorithm please have a look at - * include/linux/reciprocal_div.h - */ - -struct reciprocal_value reciprocal_value(u32 d) -{ - struct reciprocal_value R; - u64 m; - int l; - - l = fls(d - 1); - m = ((1ULL << 32) * ((1ULL << l) - d)); - do_div(m, d); - ++m; - R.m = (u32)m; - R.sh1 = min(l, 1); - R.sh2 = max(l - 1, 0); - - return R; -} -EXPORT_SYMBOL(reciprocal_value); - -struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) -{ - struct reciprocal_value_adv R; - u32 l, post_shift; - u64 mhigh, mlow; - - /* ceil(log2(d)) */ - l = fls(d - 1); - /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to - * be handled before calling "reciprocal_value_adv", please see the - * comment at include/linux/reciprocal_div.h. 
- */
-	WARN(l == 32,
-	     "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor",
-	     d, __func__);
-	post_shift = l;
-	mlow = 1ULL << (32 + l);
-	do_div(mlow, d);
-	mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
-	do_div(mhigh, d);
-
-	for (; post_shift > 0; post_shift--) {
-		u64 lo = mlow >> 1, hi = mhigh >> 1;
-
-		if (lo >= hi)
-			break;
-
-		mlow = lo;
-		mhigh = hi;
-	}
-
-	R.m = (u32)mhigh;
-	R.sh = post_shift;
-	R.exp = l;
-	R.is_wide_m = mhigh > U32_MAX;
-
-	return R;
-}
-EXPORT_SYMBOL(reciprocal_value_adv);
-- cgit

From 9f6158946987a5ce3f16da097d18f240a89db417 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Tue, 14 May 2019 15:43:08 -0700
Subject: lib/math: move int_pow() from pwm_bl.c for wider use

The integer exponentiation is used in a few places and might be used in
the future by other call sites. Move it to lib/math/ for wider use.

Link: http://lkml.kernel.org/r/20190323172531.80025-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko
Cc: Daniel Thompson
Cc: Lee Jones
Cc: Ray Jui
Cc: Thierry Reding
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/math/Makefile  |  2 +-
 lib/math/int_pow.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 lib/math/int_pow.c

(limited to 'lib')

diff --git a/lib/math/Makefile b/lib/math/Makefile
index b75878420da6..583bbfebfc09 100644
--- a/lib/math/Makefile
+++ b/lib/math/Makefile
@@ -1,4 +1,4 @@
-obj-y += div64.o gcd.o lcm.o int_sqrt.o reciprocal_div.o
+obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o reciprocal_div.o

 obj-$(CONFIG_CORDIC)		+= cordic.o
 obj-$(CONFIG_PRIME_NUMBERS)	+= prime_numbers.o
diff --git a/lib/math/int_pow.c b/lib/math/int_pow.c
new file mode 100644
index 000000000000..622fc1ab3c74
--- /dev/null
+++ b/lib/math/int_pow.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * An integer based power function
+ *
+ * Derived from drivers/video/backlight/pwm_bl.c
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+/**
+ * int_pow - computes the exponentiation of the given base and exponent
+ * @base: base which will be raised to the given power
+ * @exp: power to be raised to
+ *
+ * Computes: pow(base, exp), i.e. @base raised to the @exp power
+ */
+u64 int_pow(u64 base, unsigned int exp)
+{
+	u64 result = 1;
+
+	while (exp) {
+		if (exp & 1)
+			result *= base;
+		exp >>= 1;
+		base *= base;
+	}
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(int_pow);
-- cgit

From 281327c99bcaa8dbd4fe49cc32178dc59b0e61b8 Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Tue, 14 May 2019 15:43:11 -0700
Subject: lib: make bitmap_parselist_user() a wrapper on bitmap_parselist()

Patch series "lib: rework bitmap_parselist and tests", v5.

bitmap_parselist() has evolved from a pretty simple idea over a long
time and is now overdue for refactoring. It is not structured, has
nested loops and a set of opaquely named variables. Things are more
complicated because bitmap_parselist() is part of the user interface,
and its behavior must not change.

In this patchset
- bitmap_parselist_user() is made a wrapper on bitmap_parselist();
- bitmap_parselist() is reworked (patch 2);
- time measurement in test_bitmap_parselist is switched to ktime_get
  (patch 3);
- new tests are introduced (patch 4), and
- bitmap_parselist_user() testing is enabled with the same testset as
  bitmap_parselist() (patch 5).

This patch (of 5):

Currently we parse user data byte after byte, which leads to
overcomplication of the parsing algorithm.
The only user of bitmap_parselist_user() is not performance-critical, and so we can duplicate user data to kernel buffer and simply call bitmap_parselist(). This rework lets us unify and simplify bitmap_parselist() and bitmap_parselist_user(), which is done in the following patch. Link: http://lkml.kernel.org/r/20190405173211.11373-2-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Tetsuo Handa Cc: Mike Travis Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 3f3b8051f342..c63ddd06a5da 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -632,19 +632,22 @@ EXPORT_SYMBOL(bitmap_parselist); * @nmaskbits: size of bitmap, in bits. * * Wrapper for bitmap_parselist(), providing it with user buffer. - * - * We cannot have this as an inline function in bitmap.h because it needs - * linux/uaccess.h to get the access_ok() declaration and this causes - * cyclic dependencies. */ int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, unsigned long *maskp, int nmaskbits) { - if (!access_ok(ubuf, ulen)) - return -EFAULT; - return __bitmap_parselist((const char __force *)ubuf, - ulen, 1, maskp, nmaskbits); + char *buf; + int ret; + + buf = memdup_user_nul(ubuf, ulen); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + ret = bitmap_parselist(buf, maskp, nmaskbits); + + kfree(buf); + return ret; } EXPORT_SYMBOL(bitmap_parselist_user); -- cgit From e371c481d89cd6b9db72e077efd64059dfc87711 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 14 May 2019 15:43:14 -0700 Subject: lib: rework bitmap_parselist Remove __bitmap_parselist helper and split the function to logical parts. [ynorov@marvell.com: v5] Link: http://lkml.kernel.org/r/20190416063801.20134-3-ynorov@marvell.com Link: http://lkml.kernel.org/r/20190405173211.11373-3-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Mike Travis Cc: Rasmus Villemoes Cc: Tetsuo Handa Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 255 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 142 insertions(+), 113 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index c63ddd06a5da..f235434df87b 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -20,6 +20,8 @@ #include +#include "kstrtox.h" + /** * DOC: bitmap introduction * @@ -477,12 +479,128 @@ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, } EXPORT_SYMBOL(bitmap_print_to_pagebuf); +/* + * Region 9-38:4/10 describes the following bitmap structure: + * 0 9 12 18 38 + * .........****......****......****...... 
+ * ^ ^ ^ ^ + * start off group_len end + */ +struct region { + unsigned int start; + unsigned int off; + unsigned int group_len; + unsigned int end; +}; + +static int bitmap_set_region(const struct region *r, + unsigned long *bitmap, int nbits) +{ + unsigned int start; + + if (r->end >= nbits) + return -ERANGE; + + for (start = r->start; start <= r->end; start += r->group_len) + bitmap_set(bitmap, start, min(r->end - start + 1, r->off)); + + return 0; +} + +static int bitmap_check_region(const struct region *r) +{ + if (r->start > r->end || r->group_len == 0 || r->off > r->group_len) + return -EINVAL; + + return 0; +} + +static const char *bitmap_getnum(const char *str, unsigned int *num) +{ + unsigned long long n; + unsigned int len; + + len = _parse_integer(str, 10, &n); + if (!len) + return ERR_PTR(-EINVAL); + if (len & KSTRTOX_OVERFLOW || n != (unsigned int)n) + return ERR_PTR(-EOVERFLOW); + + *num = n; + return str + len; +} + +static inline bool end_of_str(char c) +{ + return c == '\0' || c == '\n'; +} + +static inline bool __end_of_region(char c) +{ + return isspace(c) || c == ','; +} + +static inline bool end_of_region(char c) +{ + return __end_of_region(c) || end_of_str(c); +} + +/* + * The format allows commas and whitespases at the beginning + * of the region. + */ +static const char *bitmap_find_region(const char *str) +{ + while (__end_of_region(*str)) + str++; + + return end_of_str(*str) ? NULL : str; +} + +static const char *bitmap_parse_region(const char *str, struct region *r) +{ + str = bitmap_getnum(str, &r->start); + if (IS_ERR(str)) + return str; + + if (end_of_region(*str)) + goto no_end; + + if (*str != '-') + return ERR_PTR(-EINVAL); + + str = bitmap_getnum(str + 1, &r->end); + if (IS_ERR(str)) + return str; + + if (end_of_region(*str)) + goto no_pattern; + + if (*str != ':') + return ERR_PTR(-EINVAL); + + str = bitmap_getnum(str + 1, &r->off); + if (IS_ERR(str)) + return str; + + if (*str != '/') + return ERR_PTR(-EINVAL); + + return bitmap_getnum(str + 1, &r->group_len); + +no_end: + r->end = r->start; +no_pattern: + r->off = r->end + 1; + r->group_len = r->end + 1; + + return end_of_str(*str) ? NULL : str; +} + /** - * __bitmap_parselist - convert list format ASCII string to bitmap - * @buf: read nul-terminated user string from this buffer - * @buflen: buffer size in bytes. If string is smaller than this - * then it must be terminated with a \0. - * @is_user: location of buffer, 0 indicates kernel space + * bitmap_parselist - convert list format ASCII string to bitmap + * @buf: read user string from this buffer; must be terminated + * with a \0 or \n. * @maskp: write resulting mask here * @nmaskbits: number of bits in mask to be written * @@ -498,127 +616,38 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf); * * Returns: 0 on success, -errno on invalid input strings. 
Error values: * - * - ``-EINVAL``: second number in range smaller than first + * - ``-EINVAL``: wrong region format * - ``-EINVAL``: invalid character in string * - ``-ERANGE``: bit number specified too large for mask + * - ``-EOVERFLOW``: integer overflow in the input parameters */ -static int __bitmap_parselist(const char *buf, unsigned int buflen, - int is_user, unsigned long *maskp, - int nmaskbits) +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits) { - unsigned int a, b, old_a, old_b; - unsigned int group_size, used_size, off; - int c, old_c, totaldigits, ndigits; - const char __user __force *ubuf = (const char __user __force *)buf; - int at_start, in_range, in_partial_range; + struct region r; + long ret; - totaldigits = c = 0; - old_a = old_b = 0; - group_size = used_size = 0; bitmap_zero(maskp, nmaskbits); - do { - at_start = 1; - in_range = 0; - in_partial_range = 0; - a = b = 0; - ndigits = totaldigits; - - /* Get the next cpu# or a range of cpu#'s */ - while (buflen) { - old_c = c; - if (is_user) { - if (__get_user(c, ubuf++)) - return -EFAULT; - } else - c = *buf++; - buflen--; - if (isspace(c)) - continue; - /* A '\0' or a ',' signal the end of a cpu# or range */ - if (c == '\0' || c == ',') - break; - /* - * whitespaces between digits are not allowed, - * but it's ok if whitespaces are on head or tail. - * when old_c is whilespace, - * if totaldigits == ndigits, whitespace is on head. - * if whitespace is on tail, it should not run here. - * as c was ',' or '\0', - * the last code line has broken the current loop. - */ - if ((totaldigits != ndigits) && isspace(old_c)) - return -EINVAL; - - if (c == '/') { - used_size = a; - at_start = 1; - in_range = 0; - a = b = 0; - continue; - } + while (buf) { + buf = bitmap_find_region(buf); + if (buf == NULL) + return 0; - if (c == ':') { - old_a = a; - old_b = b; - at_start = 1; - in_range = 0; - in_partial_range = 1; - a = b = 0; - continue; - } + buf = bitmap_parse_region(buf, &r); + if (IS_ERR(buf)) + return PTR_ERR(buf); - if (c == '-') { - if (at_start || in_range) - return -EINVAL; - b = 0; - in_range = 1; - at_start = 1; - continue; - } + ret = bitmap_check_region(&r); + if (ret) + return ret; - if (!isdigit(c)) - return -EINVAL; + ret = bitmap_set_region(&r, maskp, nmaskbits); + if (ret) + return ret; + } - b = b * 10 + (c - '0'); - if (!in_range) - a = b; - at_start = 0; - totaldigits++; - } - if (ndigits == totaldigits) - continue; - if (in_partial_range) { - group_size = a; - a = old_a; - b = old_b; - old_a = old_b = 0; - } else { - used_size = group_size = b - a + 1; - } - /* if no digit is after '-', it's wrong*/ - if (at_start && in_range) - return -EINVAL; - if (!(a <= b) || group_size == 0 || !(used_size <= group_size)) - return -EINVAL; - if (b >= nmaskbits) - return -ERANGE; - while (a <= b) { - off = min(b - a + 1, used_size); - bitmap_set(maskp, a, off); - a += group_size; - } - } while (buflen && c == ','); return 0; } - -int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) -{ - char *nl = strchrnul(bp, '\n'); - int len = nl - bp; - - return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); -} EXPORT_SYMBOL(bitmap_parselist); -- cgit From 0c2111a5c852aa0ded7c5644a58058df80ed58bb Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 14 May 2019 15:43:18 -0700 Subject: lib/test_bitmap: switch test_bitmap_parselist to ktime_get() test_bitmap_parselist currently uses get_cycles which is not implemented on some platforms, so use ktime_get() instead. 
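In isolation, the portable timing idiom the test switches to looks like
this (a minimal sketch distilled from the diff below; variable names
follow the test code):

	ktime_t time;

	time = ktime_get();
	err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
	time = ktime_get() - time;	/* elapsed time, in nanoseconds */

ktime_get() is implemented on every platform, so the measured delta is
portable where a raw cycle counter is not.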
Link: http://lkml.kernel.org/r/20190405173211.11373-4-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Mike Travis Cc: Rasmus Villemoes Cc: Tetsuo Handa Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 792d90608052..63d4a21955f0 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -268,15 +268,15 @@ static void __init test_bitmap_parselist(void) { int i; int err; - cycles_t cycles; + ktime_t time; DECLARE_BITMAP(bmap, 2048); for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { #define ptest parselist_tests[i] - cycles = get_cycles(); + time = ktime_get(); err = bitmap_parselist(ptest.in, bmap, ptest.nbits); - cycles = get_cycles() - cycles; + time = ktime_get() - time; if (err != ptest.errno) { pr_err("test %d: input is %s, errno is %d, expected %d\n", @@ -293,8 +293,7 @@ static void __init test_bitmap_parselist(void) if (ptest.flags & PARSE_TIME) pr_err("test %d: input is '%s' OK, Time: %llu\n", - i, ptest.in, - (unsigned long long)cycles); + i, ptest.in, time); } } -- cgit From a4ab50509c76a03d338f434a271494b450250f0d Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 14 May 2019 15:43:21 -0700 Subject: lib/test_bitmap: add testcases for bitmap_parselist() Add tests for non-number character, empty regions, integer overflow. [ynorov@marvell.com: v5] Link: http://lkml.kernel.org/r/20190416063801.20134-5-ynorov@marvell.com Link: http://lkml.kernel.org/r/20190405173211.11373-5-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Mike Travis Cc: Rasmus Villemoes Cc: Tetsuo Handa Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 63d4a21955f0..6640a82ad44b 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -226,7 +226,8 @@ static const unsigned long exp[] __initconst = { BITMAP_FROM_U64(0xffffffff), BITMAP_FROM_U64(0xfffffffe), BITMAP_FROM_U64(0x3333333311111111ULL), - BITMAP_FROM_U64(0xffffffff77777777ULL) + BITMAP_FROM_U64(0xffffffff77777777ULL), + BITMAP_FROM_U64(0), }; static const unsigned long exp2[] __initconst = { @@ -249,19 +250,34 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { {0, "1-31:4/4", &exp[9 * step], 32, 0}, {0, "0-31:1/4,32-63:2/4", &exp[10 * step], 64, 0}, {0, "0-31:3/4,32-63:4/4", &exp[11 * step], 64, 0}, + {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp[11 * step], 64, 0}, {0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4", exp2, 128, 0}, {0, "0-2047:128/256", NULL, 2048, PARSE_TIME}, + {0, "", &exp[12 * step], 8, 0}, + {0, "\n", &exp[12 * step], 8, 0}, + {0, ",, ,, , , ,", &exp[12 * step], 8, 0}, + {0, " , ,, , , ", &exp[12 * step], 8, 0}, + {0, " , ,, , , \n", &exp[12 * step], 8, 0}, + {-EINVAL, "-1", NULL, 8, 0}, {-EINVAL, "-0", NULL, 8, 0}, {-EINVAL, "10-1", NULL, 8, 0}, {-EINVAL, "0-31:", NULL, 8, 0}, {-EINVAL, "0-31:0", NULL, 8, 0}, + {-EINVAL, "0-31:0/", NULL, 8, 0}, {-EINVAL, "0-31:0/0", NULL, 8, 0}, {-EINVAL, "0-31:1/0", NULL, 8, 0}, {-EINVAL, "0-31:10/1", NULL, 8, 0}, + {-EOVERFLOW, "0-98765432123456789:10/1", NULL, 8, 0}, + + {-EINVAL, "a-31", NULL, 8, 0}, + {-EINVAL, "0-a1", NULL, 8, 0}, + 
{-EINVAL, "a-31:10/1", NULL, 8, 0}, + {-EINVAL, "0-31:a/1", NULL, 8, 0}, + {-EINVAL, "0-\n", NULL, 8, 0}, }; static void __init test_bitmap_parselist(void) -- cgit From 6ea86bdfc169ba9df8484e9d2bb250a874b03c47 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 14 May 2019 15:43:24 -0700 Subject: lib/test_bitmap: add tests for bitmap_parselist_user() Propagate existing bitmap_parselist() tests to bitmap_parselist_user(). Link: http://lkml.kernel.org/r/20190405173211.11373-6-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Mike Travis Cc: Rasmus Villemoes Cc: Tetsuo Handa Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6640a82ad44b..d3a501f2a81a 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "../tools/testing/selftests/kselftest_module.h" @@ -280,39 +281,63 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { {-EINVAL, "0-\n", NULL, 8, 0}, }; -static void __init test_bitmap_parselist(void) +static void __init __test_bitmap_parselist(int is_user) { int i; int err; ktime_t time; DECLARE_BITMAP(bmap, 2048); + char *mode = is_user ? "_user" : ""; for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { #define ptest parselist_tests[i] - time = ktime_get(); - err = bitmap_parselist(ptest.in, bmap, ptest.nbits); - time = ktime_get() - time; + if (is_user) { + mm_segment_t orig_fs = get_fs(); + size_t len = strlen(ptest.in); + + set_fs(KERNEL_DS); + time = ktime_get(); + err = bitmap_parselist_user(ptest.in, len, + bmap, ptest.nbits); + time = ktime_get() - time; + set_fs(orig_fs); + } else { + time = ktime_get(); + err = bitmap_parselist(ptest.in, bmap, ptest.nbits); + time = ktime_get() - time; + } if (err != ptest.errno) { - pr_err("test %d: input is %s, errno is %d, expected %d\n", - i, ptest.in, err, ptest.errno); + pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n", + mode, i, ptest.in, err, ptest.errno); continue; } if (!err && ptest.expected && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) { - pr_err("test %d: input is %s, result is 0x%lx, expected 0x%lx\n", - i, ptest.in, bmap[0], *ptest.expected); + pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n", + mode, i, ptest.in, bmap[0], + *ptest.expected); continue; } if (ptest.flags & PARSE_TIME) - pr_err("test %d: input is '%s' OK, Time: %llu\n", - i, ptest.in, time); + pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n", + mode, i, ptest.in, time); } } +static void __init test_bitmap_parselist(void) +{ + __test_bitmap_parselist(0); +} + +static void __init test_bitmap_parselist_user(void) +{ + __test_bitmap_parselist(1); +} + #define EXP_BYTES (sizeof(exp) * 8) static void __init test_bitmap_arr32(void) @@ -385,6 +410,7 @@ static void __init selftest(void) test_copy(); test_bitmap_arr32(); test_bitmap_parselist(); + test_bitmap_parselist_user(); test_mem_optimisations(); } -- cgit From 7507c40258726ea7a07374db00e8b55a138de88c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 14 May 2019 15:43:30 -0700 Subject: lib/test_vmalloc.c:test_func(): eliminate local `ret' Local 'ret' is unneeded and was poorly named: the variable `ret' generally means the "the value which this function will return". 
Cc: Roman Gushchin
Cc: Uladzislau Rezki
Cc: Michal Hocko
Cc: Matthew Wilcox
Cc: Thomas Garnier
Cc: Oleksiy Avramchenko
Cc: Steven Rostedt
Cc: Joel Fernandes
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Tejun Heo
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/test_vmalloc.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index f832b095afba..8bbefcaddfe8 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -384,12 +384,11 @@ static int test_func(void *private)
 {
 	struct test_driver *t = private;
 	int random_array[ARRAY_SIZE(test_case_array)];
-	int index, i, j, ret;
+	int index, i, j;
 	ktime_t kt;
 	u64 delta;

-	ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu));
-	if (ret < 0)
+	if (set_cpus_allowed_ptr(current, cpumask_of(t->cpu)) < 0)
 		pr_err("Failed to set affinity to %d CPU\n", t->cpu);

 	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
@@ -415,8 +414,7 @@ static int test_func(void *private)

 		kt = ktime_get();
 		for (j = 0; j < test_repeat_count; j++) {
-			ret = test_case_array[index].test_func();
-			if (!ret)
+			if (!test_case_array[index].test_func())
 				per_cpu_test_data[t->cpu][index].test_passed++;
 			else
 				per_cpu_test_data[t->cpu][index].test_failed++;
-- cgit

From c66d7a27b794b43d19a96df03822dd476b03b0a3 Mon Sep 17 00:00:00 2001
From: Sinan Kaya
Date: Tue, 14 May 2019 15:44:00 -0700
Subject: init: introduce DEBUG_MISC option

Patch series "init: Do not select DEBUG_KERNEL by default", v5.

CONFIG_DEBUG_KERNEL has been designed to just enable Kconfig options.
Kernel code generation should not depend on CONFIG_DEBUG_KERNEL.

Proposed alternative plan: let's add a new symbol, something like
DEBUG_MISC ("Miscellaneous debug code that should be under a more
specific debug option but isn't"), make it depend on DEBUG_KERNEL and be
"default DEBUG_KERNEL" but allow itself to be turned off, and then
mechanically change the small handful of "#ifdef CONFIG_DEBUG_KERNEL" to
"#ifdef CONFIG_DEBUG_MISC".

This patch (of 5):

Introduce DEBUG_MISC ("Miscellaneous debug code that should be under a
more specific debug option but isn't"), make it depend on DEBUG_KERNEL
and be "default DEBUG_KERNEL" but allow itself to be turned off, and
then mechanically change the small handful of "#ifdef
CONFIG_DEBUG_KERNEL" to "#ifdef CONFIG_DEBUG_MISC".

Link: http://lkml.kernel.org/r/20190413224438.10802-2-okaya@kernel.org
Signed-off-by: Sinan Kaya
Reviewed-by: Josh Triplett
Reviewed-by: Kees Cook
Cc: Anders Roxell
Cc: Benjamin Herrenschmidt
Cc: Christophe Leroy
Cc: Chris Zankel
Cc: "David S. Miller"
Cc: Florian Westphal
Cc: Greg Kroah-Hartman
Cc: James Hogan
Cc: Jozsef Kadlecsik
Cc: Max Filippov
Cc: Michael Ellerman
Cc: Michal Hocko
Cc: Mike Rapoport
Cc: Pablo Neira Ayuso
Cc: Paul Burton
Cc: Paul Mackerras
Cc: Ralf Baechle
Cc: Thomas Bogendoerfer
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/Kconfig.debug | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 181bd56238b0..fdfa173651eb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -460,6 +460,15 @@ config DEBUG_KERNEL
 	  Say Y here if you are developing drivers or trying to debug and
 	  identify kernel problems.

+config DEBUG_MISC
+	bool "Miscellaneous debug code"
+	default DEBUG_KERNEL
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here if you need to enable miscellaneous debug code that should
+	  be under a more specific debug option but isn't.
+
+
 menu "Memory Debugging"

 source "mm/Kconfig.debug"
-- cgit

From 2ea622b887e74497ce5aac5bfe247502b5786f56 Mon Sep 17 00:00:00 2001
From: Eric Sandeen
Date: Tue, 14 May 2019 15:45:10 -0700
Subject: tools/testing/selftests/sysctl/sysctl.sh: add proc_do_large_bitmap() test case

The kernel has only two users of proc_do_large_bitmap(), the kernel CPU
watchdog, and the ip_local_reserved_ports. Refer to watchdog_cpumask and
ip_local_reserved_ports in Documentation for further details on these.
When you feed these an input buffer larger than PAGE_SIZE - 1, the input
data gets misparsed, and the user is incorrectly informed that the
desired input value was set.

This commit implements a test which mimics and exploits that use case;
it uses a bitmap, as in the watchdog case. The bitmap is used to test
the bitmap proc handler, proc_do_large_bitmap(). The next commit fixes
this issue.

[akpm@linux-foundation.org: move proc_do_large_bitmap() export to EOF]
[mcgrof@kernel.org: use new target description for backward compatibility]
[mcgrof@kernel.org: augment test number to 50, ran into issues with bash string comparisons when testing up to 50 cases.]
[mcgrof@kernel.org: introduce and use verify_diff_proc_file() to use diff]
[mcgrof@kernel.org: use mktemp for tmp file]
[mcgrof@kernel.org: merge shell test and C code]
[mcgrof@kernel.org: commit log love]
[mcgrof@kernel.org: export proc_do_large_bitmap() to allow for the test]
[mcgrof@kernel.org: check for the return value when writing to the proc file]
Link: http://lkml.kernel.org/r/20190320222831.8243-6-mcgrof@kernel.org
Signed-off-by: Eric Sandeen
Signed-off-by: Luis Chamberlain
Acked-by: Kees Cook
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/test_sysctl.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 3dd801c1c85b..566dad3f4196 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -47,6 +47,9 @@ struct test_sysctl_data {
 	unsigned int uint_0001;

 	char string_0001[65];
+
+#define SYSCTL_TEST_BITMAP_SIZE	65536
+	unsigned long *bitmap_0001;
 };

 static struct test_sysctl_data test_data = {
@@ -102,6 +105,13 @@ static struct ctl_table test_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dostring,
 	},
+	{
+		.procname	= "bitmap_0001",
+		.data		= &test_data.bitmap_0001,
+		.maxlen		= SYSCTL_TEST_BITMAP_SIZE,
+		.mode		= 0644,
+		.proc_handler	= proc_do_large_bitmap,
+	},
 	{ }
 };

@@ -129,15 +139,21 @@ static struct ctl_table_header *test_sysctl_header;

 static int __init test_sysctl_init(void)
 {
+	test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL);
+	if (!test_data.bitmap_0001)
+		return -ENOMEM;
 	test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
-	if (!test_sysctl_header)
+	if (!test_sysctl_header) {
+		kfree(test_data.bitmap_0001);
 		return -ENOMEM;
+	}
 	return 0;
 }
 late_initcall(test_sysctl_init);

 static void __exit test_sysctl_exit(void)
 {
+	kfree(test_data.bitmap_0001);
 	if (test_sysctl_header)
 		unregister_sysctl_table(test_sysctl_header);
 }
-- cgit
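For context, the parsing bug this test targets can be reproduced from
userspace by writing a bit list longer than PAGE_SIZE - 1 to the new
proc file. A minimal sketch follows (it assumes the test_sysctl module
is loaded; the proc path and the generated list are illustrative, this
is not the selftest itself):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* ~24 KiB of "0,2,4,...": well past PAGE_SIZE - 1 */
		static char buf[64 * 1024];
		size_t len = 0;
		int fd, i;

		for (i = 0; i < 10000; i += 2)
			len += snprintf(buf + len, sizeof(buf) - len, "%d,", i);
		buf[len - 1] = '\n';	/* replace the trailing comma */

		/* Path assumed from the sysctl table above. */
		fd = open("/proc/sys/debug/test_sysctl/bitmap_0001", O_WRONLY);
		if (fd < 0 || write(fd, buf, len) != (ssize_t)len)
			perror("bitmap_0001");
		if (fd >= 0)
			close(fd);
		return 0;
	}

Before the fix referenced above, such a write could appear to succeed
even though only part of the list was actually parsed.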