From 2821fd0c2be0c4e513b1622d86df9170ef62a6d4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Mar 2019 12:00:09 +0200 Subject: lib/test_printf: Switch to bitmap_zalloc() Switch to bitmap_zalloc() to show clearly what we are allocating. Besides that, it returns a pointer of bitmap type instead of an opaque void *. Link: http://lkml.kernel.org/r/20190304100009.65147-1-andriy.shevchenko@linux.intel.com To: linux-kernel@vger.kernel.org To: Andrew Morton To: linux@rasmusvillemoes.dk Signed-off-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/test_printf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 659b6cc0d483..e8206d8d2d08 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -481,14 +481,14 @@ static void __init large_bitmap(void) { const int nbits = 1 << 16; - unsigned long *bits = kcalloc(BITS_TO_LONGS(nbits), sizeof(long), GFP_KERNEL); + unsigned long *bits = bitmap_zalloc(nbits, GFP_KERNEL); if (!bits) return; bitmap_set(bits, 1, 20); bitmap_set(bits, 60000, 15); test("1-20,60000-60014", "%*pbl", nbits, bits); - kfree(bits); + bitmap_free(bits); } static void __init -- cgit From 4feb7c7a4fbb8f63371be31cda79433c7cf3da86 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Mar 2019 14:42:40 +1100 Subject: rhashtable: don't hold lock on first table throughout insertion. rhashtable_try_insert() currently holds a lock on the bucket in the first table, while also locking buckets in subsequent tables. This is unnecessary and looks like a hold-over from some earlier version of the implementation. As insert and remove always lock a bucket in each table in turn, and as insert only inserts in the final table, there cannot be any races that are not covered by simply locking a bucket in each table in turn. When an insert call reaches the last table it can be sure that there is no matching entry in any other table as it has searched them all, and insertion never happens anywhere but in the last table. The fact that the code tests for the existence of future_tbl while holding a lock on the relevant bucket ensures that two threads inserting the same key will make compatible decisions about which is the "last" table. This simplifies the code and allows the ->rehash field to be discarded. We still need a way to ensure that a dead bucket_table is never re-linked by rhashtable_walk_stop(). This can be achieved by calling call_rcu() inside the locked region, and checking with rcu_head_after_call_rcu() in rhashtable_walk_stop() to see if the bucket table is empty and dead. Acked-by: Herbert Xu Reviewed-by: Paul E. McKenney Signed-off-by: NeilBrown Signed-off-by: David S.
Miller --- lib/rhashtable.c | 52 ++++++++++++++++------------------------------------ 1 file changed, 16 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 0a105d4af166..776b3a82d3a1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -197,6 +197,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return NULL; } + rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); tbl->hash_rnd = get_random_u32(); @@ -280,10 +281,9 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, while (!(err = rhashtable_rehash_one(ht, old_hash))) ; - if (err == -ENOENT) { - old_tbl->rehash++; + if (err == -ENOENT) err = 0; - } + spin_unlock_bh(old_bucket_lock); return err; @@ -330,13 +330,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht) spin_lock(&ht->lock); list_for_each_entry(walker, &old_tbl->walkers, list) walker->tbl = NULL; - spin_unlock(&ht->lock); /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. + * We do this inside the locked region so that + * rhashtable_walk_stop() can use rcu_head_after_call_rcu() + * to check if it should not re-link the table. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + spin_unlock(&ht->lock); return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } @@ -578,46 +581,22 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, struct bucket_table *new_tbl; struct bucket_table *tbl; unsigned int hash; - spinlock_t *lock; void *data; - tbl = rcu_dereference(ht->tbl); - - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. - */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, ht->p); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - - spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); - } - - data = rhashtable_lookup_one(ht, tbl, hash, key, obj); - new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); - if (PTR_ERR(new_tbl) != -EEXIST) - data = ERR_CAST(new_tbl); + new_tbl = rcu_dereference(ht->tbl); - while (!IS_ERR_OR_NULL(new_tbl)) { + do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); - spin_lock_nested(rht_bucket_lock(tbl, hash), - SINGLE_DEPTH_NESTING); + spin_lock_bh(rht_bucket_lock(tbl, hash)); data = rhashtable_lookup_one(ht, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); - spin_unlock(rht_bucket_lock(tbl, hash)); - } - - spin_unlock_bh(lock); + spin_unlock_bh(rht_bucket_lock(tbl, hash)); + } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: @@ -939,10 +918,11 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) ht = iter->ht; spin_lock(&ht->lock); - if (tbl->rehash < tbl->size) - list_add(&iter->walker.list, &tbl->walkers); - else + if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu)) + /* This bucket table is being freed, don't re-link it. */ iter->walker.tbl = NULL; + else + list_add(&iter->walker.list, &tbl->walkers); spin_unlock(&ht->lock); out: -- cgit From f7ad68bf98506f48129267438ada1255fc4edfa2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Mar 2019 14:42:40 +1100 Subject: rhashtable: rename rht_for_each*continue as *from. 
The pattern set by list.h is that for_each..continue() iterators start at the next entry after the given one, while for_each..from() iterators start at the given entry. The rht_for_each*continue() iterators are documented as though they start at the 'next' entry, but actually start at the given entry, and they are used expecting that behaviour. So fix the documentation and change the names to *from for consistency with list.h. Acked-by: Herbert Xu Acked-by: Miguel Ojeda Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 776b3a82d3a1..f65e43fb1ff8 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -490,7 +490,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, elasticity = RHT_ELASTICITY; pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, *pprev, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; -- cgit From c03a0fd0b609e2f5c669c2b7f27c8e1928e9196e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 17 Mar 2019 14:02:31 +0900 Subject: kobject: Don't trigger kobject_uevent(KOBJ_REMOVE) twice. syzbot is hitting a use-after-free bug in the uinput module [1]. This is because kobject_uevent(KOBJ_REMOVE) is called again due to commit 0f4dafc0563c6c49 ("Kobject: auto-cleanup on final unref") after memory allocation fault injection made kobject_uevent(KOBJ_REMOVE) from device_del() from input_unregister_device() fail, while uinput_destroy_device() is expecting that kobject_uevent(KOBJ_REMOVE) is not called after device_del() from input_unregister_device() completed. That commit intended to catch cases where nobody even attempted to send "remove" uevents. But there is no guarantee that an event will ultimately be sent. We are at the point of no return as far as the rest of the kernel is concerned; there are no repeats or do-overs. Also, it is not clear whether some subsystem depends on that commit. If no subsystem depends on that commit, it will be better to remove the state_{add,remove}_uevent_sent logic. But we don't want to risk a regression (in a patch which will be backported) by trying to remove that logic. Therefore, as a first step, let's avoid the use-after-free bug by making sure that kobject_uevent(KOBJ_REMOVE) won't be triggered twice. [1] https://syzkaller.appspot.com/bug?id=8b17c134fe938bbddd75a45afaa9e68af43a362d Reported-by: syzbot Analyzed-by: Dmitry Torokhov Fixes: 0f4dafc0563c6c49 ("Kobject: auto-cleanup on final unref") Cc: Kay Sievers Signed-off-by: Tetsuo Handa Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index f05802687ba4..7998affa45d4 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -466,6 +466,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, int i = 0; int retval = 0; + /* + * Mark "remove" event done regardless of result, for some subsystems + * do not want to re-trigger "remove" event via automatic cleanup.
+ */ + if (action == KOBJ_REMOVE) + kobj->state_remove_uevent_sent = 1; + pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -567,10 +574,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, kobj->state_add_uevent_sent = 1; break; - case KOBJ_REMOVE: - kobj->state_remove_uevent_sent = 1; - break; - case KOBJ_UNBIND: zap_modalias_env(env); break; -- cgit From e83b9f55448afce3fe1abcd1d10db9584f8042a6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 2 Apr 2019 09:49:50 -0700 Subject: kbuild: add ability to generate BTF type info for vmlinux This patch adds a new config option to trigger generation of BTF type information from DWARF debuginfo for vmlinux and kernel modules through pahole, which in turn relies on libbpf for the btf_dedup() algorithm. The intent is to record compact type information of all types used inside the kernel, including all the structs/unions/typedefs/etc. This enables BPF's compile-once-run-everywhere ([0]) approach, in which tracing programs that are inspecting kernel's internal data (e.g., struct task_struct) can be compiled on a system running some kernel version, but can be run on other kernel versions (and configurations) without recompilation, even if the layout of structs changed and/or some of the fields were added, removed, or renamed. This is only possible if the BPF loader can get kernel type info to adjust all the offsets correctly. This patch is a first step in this direction, making sure that BTF type info is part of the Linux kernel image in a non-loadable ELF section. The BTF deduplication ([1]) algorithm typically provides 100x savings compared to DWARF data, so the resulting .BTF section is not big; it is typically about 2MB in size. [0] http://vger.kernel.org/lpc-bpf2018.html#session-2 [1] https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html Cc: Masahiro Yamada Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Yonghong Song Cc: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: David S. Miller Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- lib/Kconfig.debug | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d9e81779e37..188fc17c2202 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -219,6 +219,14 @@ config DEBUG_INFO_DWARF4 But it significantly improves the success of resolving variables in gdb on optimized code. +config DEBUG_INFO_BTF + bool "Generate BTF typeinfo" + depends on DEBUG_INFO + help + Generate deduplicated BTF type information from DWARF debug info. + Turning this on expects presence of pahole tool, which will convert + DWARF type info into equivalent deduplicated BTF type info. + config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" depends on DEBUG_INFO -- cgit From 72deb455b5ec619ff043c30bc90025aa3de3cdda Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Apr 2019 18:08:59 +0200 Subject: block: remove CONFIG_LBDAF Currently support for 64-bit sector_t and blkcnt_t is optional on 32-bit architectures. These types are required to support block device and/or file sizes larger than 2 TiB, and have generally defaulted to on for a long time. Enabling the option only increases the i386 tinyconfig size by 145 bytes, and many data structures already always use 64-bit values for their in-core and on-disk data structures anyway, so there should not be a large change in dynamic memory usage either.
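For illustration only, and not part of this patch: a simplified sketch of the kind of conditional typedef CONFIG_LBDAF gated, loosely based on the pre-removal include/linux/types.h (details abridged):

	#ifdef CONFIG_LBDAF
	typedef u64 sector_t;			/* 64-bit sector numbers even on 32-bit */
	typedef u64 blkcnt_t;			/* 64-bit block counts even on 32-bit */
	#else
	typedef unsigned long sector_t;		/* caps devices/files at 2 TiB on 32-bit */
	typedef unsigned long blkcnt_t;
	#endif

With the option gone, the 64-bit definitions become unconditional.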
Dropping this option removes a somewhat weird non-default config that has caused various bugs or compiler warnings when actually used. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d9e81779e37..d8781786cf63 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1927,7 +1927,6 @@ config TEST_STATIC_KEYS config TEST_KMOD tristate "kmod stress tester" depends on m - depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS depends on NETDEVICES && NET_CORE && INET # for TUN select TEST_LKM select XFS_FS -- cgit From 7a41c294c1463100fdc82a356e22e36bbaa6b0f9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Apr 2019 10:07:45 +1100 Subject: rhashtable: use cmpxchg() in nested_table_alloc() nested_table_alloc() relies on the fact that there is at most one spinlock allocated for every slot in the top level nested table, so it is not possible for two threads to try to allocate the same table at the same time. This assumption is a little fragile (it is not explicit) and is unnecessary as cmpxchg() can be used instead. A future patch will replace the spinlocks by per-bucket bitlocks, and then we won't be able to protect the slot pointer with a spinlock. So replace rcu_assign_pointer() with cmpxchg() - which has equivalent barrier properties. If the cmpxchg() fails, free the table that was just allocated. Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 811d51b7cb86..6c4f5c8e9baa 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -131,9 +131,11 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } - rcu_assign_pointer(*prev, ntbl); - - return ntbl; + if (cmpxchg(prev, NULL, ntbl) == NULL) + return ntbl; + /* Raced with another thread. */ + kfree(ntbl); + return rcu_dereference(*prev); } static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, -- cgit From ff302db965b57c141297911ea647d36d11fedfbe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Apr 2019 10:07:45 +1100 Subject: rhashtable: allow rht_bucket_var to return NULL. Rather than returning a pointer to a static nulls, rht_bucket_var() now returns NULL if the bucket doesn't exist. This will make the next patch, which stores a bitlock in the bucket pointer, somewhat cleaner. This change involves introducing __rht_bucket_nested() which is like rht_bucket_nested(), but doesn't provide the static nulls, and changing rht_bucket_nested() to call this and possibly provide a static nulls - as is still needed for the non-var case. Signed-off-by: NeilBrown Signed-off-by: David S.
Miller --- lib/rhashtable.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6c4f5c8e9baa..b28fdd560ea9 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -237,8 +237,10 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) goto out; err = -ENOENT; + if (!pprev) + goto out; - rht_for_each(entry, old_tbl, old_hash) { + rht_for_each_from(entry, *pprev, old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -496,6 +498,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, elasticity = RHT_ELASTICITY; pprev = rht_bucket_var(tbl, hash); + if (!pprev) + return ERR_PTR(-ENOENT); rht_for_each_from(head, *pprev, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -1161,11 +1165,10 @@ void rhashtable_destroy(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_destroy); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); - static struct rhash_head __rcu *rhnull; unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; unsigned int subhash = hash; @@ -1183,15 +1186,23 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, subhash >>= shift; } - if (!ntbl) { - if (!rhnull) - INIT_RHT_NULLS_HEAD(rhnull); - return &rhnull; - } + if (!ntbl) + return NULL; return &ntbl[subhash].bucket; } +EXPORT_SYMBOL_GPL(__rht_bucket_nested); + +struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) +{ + static struct rhash_head __rcu *rhnull; + + if (!rhnull) + INIT_RHT_NULLS_HEAD(rhnull); + return __rht_bucket_nested(tbl, hash) ?: &rhnull; +} EXPORT_SYMBOL_GPL(rht_bucket_nested); struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, -- cgit From 8f0db018006a421956965e1149234c4e8db718ee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Apr 2019 10:07:45 +1100 Subject: rhashtable: use bit_spin_locks to protect hash bucket. This patch changes rhashtables to use a bit_spin_lock on BIT(1) of the bucket pointer to lock the hash chain for that bucket. The benefits of a bit spin_lock are: - no need to allocate a separate array of locks. - no need to have a configuration option to guide the choice of the size of this array. - locking cost is often a single test-and-set in a cache line that will have to be loaded anyway. When inserting at, or removing from, the head of the chain, the unlock is free - writing the new address in the bucket head implicitly clears the lock bit. For __rhashtable_insert_fast() we ensure this always happens when adding a new key. - even when locking costs 2 updates (lock and unlock), they are in a cacheline that needs to be read anyway. The cost of using a bit spin_lock is a little bit of code complexity, which I think is quite manageable. Bit spin_locks are sometimes inappropriate because they are not fair - if multiple CPUs repeatedly contend on the same lock, one CPU can easily be starved. This is not a credible situation with rhashtable. Multiple CPUs may want to repeatedly add or remove objects, but they will typically do so at different buckets, so they will attempt to acquire different locks.
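For illustration only, and not part of this patch: a minimal sketch of locking a hash chain through a bit in the bucket pointer. The example_* helper names are hypothetical; the patch's real rht_lock()/rht_unlock() helpers live in the rhashtable headers and also integrate with lockdep:

	#include <linux/bit_spinlock.h>

	static inline void example_bucket_lock(struct rhash_lock_head **bkt)
	{
		local_bh_disable();	/* keep the BH exclusion the old spin_lock_bh() gave */
		bit_spin_lock(1, (unsigned long *)bkt);	/* spin on BIT(1) of the pointer word */
	}

	static inline void example_bucket_unlock(struct rhash_lock_head **bkt)
	{
		bit_spin_unlock(1, (unsigned long *)bkt);
		local_bh_enable();
	}

This is consistent with the lockdep check in the diff below, which tests bit_spin_is_locked(1, ...) on the bucket word.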
As we have more bit-locks than we previously had spinlocks (by at least a factor of two) we can expect slightly less contention to go with the slightly better cache behavior and reduced memory consumption. To enhance type checking, a new struct is introduced to represent the pointer plus lock-bit that is stored in the bucket-table. This is "struct rhash_lock_head" and is empty. A pointer to this needs to be cast to either an unsigned long, or a "struct rhash_head *" to be useful. Variables of this type are most often called "bkt". Previously "pprev" would sometimes point to a bucket, and sometimes a ->next pointer in an rhash_head. As these are now different types, pprev is NULL when it would have pointed to the bucket. In that case, 'bkt' is used, together with correct locking protocol. Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 141 +++++++++++++++++++++++++------------------------- lib/test_rhashtable.c | 2 +- 2 files changed, 71 insertions(+), 72 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b28fdd560ea9..c5d0974467ee 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -31,11 +31,10 @@ #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U -#define BUCKET_LOCKS_PER_CPU 32UL union nested_table { union nested_table __rcu *table; - struct rhash_head __rcu *bucket; + struct rhash_lock_head __rcu *bucket; }; static u32 head_hashfn(struct rhashtable *ht, @@ -56,9 +55,11 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { - spinlock_t *lock = rht_bucket_lock(tbl, hash); - - return (debug_locks) ? lockdep_is_held(lock) : 1; + if (!debug_locks) + return 1; + if (unlikely(tbl->nest)) + return 1; + return bit_spin_is_locked(1, (unsigned long *)&tbl->buckets[hash]); } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #else @@ -104,7 +105,6 @@ static void bucket_table_free(const struct bucket_table *tbl) if (tbl->nest) nested_bucket_table_free(tbl); - free_bucket_spinlocks(tbl->locks); kvfree(tbl); } @@ -171,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, gfp_t gfp) { struct bucket_table *tbl = NULL; - size_t size, max_locks; + size_t size; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); @@ -189,16 +189,6 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->size = size; - max_locks = size >> 1; - if (tbl->nest) - max_locks = min_t(size_t, max_locks, 1U << tbl->nest); - - if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks, - ht->p.locks_mul, gfp) < 0) { - bucket_table_free(tbl); - return NULL; - } - rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); @@ -223,24 +213,23 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, return new_tbl; } -static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) +static int rhashtable_rehash_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, + unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); - struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); int err = -EAGAIN; struct rhash_head *head, *next, *entry; - spinlock_t *new_bucket_lock; + struct rhash_head **pprev = NULL; unsigned int new_hash; if (new_tbl->nest) goto out; err = -ENOENT; - if (!pprev) - goto out; - rht_for_each_from(entry,
rht_ptr(*bkt), old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -255,18 +244,20 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) new_hash = head_hashfn(ht, new_tbl, entry); - new_bucket_lock = rht_bucket_lock(new_tbl, new_hash); + rht_lock(&new_tbl->buckets[new_hash]); - spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING); - head = rht_dereference_bucket(new_tbl->buckets[new_hash], - new_tbl, new_hash); + head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash], + new_tbl, new_hash)); RCU_INIT_POINTER(entry->next, head); - rcu_assign_pointer(new_tbl->buckets[new_hash], entry); - spin_unlock(new_bucket_lock); + rht_assign_unlock(&new_tbl->buckets[new_hash], entry); - rcu_assign_pointer(*pprev, next); + if (pprev) + rcu_assign_pointer(*pprev, next); + else + /* Need to preserved the bit lock. */ + rcu_assign_pointer(*bkt, rht_ptr_locked(next)); out: return err; @@ -276,19 +267,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - spinlock_t *old_bucket_lock; + struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); int err; - old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); + if (!bkt) + return 0; + rht_lock(bkt); - spin_lock_bh(old_bucket_lock); - while (!(err = rhashtable_rehash_one(ht, old_hash))) + while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; if (err == -ENOENT) err = 0; - - spin_unlock_bh(old_bucket_lock); + rht_unlock(bkt); return err; } @@ -485,6 +476,7 @@ fail: } static void *rhashtable_lookup_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, const void *key, struct rhash_head *obj) { @@ -492,15 +484,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, .ht = ht, .key = key, }; - struct rhash_head __rcu **pprev; + struct rhash_head **pprev = NULL; struct rhash_head *head; int elasticity; elasticity = RHT_ELASTICITY; - pprev = rht_bucket_var(tbl, hash); - if (!pprev) - return ERR_PTR(-ENOENT); - rht_for_each_from(head, *pprev, tbl, hash) { + rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -522,7 +511,11 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); - rcu_assign_pointer(*pprev, obj); + if (pprev) + rcu_assign_pointer(*pprev, obj); + else + /* Need to preserve the bit lock */ + rcu_assign_pointer(*bkt, rht_ptr_locked(obj)); return NULL; } @@ -534,12 +527,12 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, } static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, + struct rhash_lock_head __rcu **bkt, struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, void *data) { - struct rhash_head __rcu **pprev; struct bucket_table *new_tbl; struct rhash_head *head; @@ -562,11 +555,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); - pprev = rht_bucket_insert(ht, tbl, hash); - if (!pprev) - return ERR_PTR(-ENOMEM); - - head = rht_dereference_bucket(*pprev, tbl, hash); + head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash)); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { @@ -576,7 +565,10 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable 
*ht, RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); + /* bkt is always the head of the list, so it holds + * the lock, which we need to preserve + */ + rcu_assign_pointer(*bkt, rht_ptr_locked(obj)); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -590,6 +582,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, { struct bucket_table *new_tbl; struct bucket_table *tbl; + struct rhash_lock_head __rcu **bkt; unsigned int hash; void *data; @@ -598,14 +591,25 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); - spin_lock_bh(rht_bucket_lock(tbl, hash)); - - data = rhashtable_lookup_one(ht, tbl, hash, key, obj); - new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); - if (PTR_ERR(new_tbl) != -EEXIST) - data = ERR_CAST(new_tbl); - - spin_unlock_bh(rht_bucket_lock(tbl, hash)); + if (rcu_access_pointer(tbl->future_tbl)) + /* Failure is OK */ + bkt = rht_bucket_var(tbl, hash); + else + bkt = rht_bucket_insert(ht, tbl, hash); + if (bkt == NULL) { + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); + data = ERR_PTR(-EAGAIN); + } else { + rht_lock(bkt); + data = rhashtable_lookup_one(ht, bkt, tbl, + hash, key, obj); + new_tbl = rhashtable_insert_one(ht, bkt, tbl, + hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + rht_unlock(bkt); + } } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) @@ -1032,11 +1036,6 @@ int rhashtable_init(struct rhashtable *ht, size = rounded_hashtable_size(&ht->p); - if (params->locks_mul) - ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); - else - ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; - ht->key_len = ht->p.key_len; if (!params->hashfn) { ht->p.hashfn = jhash; @@ -1138,7 +1137,7 @@ restart: struct rhash_head *pos, *next; cond_resched(); - for (pos = rht_dereference(*rht_bucket(tbl, i), ht), + for (pos = rht_ptr(rht_dereference(*rht_bucket(tbl, i), ht)), next = !rht_is_a_nulls(pos) ? 
rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); @@ -1165,8 +1164,8 @@ void rhashtable_destroy(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_destroy); -struct rhash_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); @@ -1194,10 +1193,10 @@ struct rhash_head __rcu **__rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(__rht_bucket_nested); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) { - static struct rhash_head __rcu *rhnull; + static struct rhash_lock_head __rcu *rhnull; if (!rhnull) INIT_RHT_NULLS_HEAD(rhnull); @@ -1205,9 +1204,9 @@ struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, } EXPORT_SYMBOL_GPL(rht_bucket_nested); -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) +struct rhash_lock_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); unsigned int index = hash & ((1 << tbl->nest) - 1); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 3bd2e91bfc29..02592c2a249c 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -500,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt) struct rhash_head *pos, *next; struct test_obj_rhl *p; - pos = rht_dereference(tbl->buckets[i], ht); + pos = rht_ptr(rht_dereference(tbl->buckets[i], ht)); next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; if (!rht_is_a_nulls(pos)) { -- cgit From 149212f07856b25a9d342bfd6d736519b2ef66dc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 2 Apr 2019 10:07:45 +1100 Subject: rhashtable: add lockdep tracking to bucket bit-spin-locks. Native bit_spin_locks are not tracked by lockdep. The bit_spin_locks used for rhashtable buckets are local to the rhashtable implementation, so there is little opportunity for the sort of misuse that lockdep might detect. However locks are held while a hash function or compare function is called, and if one of these took a lock, a misbehaviour is possible. As it is quite easy to add lockdep support this unlikely possibility seems to be enough justification. So create a lockdep class for bucket bit_spin_lock and attach through a lockdep_map in each bucket_table. Without the 'nested' annotation in rhashtable_rehash_one(), lockdep correctly reports a possible problem as this lock is taken while another bucket lock (in another table) is held. This confirms that the added support works. With the correct nested annotation in place, lockdep reports no problems. Signed-off-by: NeilBrown Signed-off-by: David S. 
Miller --- lib/rhashtable.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c5d0974467ee..a8583af43b59 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -173,6 +173,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, struct bucket_table *tbl = NULL; size_t size; int i; + static struct lock_class_key __key; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); tbl = kvzalloc(size, gfp); @@ -187,6 +188,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, if (tbl == NULL) return NULL; + lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0); + tbl->size = size; rcu_head_init(&tbl->rcu); @@ -244,14 +247,14 @@ static int rhashtable_rehash_one(struct rhashtable *ht, new_hash = head_hashfn(ht, new_tbl, entry); - rht_lock(&new_tbl->buckets[new_hash]); + rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash], new_tbl, new_hash)); RCU_INIT_POINTER(entry->next, head); - rht_assign_unlock(&new_tbl->buckets[new_hash], entry); + rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry); if (pprev) rcu_assign_pointer(*pprev, next); @@ -272,14 +275,14 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, if (!bkt) return 0; - rht_lock(bkt); + rht_lock(old_tbl, bkt); while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; if (err == -ENOENT) err = 0; - rht_unlock(bkt); + rht_unlock(old_tbl, bkt); return err; } @@ -600,7 +603,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); data = ERR_PTR(-EAGAIN); } else { - rht_lock(bkt); + rht_lock(tbl, bkt); data = rhashtable_lookup_one(ht, bkt, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, bkt, tbl, @@ -608,7 +611,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); - rht_unlock(bkt); + rht_unlock(tbl, bkt); } } while (!IS_ERR_OR_NULL(new_tbl)); -- cgit From 6989808ee7636188cc091224bc76ab8e1696088a Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:54 +1100 Subject: lib/test_printf: Add empty module_exit function Currently the test_printf module does not have an exit function, which prevents the module from being unloaded. If we cannot unload the module we cannot run the tests a second time. Add an empty exit function. Acked-by: Kees Cook Signed-off-by: Tobin C. Harding Signed-off-by: Shuah Khan --- lib/test_printf.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 659b6cc0d483..601e8519319a 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -615,5 +615,11 @@ test_printf_init(void) module_init(test_printf_init); +static void __exit test_printf_exit(void) +{ +} + +module_exit(test_printf_exit); + MODULE_AUTHOR("Rasmus Villemoes "); MODULE_LICENSE("GPL"); -- cgit From 6b1a4d5b1a26ae830d50e08d7b3ca0e8b3e6b453 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:57 +1100 Subject: lib: Use new kselftest header We just added a new C header file for use with test modules that are intended to be run with kselftest. We can reduce code duplication by using this header. Use new kselftest header to reduce code duplication in test_printf and test_bitmap test modules. Acked-by: Kees Cook Signed-off-by: Tobin C.
Harding Signed-off-by: Shuah Khan --- lib/test_bitmap.c | 20 ++++---------------- lib/test_printf.c | 23 +++++------------------ 2 files changed, 9 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6cd7d0740005..792d90608052 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -12,6 +12,8 @@ #include #include +#include "../tools/testing/selftests/kselftest_module.h" + static unsigned total_tests __initdata; static unsigned failed_tests __initdata; @@ -361,7 +363,7 @@ static void noinline __init test_mem_optimisations(void) } } -static int __init test_bitmap_init(void) +static void __init selftest(void) { test_zero_clear(); test_fill_set(); @@ -369,22 +371,8 @@ static int __init test_bitmap_init(void) test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); - - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else - pr_warn("failed %u out of %u tests\n", - failed_tests, total_tests); - - return failed_tests ? -EINVAL : 0; } -static void __exit test_bitmap_cleanup(void) -{ -} - -module_init(test_bitmap_init); -module_exit(test_bitmap_cleanup); - +KSTM_MODULE_LOADERS(test_bitmap); MODULE_AUTHOR("david decotigny "); MODULE_LICENSE("GPL"); diff --git a/lib/test_printf.c b/lib/test_printf.c index 601e8519319a..f4fcc1c43739 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -21,6 +21,8 @@ #include #include +#include "../tools/testing/selftests/kselftest_module.h" + #define BUF_SIZE 256 #define PAD_SIZE 16 #define FILL_CHAR '$' @@ -590,12 +592,11 @@ test_pointer(void) flags(); } -static int __init -test_printf_init(void) +static void __init selftest(void) { alloced_buffer = kmalloc(BUF_SIZE + 2*PAD_SIZE, GFP_KERNEL); if (!alloced_buffer) - return -ENOMEM; + return; test_buffer = alloced_buffer + PAD_SIZE; test_basic(); @@ -604,22 +605,8 @@ test_printf_init(void) test_pointer(); kfree(alloced_buffer); - - if (failed_tests == 0) - pr_info("all %u tests passed\n", total_tests); - else - pr_warn("failed %u out of %u tests\n", failed_tests, total_tests); - - return failed_tests ? -EINVAL : 0; } -module_init(test_printf_init); - -static void __exit test_printf_exit(void) -{ -} - -module_exit(test_printf_exit); - +KSTM_MODULE_LOADERS(test_printf); MODULE_AUTHOR("Rasmus Villemoes "); MODULE_LICENSE("GPL"); -- cgit From 458a3bf82df4fe1f951d0f52b1e0c1e9d5a88a3b Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:58 +1100 Subject: lib/string: Add strscpy_pad() function We have a function to copy strings safely and we have a function to copy strings and zero the tail of the destination (if source string is shorter than destination buffer) but we do not have a function to do both at once. This means developers must write this themselves if they desire this functionality. This is a chore, and also leaves us open to off by one errors unnecessarily. Add a function that calls strscpy() then memset()s the tail to zero if the source string is shorter than the destination buffer. Acked-by: Kees Cook Signed-off-by: Tobin C. 
Harding Signed-off-by: Shuah Khan --- lib/string.c | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/string.c b/lib/string.c index 3ab861c1a857..6016eb3ac73d 100644 --- a/lib/string.c +++ b/lib/string.c @@ -159,11 +159,9 @@ EXPORT_SYMBOL(strlcpy); * @src: Where to copy the string from * @count: Size of destination buffer * - * Copy the string, or as much of it as fits, into the dest buffer. - * The routine returns the number of characters copied (not including - * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. - * The behavior is undefined if the string buffers overlap. - * The destination buffer is always NUL terminated, unless it's zero-sized. + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always NUL terminated, unless it's zero-sized. * * Preferred to strlcpy() since the API doesn't require reading memory * from the src string beyond the specified "count" bytes, and since @@ -173,8 +171,10 @@ EXPORT_SYMBOL(strlcpy); * * Preferred to strncpy() since it always returns a valid string, and * doesn't unnecessarily force the tail of the destination buffer to be - * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() - * with an overflow test, then just memset() the tail of the dest buffer. + * zeroed. If zeroing is desired please use strscpy_pad(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. */ ssize_t strscpy(char *dest, const char *src, size_t count) { @@ -237,6 +237,39 @@ ssize_t strscpy(char *dest, const char *src, size_t count) EXPORT_SYMBOL(strscpy); #endif +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, zeros + * the tail of the destination buffer. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. + */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count) +{ + ssize_t written; + + written = strscpy(dest, src, count); + if (written < 0 || written == count - 1) + return written; + + memset(dest + written + 1, 0, count - written - 1); + + return written; +} +EXPORT_SYMBOL(strscpy_pad); + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another -- cgit From 0b0600c8c97abe070724140802f3b8c8aee93170 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:59 +1100 Subject: lib: Add test module for strscpy_pad Add a test module for the new strscpy_pad() function. Tie it into the kselftest infrastructure for lib/ tests. Acked-by: Kees Cook Signed-off-by: Tobin C. 
Harding Signed-off-by: Shuah Khan --- lib/Kconfig.debug | 3 ++ lib/Makefile | 1 + lib/test_strscpy.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 lib/test_strscpy.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d9e81779e37..4b644ad399dd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1769,6 +1769,9 @@ config TEST_HEXDUMP config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" +config TEST_STRSCPY + tristate "Test strscpy*() family of functions at runtime" + config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index 3b08673e8881..b4e08d6234ba 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o +obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o diff --git a/lib/test_strscpy.c b/lib/test_strscpy.c new file mode 100644 index 000000000000..a827f94601f5 --- /dev/null +++ b/lib/test_strscpy.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "../tools/testing/selftests/kselftest_module.h" + +/* + * Kernel module for testing 'strscpy' family of functions. + */ + +KSTM_MODULE_GLOBALS(); + +/* + * tc() - Run a specific test case. + * @src: Source string, argument to strscpy_pad() + * @count: Size of destination buffer, argument to strscpy_pad() + * @expected: Expected return value from call to strscpy_pad() + * @terminator: 1 if there should be a terminating null byte 0 otherwise. + * @chars: Number of characters from the src string expected to be + * written to the dst buffer. + * @pad: Number of pad characters expected (in the tail of dst buffer). + * (@pad does not include the null terminator byte.) + * + * Calls strscpy_pad() and verifies the return value and state of the + * destination buffer after the call returns. + */ +static int __init tc(char *src, int count, int expected, + int chars, int terminator, int pad) +{ + int nr_bytes_poison; + int max_expected; + int max_count; + int written; + char buf[6]; + int index, i; + const char POISON = 'z'; + + total_tests++; + + if (!src) { + pr_err("null source string not supported\n"); + return -1; + } + + memset(buf, POISON, sizeof(buf)); + /* Future proofing test suite, validate args */ + max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */ + max_expected = count - 1; /* Space for the null */ + if (count > max_count) { + pr_err("count (%d) is too big (%d) ... 
aborting", count, max_count); + return -1; + } + if (expected > max_expected) { + pr_warn("expected (%d) is bigger than can possibly be returned (%d)", + expected, max_expected); + } + + written = strscpy_pad(buf, src, count); + if ((written) != (expected)) { + pr_err("%d != %d (written, expected)\n", written, expected); + goto fail; + } + + if (count && written == -E2BIG) { + if (strncmp(buf, src, count - 1) != 0) { + pr_err("buffer state invalid for -E2BIG\n"); + goto fail; + } + if (buf[count - 1] != '\0') { + pr_err("too big string is not null terminated correctly\n"); + goto fail; + } + } + + for (i = 0; i < chars; i++) { + if (buf[i] != src[i]) { + pr_err("buf[i]==%c != src[i]==%c\n", buf[i], src[i]); + goto fail; + } + } + + if (terminator) { + if (buf[count - 1] != '\0') { + pr_err("string is not null terminated correctly\n"); + goto fail; + } + } + + for (i = 0; i < pad; i++) { + index = chars + terminator + i; + if (buf[index] != '\0') { + pr_err("padding missing at index: %d\n", i); + goto fail; + } + } + + nr_bytes_poison = sizeof(buf) - chars - terminator - pad; + for (i = 0; i < nr_bytes_poison; i++) { + index = sizeof(buf) - 1 - i; /* Check from the end back */ + if (buf[index] != POISON) { + pr_err("poison value missing at index: %d\n", i); + goto fail; + } + } + + return 0; +fail: + failed_tests++; + return -1; +} + +static void __init selftest(void) +{ + /* + * tc() uses a destination buffer of size 6 and needs at + * least 2 characters spare (one for null and one to check for + * overflow). This means we should only call tc() with + * strings up to a maximum of 4 characters long and 'count' + * should not exceed 4. To test with longer strings increase + * the buffer size in tc(). + */ + + /* tc(src, count, expected, chars, terminator, pad) */ + KSTM_CHECK_ZERO(tc("a", 0, -E2BIG, 0, 0, 0)); + KSTM_CHECK_ZERO(tc("", 0, -E2BIG, 0, 0, 0)); + + KSTM_CHECK_ZERO(tc("a", 1, -E2BIG, 0, 1, 0)); + KSTM_CHECK_ZERO(tc("", 1, 0, 0, 1, 0)); + + KSTM_CHECK_ZERO(tc("ab", 2, -E2BIG, 1, 1, 0)); + KSTM_CHECK_ZERO(tc("a", 2, 1, 1, 1, 0)); + KSTM_CHECK_ZERO(tc("", 2, 0, 0, 1, 1)); + + KSTM_CHECK_ZERO(tc("abc", 3, -E2BIG, 2, 1, 0)); + KSTM_CHECK_ZERO(tc("ab", 3, 2, 2, 1, 0)); + KSTM_CHECK_ZERO(tc("a", 3, 1, 1, 1, 1)); + KSTM_CHECK_ZERO(tc("", 3, 0, 0, 1, 2)); + + KSTM_CHECK_ZERO(tc("abcd", 4, -E2BIG, 3, 1, 0)); + KSTM_CHECK_ZERO(tc("abc", 4, 3, 3, 1, 0)); + KSTM_CHECK_ZERO(tc("ab", 4, 2, 2, 1, 1)); + KSTM_CHECK_ZERO(tc("a", 4, 1, 1, 1, 2)); + KSTM_CHECK_ZERO(tc("", 4, 0, 0, 1, 3)); +} + +KSTM_MODULE_LOADERS(test_strscpy); +MODULE_AUTHOR("Tobin C. Harding "); +MODULE_LICENSE("GPL"); -- cgit From 224b44d46ffe9ad7785cc45c7a18934d492e66ec Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 24 Jan 2019 21:37:43 -0600 Subject: lib: zstd: Mark expected switch fall-throughs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. 
This patch fixes the following warnings: lib/zstd/bitstream.h:261:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/bitstream.h:262:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/bitstream.h:263:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/bitstream.h:264:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/bitstream.h:265:30: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/compress.c:3183:16: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/decompress.c:1770:18: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/decompress.c:2376:15: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/decompress.c:2404:15: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/decompress.c:2435:16: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/zstd/huf_compress.c: In function ‘HUF_compress1X_usingCTable’: lib/zstd/huf_compress.c:535:5: warning: this statement may fall through [-Wimplicit-fallthrough=] if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \ ^ lib/zstd/huf_compress.c:558:54: note: in expansion of macro ‘HUF_FLUSHBITS_2’ case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); ^~~~~~~~~~~~~~~ lib/zstd/huf_compress.c:559:2: note: here case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); ^~~~ lib/zstd/huf_compress.c:531:5: warning: this statement may fall through [-Wimplicit-fallthrough=] if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \ ^ lib/zstd/huf_compress.c:559:54: note: in expansion of macro ‘HUF_FLUSHBITS_1’ case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); ^~~~~~~~~~~~~~~ lib/zstd/huf_compress.c:560:2: note: here case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); ^~~~ AR lib/zstd//built-in.a Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. 
Silva --- lib/zstd/bitstream.h | 5 +++++ lib/zstd/compress.c | 1 + lib/zstd/decompress.c | 5 ++++- lib/zstd/huf_compress.c | 2 ++ 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h index a826b99e1d63..3a49784d5c61 100644 --- a/lib/zstd/bitstream.h +++ b/lib/zstd/bitstream.h @@ -259,10 +259,15 @@ ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, s bitD->bitContainer = *(const BYTE *)(bitD->start); switch (srcSize) { case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16); + /* fall through */ case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24); + /* fall through */ case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32); + /* fall through */ case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24; + /* fall through */ case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16; + /* fall through */ case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8; default:; } diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c index f9166cf4f7a9..5e0b67003e55 100644 --- a/lib/zstd/compress.c +++ b/lib/zstd/compress.c @@ -3182,6 +3182,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t * zcs->outBuffFlushedSize = 0; zcs->stage = zcss_flush; /* pass-through to flush stage */ } + /* fall through */ case zcss_flush: { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c index b17846725ca0..269ee9a796c1 100644 --- a/lib/zstd/decompress.c +++ b/lib/zstd/decompress.c @@ -1768,6 +1768,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, c return 0; } dctx->expected = 0; /* not necessary to copy more */ + /* fall through */ case ZSTDds_decodeFrameHeader: memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); @@ -2375,7 +2376,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB } zds->stage = zdss_read; } - /* pass-through */ + /* fall through */ case zdss_read: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); @@ -2404,6 +2405,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB zds->stage = zdss_load; /* pass-through */ } + /* fall through */ case zdss_load: { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); @@ -2436,6 +2438,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB /* pass-through */ } } + /* fall through */ case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c index 40055a7016e6..e727812d12aa 100644 --- a/lib/zstd/huf_compress.c +++ b/lib/zstd/huf_compress.c @@ -556,7 +556,9 @@ size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, si n = srcSize & ~3; /* join to mod 4 */ switch (srcSize & 3) { case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); + /* fall through */ case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); + /* fall through */ case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); case 0: default:; -- cgit From 8a05452ca460b05c985eadc7b5a4f040f124463e Mon Sep 17 00:00:00 2001 From: 
"Gustavo A. R. Silva" Date: Fri, 25 Jan 2019 14:25:24 -0600 Subject: lib/cmdline.c: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warnings: lib/cmdline.c:137:7: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/cmdline.c:140:7: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/cmdline.c:143:7: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/cmdline.c:146:7: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/cmdline.c:149:7: warning: this statement may fall through [-Wimplicit-fallthrough=] Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Acked-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- lib/cmdline.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/cmdline.c b/lib/cmdline.c index 171c19b6888e..dc59d6216318 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -135,18 +135,23 @@ unsigned long long memparse(const char *ptr, char **retptr) case 'E': case 'e': ret <<= 10; + /* fall through */ case 'P': case 'p': ret <<= 10; + /* fall through */ case 'T': case 't': ret <<= 10; + /* fall through */ case 'G': case 'g': ret <<= 10; + /* fall through */ case 'M': case 'm': ret <<= 10; + /* fall through */ case 'K': case 'k': ret <<= 10; -- cgit From afb33e40d54e365457a4fb5eada6df55cd11a9cf Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 25 Jan 2019 14:46:46 -0600 Subject: ASN.1: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warnings: lib/asn1_decoder.c:386:6: warning: this statement may fall through [-Wimplicit-fallthrough=] lib/asn1_decoder.c:449:6: warning: this statement may fall through [-Wimplicit-fallthrough=] Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enabling -Wimplicit-fallthrough. Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- lib/asn1_decoder.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index dc14beae2c9a..8f3d207d2b00 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -385,6 +385,8 @@ next_op: case ASN1_OP_END_SET_ACT: if (unlikely(!(flags & FLAG_MATCHED))) goto tag_mismatch; + /* fall through */ + case ASN1_OP_END_SEQ: case ASN1_OP_END_SET_OF: case ASN1_OP_END_SEQ_OF: @@ -450,6 +452,8 @@ next_op: pc += asn1_op_lengths[op]; goto next_op; } + /* fall through */ + case ASN1_OP_ACT: ret = actions[machine[pc + 1]](context, hdr, tag, data + tdp, len); if (ret < 0) -- cgit From d75f773c86a2b8b7278e2c33343b46a4024bc002 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 25 Mar 2019 21:32:28 +0200 Subject: treewide: Switch printk users from %pf and %pF to %ps and %pS, respectively %pF and %pf are functionally equivalent to %pS and %ps conversion specifiers. The former are deprecated, therefore switch the current users to use the preferred variant. The changes have been produced by the following command: git grep -l '%p[fF]' | grep -v '^\(tools\|Documentation\)/' | \ while read i; do perl -i -pe 's/%pf/%ps/g; s/%pF/%pS/g;' $i; done And verifying the result. 
Link: http://lkml.kernel.org/r/20190325193229.23390-1-sakari.ailus@linux.intel.com Cc: Andy Shevchenko Cc: linux-arm-kernel@lists.infradead.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: xen-devel@lists.xenproject.org Cc: linux-acpi@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: drbd-dev@lists.linbit.com Cc: linux-block@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-nvdimm@lists.01.org Cc: linux-pci@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: linux-btrfs@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: linux-mm@kvack.org Cc: ceph-devel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Sakari Ailus Acked-by: David Sterba (for btrfs) Acked-by: Mike Rapoport (for mm/memblock.c) Acked-by: Bjorn Helgaas (for drivers/pci) Acked-by: Rafael J. Wysocki Signed-off-by: Petr Mladek --- lib/error-inject.c | 2 +- lib/percpu-refcount.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/error-inject.c b/lib/error-inject.c index c0d4600f4896..aa63751c916f 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -189,7 +189,7 @@ static int ei_seq_show(struct seq_file *m, void *v) { struct ei_entry *ent = list_entry(v, struct ei_entry, list); - seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr, + seq_printf(m, "%ps\t%s\n", (void *)ent->start_addr, error_type_string(ent->etype)); return 0; } diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 9877682e49c7..da54318d3b55 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -151,7 +151,7 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count); WARN_ONCE(atomic_long_read(&ref->count) <= 0, - "percpu ref (%pf) <= 0 (%ld) after switching to atomic", + "percpu ref (%ps) <= 0 (%ld) after switching to atomic", ref->release, atomic_long_read(&ref->count)); /* @ref is viewed as dead on all CPUs, send out switch confirmation */ @@ -333,7 +333,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, spin_lock_irqsave(&percpu_ref_switch_lock, flags); WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, - "%s called more than once on %pf!", __func__, ref->release); + "%s called more than once on %ps!", __func__, ref->release); ref->percpu_count_ptr |= __PERCPU_REF_DEAD; __percpu_ref_switch_mode(ref, confirm_kill); -- cgit From c252aa3e8ed3ac54060b1838f6a47f29799a133d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 11 Apr 2019 18:43:06 -0500 Subject: rhashtable: use struct_size() in kvzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kvzalloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kvzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- lib/rhashtable.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a8583af43b59..9c84f5cef69c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -175,8 +175,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, int i; static struct lock_class_key __key; - size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kvzalloc(size, gfp); + tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp); size = nbuckets; -- cgit From e4edbe3c1f44c84f319149aeb998e7e36b3b897f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Apr 2019 11:52:07 +1000 Subject: rhashtable: fix some __rcu annotation errors With these annotations, the rhashtable now gets no warnings when compiled with "C=1" for sparse checking. Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9c84f5cef69c..e387ceb00e86 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -223,7 +223,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); int err = -EAGAIN; struct rhash_head *head, *next, *entry; - struct rhash_head **pprev = NULL; + struct rhash_head __rcu **pprev = NULL; unsigned int new_hash; if (new_tbl->nest) @@ -486,7 +486,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, .ht = ht, .key = key, }; - struct rhash_head **pprev = NULL; + struct rhash_head __rcu **pprev = NULL; struct rhash_head *head; int elasticity; -- cgit From adc6a3ab192eb40fb9d8b093c87d9aa785af4513 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Apr 2019 11:52:08 +1000 Subject: rhashtable: move dereference inside rht_ptr() Rather than dereferencing a pointer to a bucket and then passing the result to rht_ptr(), we now pass in the pointer and do the dereference in rht_ptr(). This requires that we pass in the tbl and hash as well to support RCU checks, and means that the various rht_for_each functions can expect a pointer that can be dereferenced without further care. There are two places where we dereference a bucket pointer where there is no testable protection - in each case we know that we must have exclusive access without having taken a lock. The previous code used rht_dereference() to pretend that holding the mutex provided protection, but holding the mutex never provides protection for accessing buckets. So instead introduce rht_ptr_exclusive() that can be used when there is known to be exclusive access without holding any locks. Signed-off-by: NeilBrown Signed-off-by: David S.
Miller --- lib/rhashtable.c | 12 ++++++------ lib/test_rhashtable.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e387ceb00e86..237368ea98c5 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -231,7 +231,8 @@ static int rhashtable_rehash_one(struct rhashtable *ht, err = -ENOENT; - rht_for_each_from(entry, rht_ptr(*bkt), old_tbl, old_hash) { + rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash), + old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -248,8 +249,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); - head = rht_ptr(rht_dereference_bucket(new_tbl->buckets[new_hash], - new_tbl, new_hash)); + head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash); RCU_INIT_POINTER(entry->next, head); @@ -491,7 +491,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, int elasticity; elasticity = RHT_ELASTICITY; - rht_for_each_from(head, rht_ptr(*bkt), tbl, hash) { + rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -557,7 +557,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); - head = rht_ptr(rht_dereference_bucket(*bkt, tbl, hash)); + head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { @@ -1139,7 +1139,7 @@ restart: struct rhash_head *pos, *next; cond_resched(); - for (pos = rht_ptr(rht_dereference(*rht_bucket(tbl, i), ht)), + for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 02592c2a249c..084fe5a6ac57 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -500,7 +500,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt) struct rhash_head *pos, *next; struct test_obj_rhl *p; - pos = rht_ptr(rht_dereference(tbl->buckets[i], ht)); + pos = rht_ptr_exclusive(tbl->buckets + i); next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; if (!rht_is_a_nulls(pos)) { -- cgit From f4712b46a529ca2da078c82d5d99d367c7ebf82b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Apr 2019 11:52:08 +1000 Subject: rhashtable: replace rht_ptr_locked() with rht_assign_locked() The only times rht_ptr_locked() is used, it is to store a new value in a bucket-head. This is the only time it makes sense to use it too. So replace it by a function which does the whole task: Sets the lock bit and assigns to a bucket head. Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 237368ea98c5..ef5378efdef3 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -259,7 +259,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, rcu_assign_pointer(*pprev, next); else /* Need to preserved the bit lock. 
*/ - rcu_assign_pointer(*bkt, rht_ptr_locked(next)); + rht_assign_locked(bkt, next); out: return err; @@ -517,7 +517,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, rcu_assign_pointer(*pprev, obj); else /* Need to preserve the bit lock */ - rcu_assign_pointer(*bkt, rht_ptr_locked(obj)); + rht_assign_locked(bkt, obj); return NULL; } @@ -570,7 +570,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, /* bkt is always the head of the list, so it holds * the lock, which we need to preserve */ - rcu_assign_pointer(*bkt, rht_ptr_locked(obj)); + rht_assign_locked(bkt, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) -- cgit From ca0b709d1a07b1fe1fb356d8d58f220287f85672 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Apr 2019 11:52:08 +1000 Subject: rhashtable: use BIT(0) for locking. As reported by Guenter Roeck, the new bit-locking using BIT(1) doesn't work on the m68k architecture. m68k only requires 2-byte alignment for words and longwords, so there is only one unused bit in pointers to structs - We currently use two, one for the NULLS marker at the end of the linked list, and one for the bit-lock in the head of the list. The two uses don't need to conflict as we never need the head of the list to be a NULLS marker - the marker is only needed to check if an object has moved to a different table, and the bucket head cannot move. The NULLS marker is only needed in a ->next pointer. As we already have different types for the bucket head pointer (struct rhash_lock_head) and the ->next pointers (struct rhash_head), it is fairly easy to treat the lsb differently in each. So: Initialize bucket heads to NULL, and use the lsb for locking. When loading the pointer from the bucket head, if it is NULL (ignoring the lock bit), report as being the expected NULLS marker. When storing a value into a bucket head, if it is a NULLS marker, store NULL instead. And convert all places that used bit 1 for locking, to use bit 0. Fixes: 8f0db018006a ("rhashtable: use bit_spin_locks to protect hash bucket.") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ef5378efdef3..6529fe1b45c1 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -59,7 +59,7 @@ int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) return 1; if (unlikely(tbl->nest)) return 1; - return bit_spin_is_locked(1, (unsigned long *)&tbl->buckets[hash]); + return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]); } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #else -- cgit From 877b5691f27a1aec0d9b53095a323e45c30069e2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 14 Apr 2019 17:37:09 -0700 Subject: crypto: shash - remove shash_desc::flags The flags field in 'struct shash_desc' never actually does anything. The only ostensibly supported flag is CRYPTO_TFM_REQ_MAY_SLEEP. However, no shash algorithm ever sleeps, making this flag a no-op. With this being the case, inevitably some users who can't sleep wrongly pass MAY_SLEEP. These would all need to be fixed if any shash algorithm actually started sleeping. For example, the shash_ahash_*() functions, which wrap a shash algorithm with the ahash API, pass through MAY_SLEEP from the ahash API to the shash API.
However, the shash functions are called under kmap_atomic(), so actually they're assumed to never sleep. Even if it turns out that some users do need preemption points while hashing large buffers, we could easily provide a helper function crypto_shash_update_large() which divides the data into smaller chunks and calls crypto_shash_update() and cond_resched() for each chunk. It's not necessary to have a flag in 'struct shash_desc', nor is it necessary to make individual shash algorithms aware of this at all. Therefore, remove shash_desc::flags, and document that the crypto_shash_*() functions can be called from any context. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- lib/crc-t10dif.c | 1 - lib/digsig.c | 1 - lib/libcrc32c.c | 1 - 3 files changed, 3 deletions(-) (limited to 'lib') diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 4d0d47c1ffbd..e89ebfdbb0fc 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -69,7 +69,6 @@ __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) rcu_read_lock(); desc.shash.tfm = rcu_dereference(crct10dif_tfm); - desc.shash.flags = 0; *(__u16 *)desc.ctx = crc; err = crypto_shash_update(&desc.shash, buffer, len); diff --git a/lib/digsig.c b/lib/digsig.c index 6ba6fcd92dd1..3b0a579bdcdf 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -240,7 +240,6 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen, goto err; desc->tfm = shash; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; crypto_shash_init(desc); crypto_shash_update(desc, data, datalen); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index f0a2934605bf..4e9829c4d64c 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -47,7 +47,6 @@ u32 crc32c(u32 crc, const void *address, unsigned int length) int err; shash->tfm = tfm; - shash->flags = 0; *ctx = crc; err = crypto_shash_update(shash, address, length); -- cgit From ba2e544075c282a5bb21df7752efad3b42d6077b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 16 Apr 2019 17:27:20 +1000 Subject: lib/siphash.c: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch aims to suppress up to 18 missing-break-in-switch false positives on some architectures. Cc: Gustavo A. R. Silva Cc: Kees Cook Signed-off-by: Stephen Rothwell Reviewed-by: Jason A. 
Donenfeld Signed-off-by: Greg Kroah-Hartman --- lib/siphash.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/siphash.c b/lib/siphash.c index 3ae58b4edad6..c47bb6ff2149 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -68,11 +68,11 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= le32_to_cpup(data); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -101,11 +101,11 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= get_unaligned_le32(end); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } @@ -268,11 +268,11 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= le32_to_cpup(data); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -301,11 +301,11 @@ u32 __hsiphash_unaligned(const void *data, size_t len, bytemask_from_count(left))); #else switch (left) { - case 7: b |= ((u64)end[6]) << 48; - case 6: b |= ((u64)end[5]) << 40; - case 5: b |= ((u64)end[4]) << 32; + case 7: b |= ((u64)end[6]) << 48; /* fall through */ + case 6: b |= ((u64)end[5]) << 40; /* fall through */ + case 5: b |= ((u64)end[4]) << 32; /* fall through */ case 4: b |= get_unaligned_le32(end); break; - case 3: b |= ((u64)end[2]) << 16; + case 3: b |= ((u64)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } @@ -431,7 +431,7 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) v0 ^= m; } switch (left) { - case 3: b |= ((u32)end[2]) << 16; + case 3: b |= ((u32)end[2]) << 16; /* fall through */ case 2: b |= le16_to_cpup(data); break; case 1: b |= end[0]; } @@ -454,7 +454,7 @@ u32 __hsiphash_unaligned(const void *data, size_t len, v0 ^= m; } switch (left) { - case 3: b |= ((u32)end[2]) << 16; + case 3: b |= ((u32)end[2]) << 16; /* fall through */ case 2: b |= get_unaligned_le16(end); break; case 1: b |= end[0]; } -- cgit From aa30f47cf666111f6bbfd15f290a27e8a7b9d854 Mon Sep 17 00:00:00 2001 From: Kimberly Brown Date: Mon, 1 Apr 2019 22:51:18 -0400 Subject: kobject: Add support for default attribute groups to kobj_type MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kobj_type currently uses a list of individual attributes to store default attributes. Attribute groups are more flexible than a list of attributes because groups provide support for attribute visibility. So, add support for default attribute groups to kobj_type. In future patches, the existing uses of kobj_type’s attribute list will be converted to attribute groups. When that is complete, kobj_type’s attribute list, “default_attrs”, will be removed. Signed-off-by: Kimberly Brown Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index aa89edcd2b63..ede40005db28 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -82,6 +82,7 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { + const struct kobj_type *ktype = get_ktype(kobj); const struct kobj_ns_type_operations *ops; int error; @@ -95,6 +96,14 @@ static int create_dir(struct kobject *kobj) return error; } + if (ktype) { + error = sysfs_create_groups(kobj, ktype->default_groups); + if (error) { + sysfs_remove_dir(kobj); + return error; + } + } + /* * @kobj->sd may be deleted by an ancestor going away. Hold an * extra reference so that it stays until @kobj is gone. @@ -584,11 +593,16 @@ EXPORT_SYMBOL_GPL(kobject_move); void kobject_del(struct kobject *kobj) { struct kernfs_node *sd; + const struct kobj_type *ktype = get_ktype(kobj); if (!kobj) return; sd = kobj->sd; + + if (ktype) + sysfs_remove_groups(kobj, ktype->default_groups); + sysfs_remove_dir(kobj); sysfs_put(sd); -- cgit From 6eea242f9bcdf828bb56334d8ee5c7cb466e4bcd Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:41 +0200 Subject: vsprintf: Shuffle restricted_pointer() This is just a preparation step for further changes. The patch does not change the code. Link: http://lkml.kernel.org/r/20190417115350.20479-2-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Andy Shevchenko Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- lib/vsprintf.c | 98 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 791b6fa36905..eb7b4a06e1f0 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -717,6 +717,55 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, return pointer_string(buf, end, (const void *)hashval, spec); } +int kptr_restrict __read_mostly; + +static noinline_for_stack +char *restricted_pointer(char *buf, char *end, const void *ptr, + struct printf_spec spec) +{ + switch (kptr_restrict) { + case 0: + /* Always print %pK values */ + break; + case 1: { + const struct cred *cred; + + /* + * kptr_restrict==1 cannot be used in IRQ context + * because its test for CAP_SYSLOG would be meaningless. + */ + if (in_irq() || in_serving_softirq() || in_nmi()) { + if (spec.field_width == -1) + spec.field_width = 2 * sizeof(ptr); + return string(buf, end, "pK-error", spec); + } + + /* + * Only print the real pointer value if the current + * process has CAP_SYSLOG and is running with the + * same credentials it started with. This is because + * access to files is checked at open() time, but %pK + * checks permission at read() time. 
We don't want to + * leak pointer values if a binary opens a file using + * %pK and then elevates privileges before reading it. + */ + cred = current_cred(); + if (!has_capability_noaudit(current, CAP_SYSLOG) || + !uid_eq(cred->euid, cred->uid) || + !gid_eq(cred->egid, cred->gid)) + ptr = NULL; + break; + } + case 2: + default: + /* Always print 0's for %pK */ + ptr = NULL; + break; + } + + return pointer_string(buf, end, ptr, spec); +} + static noinline_for_stack char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, const char *fmt) @@ -1476,55 +1525,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } -int kptr_restrict __read_mostly; - -static noinline_for_stack -char *restricted_pointer(char *buf, char *end, const void *ptr, - struct printf_spec spec) -{ - switch (kptr_restrict) { - case 0: - /* Always print %pK values */ - break; - case 1: { - const struct cred *cred; - - /* - * kptr_restrict==1 cannot be used in IRQ context - * because its test for CAP_SYSLOG would be meaningless. - */ - if (in_irq() || in_serving_softirq() || in_nmi()) { - if (spec.field_width == -1) - spec.field_width = 2 * sizeof(ptr); - return string(buf, end, "pK-error", spec); - } - - /* - * Only print the real pointer value if the current - * process has CAP_SYSLOG and is running with the - * same credentials it started with. This is because - * access to files is checked at open() time, but %pK - * checks permission at read() time. We don't want to - * leak pointer values if a binary opens a file using - * %pK and then elevates privileges before reading it. - */ - cred = current_cred(); - if (!has_capability_noaudit(current, CAP_SYSLOG) || - !uid_eq(cred->euid, cred->uid) || - !gid_eq(cred->egid, cred->gid)) - ptr = NULL; - break; - } - case 2: - default: - /* Always print 0's for %pK */ - ptr = NULL; - break; - } - - return pointer_string(buf, end, ptr, spec); -} - static noinline_for_stack char *netdev_bits(char *buf, char *end, const void *addr, struct printf_spec spec, const char *fmt) -- cgit From 1ac2f9789c4b76ad749870c25ffae0cbcd1f510f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:42 +0200 Subject: vsprintf: Consistent %pK handling for kptr_restrict == 0 restricted_pointer() pretends that it prints the address when kptr_restrict is set to zero. But it is never called in this situation. Instead, pointer() falls back to ptr_to_id() and hashes the pointer. This patch removes the potential confusion. kptr_restrict is checked only in restricted_pointer(). It actually fixes a small race when the address might get printed unhashed: CPU0 CPU1 pointer() if (!kptr_restrict) /* for example set to 2 */ restricted_pointer() /* echo 0 >/proc/sys/kernel/kptr_restrict */ proc_dointvec_minmax_sysadmin() kptr_restrict = 0; switch(kptr_restrict) case 0: break; number() Fixes: ef0010a30935de4e0211 ("vsprintf: don't use 'restricted_pointer()' when not restricting") Link: http://lkml.kernel.org/r/20190417115350.20479-3-pmladek@suse.com To: Andy Shevchenko To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C .
Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: Kees Cook Reviewed-by: Andy Shevchenko Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- lib/vsprintf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index eb7b4a06e1f0..2af48948a973 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -725,8 +725,8 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, { switch (kptr_restrict) { case 0: - /* Always print %pK values */ - break; + /* Handle as %p, hash and do _not_ leak addresses. */ + return ptr_to_id(buf, end, ptr, spec); case 1: { const struct cred *cred; @@ -2041,8 +2041,6 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return buf; } case 'K': - if (!kptr_restrict) - break; return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, spec, fmt); -- cgit From d529ac4194f2c346b2f62f0f473a578a7357039b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:43 +0200 Subject: vsprintf: Do not check address of well-known strings We are going to check the address using probe_kernel_address(). It will be more expensive and it does not make sense for well-known addresses. This patch splits the string() function. The variant without the check is then used in locations that handle string constants or strings defined as local variables. This patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20190417115350.20479-4-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek Reviewed-by: Sergey Senozhatsky --- lib/vsprintf.c | 81 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 2af48948a973..c9c9a1179870 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -593,15 +593,13 @@ char *widen_string(char *buf, int n, char *end, struct printf_spec spec) return buf; } -static noinline_for_stack -char *string(char *buf, char *end, const char *s, struct printf_spec spec) +/* Handle string from a well known address.
*/ +static char *string_nocheck(char *buf, char *end, const char *s, + struct printf_spec spec) { int len = 0; size_t lim = spec.precision; - if ((unsigned long)s < PAGE_SIZE) - s = "(null)"; - while (lim--) { char c = *s++; if (!c) @@ -615,6 +613,15 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec) } static noinline_for_stack +char *string(char *buf, char *end, const char *s, + struct printf_spec spec) +{ + if ((unsigned long)s < PAGE_SIZE) + s = "(null)"; + + return string_nocheck(buf, end, s, spec); +} + char *pointer_string(char *buf, char *end, const void *ptr, struct printf_spec spec) { @@ -701,7 +708,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ - return string(buf, end, str, spec); + return string_nocheck(buf, end, str, spec); } #ifdef CONFIG_64BIT @@ -737,7 +744,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, if (in_irq() || in_serving_softirq() || in_nmi()) { if (spec.field_width == -1) spec.field_width = 2 * sizeof(ptr); - return string(buf, end, "pK-error", spec); + return string_nocheck(buf, end, "pK-error", spec); } /* @@ -851,7 +858,7 @@ char *symbol_string(char *buf, char *end, void *ptr, else sprint_symbol_no_offset(sym, value); - return string(buf, end, sym, spec); + return string_nocheck(buf, end, sym, spec); #else return special_hex_number(buf, end, value, sizeof(void *)); #endif @@ -937,27 +944,27 @@ char *resource_string(char *buf, char *end, struct resource *res, *p++ = '['; if (res->flags & IORESOURCE_IO) { - p = string(p, pend, "io ", str_spec); + p = string_nocheck(p, pend, "io ", str_spec); specp = &io_spec; } else if (res->flags & IORESOURCE_MEM) { - p = string(p, pend, "mem ", str_spec); + p = string_nocheck(p, pend, "mem ", str_spec); specp = &mem_spec; } else if (res->flags & IORESOURCE_IRQ) { - p = string(p, pend, "irq ", str_spec); + p = string_nocheck(p, pend, "irq ", str_spec); specp = &default_dec_spec; } else if (res->flags & IORESOURCE_DMA) { - p = string(p, pend, "dma ", str_spec); + p = string_nocheck(p, pend, "dma ", str_spec); specp = &default_dec_spec; } else if (res->flags & IORESOURCE_BUS) { - p = string(p, pend, "bus ", str_spec); + p = string_nocheck(p, pend, "bus ", str_spec); specp = &bus_spec; } else { - p = string(p, pend, "??? ", str_spec); + p = string_nocheck(p, pend, "??? 
", str_spec); specp = &mem_spec; decode = 0; } if (decode && res->flags & IORESOURCE_UNSET) { - p = string(p, pend, "size ", str_spec); + p = string_nocheck(p, pend, "size ", str_spec); p = number(p, pend, resource_size(res), *specp); } else { p = number(p, pend, res->start, *specp); @@ -968,21 +975,21 @@ char *resource_string(char *buf, char *end, struct resource *res, } if (decode) { if (res->flags & IORESOURCE_MEM_64) - p = string(p, pend, " 64bit", str_spec); + p = string_nocheck(p, pend, " 64bit", str_spec); if (res->flags & IORESOURCE_PREFETCH) - p = string(p, pend, " pref", str_spec); + p = string_nocheck(p, pend, " pref", str_spec); if (res->flags & IORESOURCE_WINDOW) - p = string(p, pend, " window", str_spec); + p = string_nocheck(p, pend, " window", str_spec); if (res->flags & IORESOURCE_DISABLED) - p = string(p, pend, " disabled", str_spec); + p = string_nocheck(p, pend, " disabled", str_spec); } else { - p = string(p, pend, " flags ", str_spec); + p = string_nocheck(p, pend, " flags ", str_spec); p = number(p, pend, res->flags, default_flag_spec); } *p++ = ']'; *p = '\0'; - return string(buf, end, sym, spec); + return string_nocheck(buf, end, sym, spec); } static noinline_for_stack @@ -1150,7 +1157,7 @@ char *mac_address_string(char *buf, char *end, u8 *addr, } *p = '\0'; - return string(buf, end, mac_addr, spec); + return string_nocheck(buf, end, mac_addr, spec); } static noinline_for_stack @@ -1313,7 +1320,7 @@ char *ip6_addr_string(char *buf, char *end, const u8 *addr, else ip6_string(ip6_addr, addr, fmt); - return string(buf, end, ip6_addr, spec); + return string_nocheck(buf, end, ip6_addr, spec); } static noinline_for_stack @@ -1324,7 +1331,7 @@ char *ip4_addr_string(char *buf, char *end, const u8 *addr, ip4_string(ip4_addr, addr, fmt); - return string(buf, end, ip4_addr, spec); + return string_nocheck(buf, end, ip4_addr, spec); } static noinline_for_stack @@ -1386,7 +1393,7 @@ char *ip6_addr_string_sa(char *buf, char *end, const struct sockaddr_in6 *sa, } *p = '\0'; - return string(buf, end, ip6_addr, spec); + return string_nocheck(buf, end, ip6_addr, spec); } static noinline_for_stack @@ -1421,7 +1428,7 @@ char *ip4_addr_string_sa(char *buf, char *end, const struct sockaddr_in *sa, } *p = '\0'; - return string(buf, end, ip4_addr, spec); + return string_nocheck(buf, end, ip4_addr, spec); } static noinline_for_stack @@ -1522,7 +1529,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr, *p = 0; - return string(buf, end, uuid, spec); + return string_nocheck(buf, end, uuid, spec); } static noinline_for_stack @@ -1736,13 +1743,13 @@ char *device_node_gen_full_name(const struct device_node *np, char *buf, char *e /* special case for root node */ if (!parent) - return string(buf, end, "/", default_str_spec); + return string_nocheck(buf, end, "/", default_str_spec); for (depth = 0; parent->parent; depth++) parent = parent->parent; for ( ; depth >= 0; depth--) { - buf = string(buf, end, "/", default_str_spec); + buf = string_nocheck(buf, end, "/", default_str_spec); buf = string(buf, end, device_node_name_for_depth(np, depth), default_str_spec); } @@ -1770,10 +1777,10 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, str_spec.field_width = -1; if (!IS_ENABLED(CONFIG_OF)) - return string(buf, end, "(!OF)", spec); + return string_nocheck(buf, end, "(!OF)", spec); if ((unsigned long)dn < PAGE_SIZE) - return string(buf, end, "(null)", spec); + return string_nocheck(buf, end, "(null)", spec); /* simple case without anything any more format specifiers */ 
fmt++; @@ -1814,7 +1821,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, tbuf[2] = of_node_check_flag(dn, OF_POPULATED) ? 'P' : '-'; tbuf[3] = of_node_check_flag(dn, OF_POPULATED_BUS) ? 'B' : '-'; tbuf[4] = 0; - buf = string(buf, end, tbuf, str_spec); + buf = string_nocheck(buf, end, tbuf, str_spec); break; case 'c': /* major compatible string */ ret = of_property_read_string(dn, "compatible", &p); @@ -1825,10 +1832,10 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, has_mult = false; of_property_for_each_string(dn, "compatible", prop, p) { if (has_mult) - buf = string(buf, end, ",", str_spec); - buf = string(buf, end, "\"", str_spec); + buf = string_nocheck(buf, end, ",", str_spec); + buf = string_nocheck(buf, end, "\"", str_spec); buf = string(buf, end, p, str_spec); - buf = string(buf, end, "\"", str_spec); + buf = string_nocheck(buf, end, "\"", str_spec); has_mult = true; } @@ -1966,7 +1973,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, */ if (spec.field_width == -1) spec.field_width = default_width; - return string(buf, end, "(null)", spec); + return string_nocheck(buf, end, "(null)", spec); } switch (*fmt) { @@ -2022,7 +2029,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case AF_INET6: return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); default: - return string(buf, end, "(invalid address)", spec); + return string_nocheck(buf, end, "(invalid address)", spec); }} } break; -- cgit From f00cc102b862be688fe090aec30e08d61a8f5e63 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:44 +0200 Subject: vsprintf: Factor out %p[iI] handler as ip_addr_string() Move the non-trivial code from the long pointer() function. We are going to improve error handling that will make it even more complicated. This patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20190417115350.20479-5-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . 
Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 52 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index c9c9a1179870..8ca29bc0d786 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1431,6 +1431,35 @@ char *ip4_addr_string_sa(char *buf, char *end, const struct sockaddr_in *sa, return string_nocheck(buf, end, ip4_addr, spec); } +static noinline_for_stack +char *ip_addr_string(char *buf, char *end, const void *ptr, + struct printf_spec spec, const char *fmt) +{ + switch (fmt[1]) { + case '6': + return ip6_addr_string(buf, end, ptr, spec, fmt); + case '4': + return ip4_addr_string(buf, end, ptr, spec, fmt); + case 'S': { + const union { + struct sockaddr raw; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } *sa = ptr; + + switch (sa->raw.sa_family) { + case AF_INET: + return ip4_addr_string_sa(buf, end, &sa->v4, spec, fmt); + case AF_INET6: + return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); + default: + return string_nocheck(buf, end, "(invalid address)", spec); + }} + } + + return ptr_to_id(buf, end, ptr, spec); +} + static noinline_for_stack char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) @@ -2011,28 +2040,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, * 4: 001.002.003.004 * 6: 000102...0f */ - switch (fmt[1]) { - case '6': - return ip6_addr_string(buf, end, ptr, spec, fmt); - case '4': - return ip4_addr_string(buf, end, ptr, spec, fmt); - case 'S': { - const union { - struct sockaddr raw; - struct sockaddr_in v4; - struct sockaddr_in6 v6; - } *sa = ptr; - - switch (sa->raw.sa_family) { - case AF_INET: - return ip4_addr_string_sa(buf, end, &sa->v4, spec, fmt); - case AF_INET6: - return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); - default: - return string_nocheck(buf, end, "(invalid address)", spec); - }} - } - break; + return ip_addr_string(buf, end, ptr, spec, fmt); case 'E': return escaped_string(buf, end, ptr, spec, fmt); case 'U': -- cgit From 45c3e93d751ea50861c796da3cbfc848fa6ddf55 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:45 +0200 Subject: vsprintf: Factor out %pV handler as va_format() Move the code from the long pointer() function. We are going to improve error handling that will make it more complicated. This patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20190417115350.20479-6-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 8ca29bc0d786..12b71a4d4613 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1520,6 +1520,17 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } +static char *va_format(char *buf, char *end, struct va_format *va_fmt) +{ + va_list va; + + va_copy(va, *va_fmt->va); + buf += vsnprintf(buf, end > buf ? 
end - buf : 0, va_fmt->fmt, va); + va_end(va); + + return buf; +} + static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, struct printf_spec spec, const char *fmt) @@ -2046,15 +2057,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'U': return uuid_string(buf, end, ptr, spec, fmt); case 'V': - { - va_list va; - - va_copy(va, *((struct va_format *)ptr)->va); - buf += vsnprintf(buf, end > buf ? end - buf : 0, - ((struct va_format *)ptr)->fmt, va); - va_end(va); - return buf; - } + return va_format(buf, end, ptr); case 'K': return restricted_pointer(buf, end, ptr, spec); case 'N': -- cgit From 798cc27a305e7b35b7bff3a71257e6fe57f70bc1 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:46 +0200 Subject: vsprintf: Factor out %pO handler as kobject_string() Move code from the long pointer() function. We are going to improve error handling that will make it even more complicated. This patch does not change the existing behavior. Link: http://lkml.kernel.org/r/20190417115350.20479-7-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: Kees Cook Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 12b71a4d4613..9817d171f608 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1888,6 +1888,17 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +static char *kobject_string(char *buf, char *end, void *ptr, + struct printf_spec spec, const char *fmt) +{ + switch (fmt[1]) { + case 'F': + return device_node_string(buf, end, ptr, spec, fmt + 1); + } + + return ptr_to_id(buf, end, ptr, spec); +} + /* * Show a '%p' thing. A kernel extension is that the '%p' is followed * by an extra set of alphanumeric characters that are extended format @@ -2082,11 +2093,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'G': return flags_string(buf, end, ptr, fmt); case 'O': - switch (fmt[1]) { - case 'F': - return device_node_string(buf, end, ptr, spec, fmt + 1); - } - break; + return kobject_string(buf, end, ptr, spec, fmt); case 'x': return pointer_string(buf, end, ptr, spec); } -- cgit From 0b74d4d763fd4ee9daa53889324300587c015338 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:47 +0200 Subject: vsprintf: Consolidate handling of unknown pointer specifiers There are a few printk formats that make sense only with two or more specifiers. Also some specifiers make sense only when a kernel feature is enabled. The handling of unknown specifiers is inconsistent and not helpful. Using WARN() looks like overkill for this type of error. pr_warn() is not good either. It would be handled via the printk_safe buffer and it might be hard to match it with the problematic string. A reasonable compromise seems to be writing the unknown format specifier into the original string with a question mark, for example (%pC?). It should be self-explanatory enough. Note that it is in brackets to follow the (null) style. Note that it introduces a warning that the test_hashed() function is unused. It is going to be used again by a later patch.
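To make the new behaviour concrete, here is a minimal sketch of what a caller now sees (kernel context assumed; demo_unknown_specifier() is a made-up name, and the "(%ptR?)" result matches the updated test_printf expectation in the diff below):

#include <linux/kernel.h>
#include <linux/rtc.h>

static void demo_unknown_specifier(void)
{
	char buf[32];
	struct rtc_time tm = { .tm_year = 118, .tm_mon = 10, .tm_mday = 26 };

	/* A known sub-specifier still formats the value as before. */
	snprintf(buf, sizeof(buf), "%ptR", &tm);

	/* An unknown one now writes a self-describing marker into the
	 * output instead of a hashed pointer value. */
	snprintf(buf, sizeof(buf), "%pt", &tm);	/* buf == "(%ptR?)" */
}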
Link: http://lkml.kernel.org/r/20190417115350.20479-8-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/test_printf.c | 3 +-- lib/vsprintf.c | 28 +++++++++++++++++----------- 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 659b6cc0d483..250ee864b8b8 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -462,8 +462,7 @@ struct_rtc_time(void) .tm_year = 118, }; - test_hashed("%pt", &tm); - + test("(%ptR?)", "%pt", &tm); test("2018-11-26T05:35:43", "%ptR", &tm); test("0118-10-26T05:35:43", "%ptRr", &tm); test("05:35:43|2018-11-26", "%ptRt|%ptRd", &tm, &tm); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 9817d171f608..f471a658422f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1435,6 +1435,8 @@ static noinline_for_stack char *ip_addr_string(char *buf, char *end, const void *ptr, struct printf_spec spec, const char *fmt) { + char *err_fmt_msg; + switch (fmt[1]) { case '6': return ip6_addr_string(buf, end, ptr, spec, fmt); @@ -1457,7 +1459,8 @@ char *ip_addr_string(char *buf, char *end, const void *ptr, }} } - return ptr_to_id(buf, end, ptr, spec); + err_fmt_msg = fmt[0] == 'i' ? "(%pi?)" : "(%pI?)"; + return string_nocheck(buf, end, err_fmt_msg, spec); } static noinline_for_stack @@ -1585,7 +1588,7 @@ char *netdev_bits(char *buf, char *end, const void *addr, size = sizeof(netdev_features_t); break; default: - return ptr_to_id(buf, end, addr, spec); + return string_nocheck(buf, end, "(%pN?)", spec); } return special_hex_number(buf, end, num, size); @@ -1689,7 +1692,7 @@ char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, case 'R': return rtc_str(buf, end, (const struct rtc_time *)ptr, fmt); default: - return ptr_to_id(buf, end, ptr, spec); + return string_nocheck(buf, end, "(%ptR?)", spec); } } @@ -1697,7 +1700,10 @@ static noinline_for_stack char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, const char *fmt) { - if (!IS_ENABLED(CONFIG_HAVE_CLK) || !clk) + if (!IS_ENABLED(CONFIG_HAVE_CLK)) + return string_nocheck(buf, end, "(%pC?)", spec); + + if (!clk) return string(buf, end, NULL, spec); switch (fmt[1]) { @@ -1706,7 +1712,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, #ifdef CONFIG_COMMON_CLK return string(buf, end, __clk_get_name(clk), spec); #else - return ptr_to_id(buf, end, clk, spec); + return string_nocheck(buf, end, "(%pC?)", spec); #endif } } @@ -1739,7 +1745,8 @@ char *format_flags(char *buf, char *end, unsigned long flags, } static noinline_for_stack -char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) +char *flags_string(char *buf, char *end, void *flags_ptr, + struct printf_spec spec, const char *fmt) { unsigned long flags; const struct trace_print_flags *names; @@ -1760,8 +1767,7 @@ char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt) names = gfpflag_names; break; default: - WARN_ONCE(1, "Unsupported flags modifier: %c\n", fmt[1]); - return buf; + return string_nocheck(buf, end, "(%pG?)", spec); } return format_flags(buf, end, flags, names); @@ -1817,7 +1823,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, str_spec.field_width = -1; if (!IS_ENABLED(CONFIG_OF)) - return 
string_nocheck(buf, end, "(!OF)", spec); + return string_nocheck(buf, end, "(%pOF?)", spec); if ((unsigned long)dn < PAGE_SIZE) return string_nocheck(buf, end, "(null)", spec); @@ -1896,7 +1902,7 @@ static char *kobject_string(char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); } - return ptr_to_id(buf, end, ptr, spec); + return string_nocheck(buf, end, "(%pO?)", spec); } /* @@ -2091,7 +2097,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, #endif case 'G': - return flags_string(buf, end, ptr, fmt); + return flags_string(buf, end, ptr, spec, fmt); case 'O': return kobject_string(buf, end, ptr, spec, fmt); case 'x': -- cgit From 3e5903eb9cff707301712498aed9e34b3e2ee883 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:48 +0200 Subject: vsprintf: Prevent crash when dereferencing invalid pointers We already prevent a crash when dereferencing some obviously broken pointers. But the handling is not consistent. Sometimes we print "(null)" only for a pure NULL pointer, sometimes for pointers in the first page and sometimes also for pointers in the last page (error codes). Note that printk() calls this code under logbuf_lock. Any recursive printks are redirected to the printk_safe implementation and the messages are stored into per-CPU buffers. These buffers might be eventually flushed in printk_safe_flush_on_panic() but it is not guaranteed. This patch adds a check using probe_kernel_read(). It is not a fool-proof test. But it should help to see the error message in 99% of situations where the kernel would silently crash otherwise. Also it makes the error handling unified for "%s" and the many %p* specifiers that need to read the data from a given address. We print: + (null) when accessing data on a pure NULL address + (efault) when accessing data on an invalid address It does not affect the %p* specifiers that just print the given address in some form, namely %pF, %pf, %pS, %ps, %pB, %pK, %px, and plain %p. Note that we print (efault) for security reasons. In fact, the real address can be seen only by %px or eventually %pK. Link: http://lkml.kernel.org/r/20190417115350.20479-9-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C .
Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/test_printf.c | 22 ++++++++- lib/vsprintf.c | 136 ++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 122 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index 250ee864b8b8..359ae4fb1ece 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -239,6 +239,7 @@ plain_format(void) #define PTR ((void *)0x456789ab) #define PTR_STR "456789ab" #define PTR_VAL_NO_CRNG "(ptrval)" +#define ZEROS "" static int __init plain_format(void) @@ -268,7 +269,6 @@ plain_hash_to_buffer(const void *p, char *buf, size_t len) return 0; } - static int __init plain_hash(void) { @@ -325,6 +325,24 @@ test_hashed(const char *fmt, const void *p) test(buf, fmt, p); } +static void __init +null_pointer(void) +{ + test_hashed("%p", NULL); + test(ZEROS "00000000", "%px", NULL); + test("(null)", "%pE", NULL); +} + +#define PTR_INVALID ((void *)0x000000ab) + +static void __init +invalid_pointer(void) +{ + test_hashed("%p", PTR_INVALID); + test(ZEROS "000000ab", "%px", PTR_INVALID); + test("(efault)", "%pE", PTR_INVALID); +} + static void __init symbol_ptr(void) { @@ -571,6 +589,8 @@ static void __init test_pointer(void) { plain(); + null_pointer(); + invalid_pointer(); symbol_ptr(); kernel_ptr(); struct_resource(); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f471a658422f..b989f1e8f35b 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -612,12 +612,45 @@ static char *string_nocheck(char *buf, char *end, const char *s, return widen_string(buf, len, end, spec); } +/* + * This is not a fool-proof test. 99% of the time that this will fault is + * due to a bad pointer, not one that crosses into bad memory. Just test + * the address to make sure it doesn't fault due to a poorly added printk + * during debugging. 
+ */ +static const char *check_pointer_msg(const void *ptr) +{ + char byte; + + if (!ptr) + return "(null)"; + + if (probe_kernel_address(ptr, byte)) + return "(efault)"; + + return NULL; +} + +static int check_pointer(char **buf, char *end, const void *ptr, + struct printf_spec spec) +{ + const char *err_msg; + + err_msg = check_pointer_msg(ptr); + if (err_msg) { + *buf = string_nocheck(*buf, end, err_msg, spec); + return -EFAULT; + } + + return 0; +} + static noinline_for_stack char *string(char *buf, char *end, const char *s, struct printf_spec spec) { - if ((unsigned long)s < PAGE_SIZE) - s = "(null)"; + if (check_pointer(&buf, end, s, spec)) + return buf; return string_nocheck(buf, end, s, spec); } @@ -792,6 +825,11 @@ char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_sp rcu_read_lock(); for (i = 0; i < depth; i++, d = p) { + if (check_pointer(&buf, end, d, spec)) { + rcu_read_unlock(); + return buf; + } + p = READ_ONCE(d->d_parent); array[i] = READ_ONCE(d->d_name.name); if (p == d) { @@ -822,8 +860,12 @@ static noinline_for_stack char *bdev_name(char *buf, char *end, struct block_device *bdev, struct printf_spec spec, const char *fmt) { - struct gendisk *hd = bdev->bd_disk; - + struct gendisk *hd; + + if (check_pointer(&buf, end, bdev, spec)) + return buf; + + hd = bdev->bd_disk; buf = string(buf, end, hd->disk_name, spec); if (bdev->bd_part->partno) { if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) { @@ -942,6 +984,9 @@ char *resource_string(char *buf, char *end, struct resource *res, int decode = (fmt[0] == 'R') ? 1 : 0; const struct printf_spec *specp; + if (check_pointer(&buf, end, res, spec)) + return buf; + *p++ = '['; if (res->flags & IORESOURCE_IO) { p = string_nocheck(p, pend, "io ", str_spec); @@ -1004,9 +1049,8 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, /* nothing to print */ return buf; - if (ZERO_OR_NULL_PTR(addr)) - /* NULL pointer */ - return string(buf, end, NULL, spec); + if (check_pointer(&buf, end, addr, spec)) + return buf; switch (fmt[1]) { case 'C': @@ -1053,6 +1097,9 @@ char *bitmap_string(char *buf, char *end, unsigned long *bitmap, int i, chunksz; bool first = true; + if (check_pointer(&buf, end, bitmap, spec)) + return buf; + /* reused to print numbers */ spec = (struct printf_spec){ .flags = SMALL | ZEROPAD, .base = 16 }; @@ -1094,6 +1141,9 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, int cur, rbot, rtop; bool first = true; + if (check_pointer(&buf, end, bitmap, spec)) + return buf; + rbot = cur = find_first_bit(bitmap, nr_bits); while (cur < nr_bits) { rtop = cur; @@ -1132,6 +1182,9 @@ char *mac_address_string(char *buf, char *end, u8 *addr, char separator; bool reversed = false; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'F': separator = '-'; @@ -1437,6 +1490,9 @@ char *ip_addr_string(char *buf, char *end, const void *ptr, { char *err_fmt_msg; + if (check_pointer(&buf, end, ptr, spec)) + return buf; + switch (fmt[1]) { case '6': return ip6_addr_string(buf, end, ptr, spec, fmt); @@ -1475,9 +1531,8 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, if (spec.field_width == 0) return buf; /* nothing to print */ - if (ZERO_OR_NULL_PTR(addr)) - return string(buf, end, NULL, spec); /* NULL pointer */ - + if (check_pointer(&buf, end, addr, spec)) + return buf; do { switch (fmt[count++]) { @@ -1523,10 +1578,14 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; 
} -static char *va_format(char *buf, char *end, struct va_format *va_fmt) +static char *va_format(char *buf, char *end, struct va_format *va_fmt, + struct printf_spec spec, const char *fmt) { va_list va; + if (check_pointer(&buf, end, va_fmt, spec)) + return buf; + va_copy(va, *va_fmt->va); buf += vsnprintf(buf, end > buf ? end - buf : 0, va_fmt->fmt, va); va_end(va); @@ -1544,6 +1603,9 @@ char *uuid_string(char *buf, char *end, const u8 *addr, const u8 *index = uuid_index; bool uc = false; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (*(++fmt)) { case 'L': uc = true; /* fall-through */ @@ -1582,6 +1644,9 @@ char *netdev_bits(char *buf, char *end, const void *addr, unsigned long long num; int size; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'F': num = *(const netdev_features_t *)addr; @@ -1595,11 +1660,15 @@ char *netdev_bits(char *buf, char *end, const void *addr, } static noinline_for_stack -char *address_val(char *buf, char *end, const void *addr, const char *fmt) +char *address_val(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) { unsigned long long num; int size; + if (check_pointer(&buf, end, addr, spec)) + return buf; + switch (fmt[1]) { case 'd': num = *(const dma_addr_t *)addr; @@ -1651,12 +1720,16 @@ char *time_str(char *buf, char *end, const struct rtc_time *tm, bool r) } static noinline_for_stack -char *rtc_str(char *buf, char *end, const struct rtc_time *tm, const char *fmt) +char *rtc_str(char *buf, char *end, const struct rtc_time *tm, + struct printf_spec spec, const char *fmt) { bool have_t = true, have_d = true; bool raw = false; int count = 2; + if (check_pointer(&buf, end, tm, spec)) + return buf; + switch (fmt[count]) { case 'd': have_t = false; @@ -1690,7 +1763,7 @@ char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, { switch (fmt[1]) { case 'R': - return rtc_str(buf, end, (const struct rtc_time *)ptr, fmt); + return rtc_str(buf, end, (const struct rtc_time *)ptr, spec, fmt); default: return string_nocheck(buf, end, "(%ptR?)", spec); } @@ -1703,8 +1776,8 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, if (!IS_ENABLED(CONFIG_HAVE_CLK)) return string_nocheck(buf, end, "(%pC?)", spec); - if (!clk) - return string(buf, end, NULL, spec); + if (check_pointer(&buf, end, clk, spec)) + return buf; switch (fmt[1]) { case 'n': @@ -1751,6 +1824,9 @@ char *flags_string(char *buf, char *end, void *flags_ptr, unsigned long flags; const struct trace_print_flags *names; + if (check_pointer(&buf, end, flags_ptr, spec)) + return buf; + switch (fmt[1]) { case 'p': flags = *(unsigned long *)flags_ptr; @@ -1825,8 +1901,8 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, if (!IS_ENABLED(CONFIG_OF)) return string_nocheck(buf, end, "(%pOF?)", spec); - if ((unsigned long)dn < PAGE_SIZE) - return string_nocheck(buf, end, "(null)", spec); + if (check_pointer(&buf, end, dn, spec)) + return buf; /* simple case without anything any more format specifiers */ fmt++; @@ -2021,18 +2097,6 @@ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, struct printf_spec spec) { - const int default_width = 2 * sizeof(void *); - - if (!ptr && *fmt != 'K' && *fmt != 'x') { - /* - * Print (null) with the same width as a pointer so it makes - * tabular output look nice. 
- */ - if (spec.field_width == -1) - spec.field_width = default_width; - return string_nocheck(buf, end, "(null)", spec); - } - switch (*fmt) { case 'F': case 'f': @@ -2074,13 +2138,13 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'U': return uuid_string(buf, end, ptr, spec, fmt); case 'V': - return va_format(buf, end, ptr); + return va_format(buf, end, ptr, spec, fmt); case 'K': return restricted_pointer(buf, end, ptr, spec); case 'N': return netdev_bits(buf, end, ptr, spec, fmt); case 'a': - return address_val(buf, end, ptr, fmt); + return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 't': @@ -2714,11 +2778,13 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_STR: { const char *save_str = va_arg(args, char *); + const char *err_msg; size_t len; - if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE - || (unsigned long)save_str < PAGE_SIZE) - save_str = "(null)"; + err_msg = check_pointer_msg(save_str); + if (err_msg) + save_str = err_msg; + len = strlen(save_str) + 1; if (str + len < end) memcpy(str, save_str, len); -- cgit From 635720ac75a51092b456bed517ff170047883252 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:49 +0200 Subject: vsprintf: Avoid confusion between invalid address and value We are able to detect invalid values handled by %p[iI] printk specifier. The current error message is "invalid address". It might cause confusion against "(efault)" reported by the generic valid_pointer_address() check. Let's unify the style and use the more appropriate error code description "(einval)". Link: http://lkml.kernel.org/r/20190417115350.20479-10-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b989f1e8f35b..4e5666035b74 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1511,7 +1511,7 @@ char *ip_addr_string(char *buf, char *end, const void *ptr, case AF_INET6: return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); default: - return string_nocheck(buf, end, "(invalid address)", spec); + return string_nocheck(buf, end, "(einval)", spec); }} } -- cgit From c8c3b584343cb7522fc00322769a9f288305743f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 17 Apr 2019 13:53:50 +0200 Subject: vsprintf: Limit the length of inlined error messages The inlined error messages must be used carefully because they need to fit into the given buffer. Handle them using a custom wrapper that makes people aware of the problem. Also define a reasonable hard limit to avoid a completely insane usage. Suggested-by: Sergey Senozhatsky Link: http://lkml.kernel.org/r/20190417115350.20479-11-pmladek@suse.com To: Rasmus Villemoes Cc: Linus Torvalds Cc: "Tobin C . 
Harding" Cc: Joe Perches Cc: Andrew Morton Cc: Michal Hocko Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/vsprintf.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 4e5666035b74..1f367f3a7e2b 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -612,6 +612,21 @@ static char *string_nocheck(char *buf, char *end, const char *s, return widen_string(buf, len, end, spec); } +/* Be careful: error messages must fit into the given buffer. */ +static char *error_string(char *buf, char *end, const char *s, + struct printf_spec spec) +{ + /* + * Hard limit to avoid a completely insane messages. It actually + * works pretty well because most error messages are in + * the many pointer format modifiers. + */ + if (spec.precision == -1) + spec.precision = 2 * sizeof(void *); + + return string_nocheck(buf, end, s, spec); +} + /* * This is not a fool-proof test. 99% of the time that this will fault is * due to a bad pointer, not one that crosses into bad memory. Just test @@ -638,7 +653,7 @@ static int check_pointer(char **buf, char *end, const void *ptr, err_msg = check_pointer_msg(ptr); if (err_msg) { - *buf = string_nocheck(*buf, end, err_msg, spec); + *buf = error_string(*buf, end, err_msg, spec); return -EFAULT; } @@ -741,7 +756,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ - return string_nocheck(buf, end, str, spec); + return error_string(buf, end, str, spec); } #ifdef CONFIG_64BIT @@ -777,7 +792,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr, if (in_irq() || in_serving_softirq() || in_nmi()) { if (spec.field_width == -1) spec.field_width = 2 * sizeof(ptr); - return string_nocheck(buf, end, "pK-error", spec); + return error_string(buf, end, "pK-error", spec); } /* @@ -1511,12 +1526,12 @@ char *ip_addr_string(char *buf, char *end, const void *ptr, case AF_INET6: return ip6_addr_string_sa(buf, end, &sa->v6, spec, fmt); default: - return string_nocheck(buf, end, "(einval)", spec); + return error_string(buf, end, "(einval)", spec); }} } err_fmt_msg = fmt[0] == 'i' ? 
"(%pi?)" : "(%pI?)"; - return string_nocheck(buf, end, err_fmt_msg, spec); + return error_string(buf, end, err_fmt_msg, spec); } static noinline_for_stack @@ -1653,7 +1668,7 @@ char *netdev_bits(char *buf, char *end, const void *addr, size = sizeof(netdev_features_t); break; default: - return string_nocheck(buf, end, "(%pN?)", spec); + return error_string(buf, end, "(%pN?)", spec); } return special_hex_number(buf, end, num, size); @@ -1765,7 +1780,7 @@ char *time_and_date(char *buf, char *end, void *ptr, struct printf_spec spec, case 'R': return rtc_str(buf, end, (const struct rtc_time *)ptr, spec, fmt); default: - return string_nocheck(buf, end, "(%ptR?)", spec); + return error_string(buf, end, "(%ptR?)", spec); } } @@ -1774,7 +1789,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, const char *fmt) { if (!IS_ENABLED(CONFIG_HAVE_CLK)) - return string_nocheck(buf, end, "(%pC?)", spec); + return error_string(buf, end, "(%pC?)", spec); if (check_pointer(&buf, end, clk, spec)) return buf; @@ -1785,7 +1800,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, #ifdef CONFIG_COMMON_CLK return string(buf, end, __clk_get_name(clk), spec); #else - return string_nocheck(buf, end, "(%pC?)", spec); + return error_string(buf, end, "(%pC?)", spec); #endif } } @@ -1843,7 +1858,7 @@ char *flags_string(char *buf, char *end, void *flags_ptr, names = gfpflag_names; break; default: - return string_nocheck(buf, end, "(%pG?)", spec); + return error_string(buf, end, "(%pG?)", spec); } return format_flags(buf, end, flags, names); @@ -1899,7 +1914,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, str_spec.field_width = -1; if (!IS_ENABLED(CONFIG_OF)) - return string_nocheck(buf, end, "(%pOF?)", spec); + return error_string(buf, end, "(%pOF?)", spec); if (check_pointer(&buf, end, dn, spec)) return buf; @@ -1978,7 +1993,7 @@ static char *kobject_string(char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); } - return string_nocheck(buf, end, "(%pO?)", spec); + return error_string(buf, end, "(%pO?)", spec); } /* -- cgit From 6f455f5f4e9c28aefaefbe18ce7304b499645d75 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Apr 2019 14:07:27 +0200 Subject: netlink: add NLA_MIN_LEN Rather than using NLA_UNSPEC for this type of thing, use NLA_MIN_LEN so we can make NLA_UNSPEC be NLA_REJECT under certain conditions for future attributes. While at it, also use NLA_EXACT_LEN for the struct example. Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- lib/nlattr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index d26de6156b97..465c9e8ef8a5 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -278,10 +278,17 @@ static int validate_nla(const struct nlattr *nla, int maxtype, } } break; + + case NLA_UNSPEC: + case NLA_MIN_LEN: + if (attrlen < pt->len) + goto out_err; + break; + default: if (pt->len) minlen = pt->len; - else if (pt->type != NLA_UNSPEC) + else minlen = nla_attr_minlen[pt->type]; if (attrlen < minlen) -- cgit From 8cb081746c031fb164089322e2336a0bf5b3070c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Apr 2019 14:07:28 +0200 Subject: netlink: make validation more configurable for future strictness We currently have two levels of strict validation: 1) liberal (default) - undefined (type >= max) & NLA_UNSPEC attributes accepted - attribute length >= expected accepted - garbage at end of message accepted 2) strict (opt-in) - NLA_UNSPEC attributes accepted - attribute length >= expected accepted Split out parsing strictness into four different options: * TRAILING - check that there's no trailing data after parsing attributes (in message or nested) * MAXTYPE - reject attrs > max known type * UNSPEC - reject attributes with NLA_UNSPEC policy entries * STRICT_ATTRS - strictly validate attribute size The default for future things should be *everything*. The current *_strict() is a combination of TRAILING and MAXTYPE, and is renamed to _deprecated_strict(). The current regular parsing has none of this, and is renamed to *_parse_deprecated(). Additionally it allows us to selectively set one of the new flags even on old policies. Notably, the UNSPEC flag could be useful in this case, since it can be arranged (by filling in the policy) to not be an incompatible userspace ABI change, but would then going forward prevent forgetting attribute entries. Similar can apply to the POLICY flag. We end up with the following renames: * nla_parse -> nla_parse_deprecated * nla_parse_strict -> nla_parse_deprecated_strict * nlmsg_parse -> nlmsg_parse_deprecated * nlmsg_parse_strict -> nlmsg_parse_deprecated_strict * nla_parse_nested -> nla_parse_nested_deprecated * nla_validate_nested -> nla_validate_nested_deprecated Using spatch, of course: @@ expression TB, MAX, HEAD, LEN, POL, EXT; @@ -nla_parse(TB, MAX, HEAD, LEN, POL, EXT) +nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression TB, MAX, NLA, POL, EXT; @@ -nla_parse_nested(TB, MAX, NLA, POL, EXT) +nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT) @@ expression START, MAX, POL, EXT; @@ -nla_validate_nested(START, MAX, POL, EXT) +nla_validate_nested_deprecated(START, MAX, POL, EXT) @@ expression NLH, HDRLEN, MAX, POL, EXT; @@ -nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT) +nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT) For this patch, don't actually add the strict, non-renamed versions yet so that it breaks compile if I get it wrong. Also, while at it, make nla_validate and nla_parse go down to a common __nla_validate_parse() function to avoid code duplication. 
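For reference, the four strictness options split out above end up as validation flag bits along these lines (a sketch only: the enum netlink_validation named in the kernel-doc below is defined in include/net/netlink.h, outside this lib-only diff, so the exact bit assignments shown here are illustrative):

	enum netlink_validation {
		NL_VALIDATE_LIBERAL	 = 0,		/* the old default behaviour */
		NL_VALIDATE_TRAILING	 = BIT(0),	/* no trailing data after attrs */
		NL_VALIDATE_MAXTYPE	 = BIT(1),	/* reject attrs > max known type */
		NL_VALIDATE_UNSPEC	 = BIT(2),	/* reject NLA_UNSPEC policy entries */
		NL_VALIDATE_STRICT_ATTRS = BIT(3),	/* strictly validate attr length */
	};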
Ultimately, this allows us to have very strict validation for every new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the next patch, while existing things will continue to work as is. In effect then, this adds fully strict validation for any new command. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- lib/nlattr.c | 171 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 88 insertions(+), 83 deletions(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index 465c9e8ef8a5..af0f8b0309c6 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -69,7 +69,8 @@ static int validate_nla_bitfield32(const struct nlattr *nla, static int nla_validate_array(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + unsigned int validate) { const struct nlattr *entry; int rem; @@ -86,8 +87,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype, return -ERANGE; } - ret = nla_validate(nla_data(entry), nla_len(entry), - maxtype, policy, extack); + ret = __nla_validate(nla_data(entry), nla_len(entry), + maxtype, policy, validate, extack); if (ret < 0) return ret; } @@ -154,7 +155,7 @@ static int nla_validate_int_range(const struct nla_policy *pt, } static int validate_nla(const struct nlattr *nla, int maxtype, - const struct nla_policy *policy, + const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { const struct nla_policy *pt; @@ -172,6 +173,11 @@ static int validate_nla(const struct nlattr *nla, int maxtype, (pt->type == NLA_EXACT_LEN_WARN && attrlen != pt->len)) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); + if (validate & NL_VALIDATE_STRICT_ATTRS) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "invalid attribute length"); + return -EINVAL; + } } switch (pt->type) { @@ -244,8 +250,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (attrlen < NLA_HDRLEN) goto out_err; if (pt->validation_data) { - err = nla_validate(nla_data(nla), nla_len(nla), pt->len, - pt->validation_data, extack); + err = __nla_validate(nla_data(nla), nla_len(nla), pt->len, + pt->validation_data, validate, + extack); if (err < 0) { /* * return directly to preserve the inner @@ -268,7 +275,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, err = nla_validate_array(nla_data(nla), nla_len(nla), pt->len, pt->validation_data, - extack); + extack, validate); if (err < 0) { /* * return directly to preserve the inner @@ -280,6 +287,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype, break; case NLA_UNSPEC: + if (validate & NL_VALIDATE_UNSPEC) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Unsupported attribute"); + return -EINVAL; + } + /* fall through */ case NLA_MIN_LEN: if (attrlen < pt->len) goto out_err; @@ -322,37 +335,75 @@ out_err: return err; } +static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack, + struct nlattr **tb) +{ + const struct nlattr *nla; + int rem; + + if (tb) + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla_type(nla); + + if (type == 0 || type > maxtype) { + if (validate & NL_VALIDATE_MAXTYPE) { + NL_SET_ERR_MSG(extack, "Unknown attribute type"); + return -EINVAL; + } + continue; + } + if (policy) { + int err = 
validate_nla(nla, maxtype, policy, + validate, extack); + + if (err < 0) + return err; + } + + if (tb) + tb[type] = (struct nlattr *)nla; + } + + if (unlikely(rem > 0)) { + pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", + rem, current->comm); + NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); + if (validate & NL_VALIDATE_TRAILING) + return -EINVAL; + } + + return 0; +} + /** - * nla_validate - Validate a stream of attributes + * __nla_validate - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the - * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. + * specified policy. Validation depends on the validate flags passed, see + * &enum netlink_validation for more details on that. + * See documenation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ -int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +int __nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack) { - const struct nlattr *nla; - int rem; - - nla_for_each_attr(nla, head, len, rem) { - int err = validate_nla(nla, maxtype, policy, extack); - - if (err < 0) - return err; - } - - return 0; + return __nla_validate_parse(head, len, maxtype, policy, validate, + extack, NULL); } -EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(__nla_validate); /** * nla_policy_len - Determin the max. length of a policy @@ -384,76 +435,30 @@ nla_policy_len(const struct nla_policy *p, int n) EXPORT_SYMBOL(nla_policy_len); /** - * nla_parse - Parse a stream of attributes into a tb buffer + * __nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in - * the tb array accessible via the attribute type. Attributes with a type - * exceeding maxtype will be silently ignored for backwards compatibility - * reasons. policy may be set to NULL if no validation is required. + * the tb array accessible via the attribute type. + * Validation is controlled by the @validate parameter. * * Returns 0 on success or a negative error code. 
*/ -static int __nla_parse(struct nlattr **tb, int maxtype, - const struct nlattr *head, int len, - bool strict, const struct nla_policy *policy, - struct netlink_ext_ack *extack) -{ - const struct nlattr *nla; - int rem; - - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - - nla_for_each_attr(nla, head, len, rem) { - u16 type = nla_type(nla); - - if (type == 0 || type > maxtype) { - if (strict) { - NL_SET_ERR_MSG(extack, "Unknown attribute type"); - return -EINVAL; - } - continue; - } - if (policy) { - int err = validate_nla(nla, maxtype, policy, extack); - - if (err < 0) - return err; - } - - tb[type] = (struct nlattr *)nla; - } - - if (unlikely(rem > 0)) { - pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", - rem, current->comm); - NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); - if (strict) - return -EINVAL; - } - - return 0; -} - -int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack) -{ - return __nla_parse(tb, maxtype, head, len, false, policy, extack); -} -EXPORT_SYMBOL(nla_parse); - -int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack) +int __nla_parse(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack) { - return __nla_parse(tb, maxtype, head, len, true, policy, extack); + return __nla_validate_parse(head, len, maxtype, policy, validate, + extack, tb); } -EXPORT_SYMBOL(nla_parse_strict); +EXPORT_SYMBOL(__nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes -- cgit From 56738f460841761abc70347c919d5c45f6f05a42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Apr 2019 14:07:30 +0200 Subject: netlink: add strict parsing for future attributes Unfortunately, we cannot add strict parsing for all attributes, as that would break existing userspace. We currently warn about it, but that's about all we can do. For new attributes, however, the story is better: nobody is using them, so we can reject bad sizes. Also, for new attributes, we need not accept them when the policy doesn't declare their usage. David Ahern and I went back and forth on how to best encode this, and the best way we found was to have a "boundary type", from which point on new attributes have all possible validation applied, and NLA_UNSPEC is rejected. As we didn't want to add another argument to all functions that get a netlink policy, the workaround is to encode that boundary in the first entry of the policy array (which is for type 0 and thus probably not really valid anyway). I put it into the validation union for the rare possibility that somebody is actually using attribute 0, which would continue to work fine unless they tried to use the extended validation, which isn't likely. We also didn't find any in-tree users with type 0. The reason for setting the "start strict here" attribute is that we never really need to start strict from 0, which is invalid anyway (or in legacy families where that isn't true, it cannot be set to strict), so we can thus reserve the value 0 for "don't do this check" and don't have to add the tag to all policies right now. Thus, policies can now opt in to this validation, which we should do for all existing policies, at least when adding new attributes. 
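As a minimal sketch of opting in (the HYP_* attribute names are hypothetical, not from any in-tree family):

	static const struct nla_policy hypothetical_policy[HYP_ATTR_MAX + 1] = {
		/* entry 0 carries the boundary: attributes >= HYP_ATTR_NEW get
		 * full strict validation, older attributes keep the liberal rules
		 */
		[0]		= { .strict_start_type = HYP_ATTR_NEW },
		[HYP_ATTR_OLD]	= { .type = NLA_U32 },
		[HYP_ATTR_NEW]	= { .type = NLA_U32 },
	};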
Note that entirely *new* policies won't need to set it, as the use of that should be using nla_parse()/nlmsg_parse() etc. which anyway do fully strict validation now, regardless of this. So in effect, this patch only covers the "existing command with new attribute" case. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- lib/nlattr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index af0f8b0309c6..29f6336e2422 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -158,10 +158,14 @@ static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { + u16 strict_start_type = policy[0].strict_start_type; const struct nla_policy *pt; int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); int err = -ERANGE; + if (strict_start_type && type >= strict_start_type) + validate |= NL_VALIDATE_STRICT; + if (type <= 0 || type > maxtype) return 0; -- cgit From 92067f843854be0eef1e41ff00cb465247a83c42 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Sun, 28 Apr 2019 10:48:10 +1000 Subject: kobject: Improve docs for kobject_add/del There is currently some confusion on how to wind back kobject_init_and_add() during the error paths in code that uses this function. Add documentation to kobject_add() and kobject_del() to help clarify the usage. Signed-off-by: Tobin C. Harding Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index ede40005db28..c97b5729f94d 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -406,15 +406,19 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * - * If this function returns an error, kobject_put() must be called to - * properly clean up the memory associated with the object. - * Under no instance should the kobject that is passed to this function - * be directly freed with a call to kfree(), that can leak memory. - * * Note, no "add" uevent will be created with this call, the caller should set * up all of the necessary sysfs files for the object and then call * kobject_uevent() with the UEVENT_ADD parameter to ensure that * userspace is properly notified of this kobject's creation. + * + * Return: If this function returns an error, kobject_put() must be + * called to properly clean up the memory associated with the + * object. Under no instance should the kobject that is passed + * to this function be directly freed with a call to kfree(), + * that can leak memory. + * + * If this call returns successfully and you later need to unwind + * kobject_add() for the error path you should call kobject_del(). */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) @@ -589,6 +593,9 @@ EXPORT_SYMBOL_GPL(kobject_move); /** * kobject_del - unlink kobject from hierarchy. * @kobj: object. + * + * This is the function that should be called to delete an object + * successfully added via kobject_add(). */ void kobject_del(struct kobject *kobj) { -- cgit From 1fd7c3b438a2e4741435ed4d45546c03abf045b2 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Sun, 28 Apr 2019 09:56:52 +1000 Subject: kobject: Improve doc clarity kobject_init_and_add() Function kobject_init_and_add() is currently misused in a number of places in the kernel. 
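A typical buggy pattern looks roughly like this (illustrative only, not a specific in-tree call site; foo and foo_ktype are made-up names):

	err = kobject_init_and_add(&foo->kobj, &foo_ktype, parent, "%s", name);
	if (err)
		return err;	/* BUG: leaks the object; kobject_put(&foo->kobj) is required */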
On error return kobject_put() must be called but is at times not. Make the function documentation more explicit about calling kobject_put() in the error path. Signed-off-by: Tobin C. Harding Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index c97b5729f94d..a30ee0467942 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -450,9 +450,12 @@ EXPORT_SYMBOL(kobject_add); * @parent: pointer to the parent of this kobject. * @fmt: the name of the kobject. * - * This function combines the call to kobject_init() and - * kobject_add(). The same type of error handling after a call to - * kobject_add() and kobject lifetime rules are the same here. + * This function combines the call to kobject_init() and kobject_add(). + * + * If this function returns an error, kobject_put() must be called to + * properly clean up the memory associated with the object. This is the + * same type of error handling after a call to kobject_add() and kobject + * lifetime rules are the same here. */ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) -- cgit From ce9d3eceb7ffb74445a8d892ca0685395a93a7e2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 27 Apr 2019 00:46:30 +0800 Subject: lib/vsprintf: Make function pointer_string static Fix sparse warning: lib/vsprintf.c:673:6: warning: symbol 'pointer_string' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20190426164630.22104-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Signed-off-by: Petr Mladek --- lib/vsprintf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 1f367f3a7e2b..7b0a6140bfad 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -670,8 +670,9 @@ char *string(char *buf, char *end, const char *s, return string_nocheck(buf, end, s, spec); } -char *pointer_string(char *buf, char *end, const void *ptr, - struct printf_spec spec) +static char *pointer_string(char *buf, char *end, + const void *ptr, + struct printf_spec spec) { spec.base = 16; spec.flags |= SMALL; -- cgit From 3d378dc713f3ff4951d9a50c6a815f011e59da10 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 May 2019 13:43:17 +0100 Subject: kobject: fix dereference before null check on kobj The kobj pointer is being null-checked so potentially it could be null; however, the ktype declaration before the null check is dereferencing kobj, hence we have a potential null pointer dereference. Fix this by moving the assignment of ktype after kobj has been null checked.
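In miniature, the hazard and the fix look like this (illustrative):

	const struct kobj_type *ktype = get_ktype(kobj);	/* dereferences kobj */

	if (!kobj)
		return;						/* too late if kobj is NULL */

versus

	const struct kobj_type *ktype;

	if (!kobj)
		return;
	ktype = get_ktype(kobj);				/* safe: kobj is non-NULL here */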
Addresses-Coverity: ("Dereference before null check") Fixes: aa30f47cf666 ("kobject: Add support for default attribute groups to kobj_type") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index a30ee0467942..095bcb55c2ba 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -603,12 +603,13 @@ EXPORT_SYMBOL_GPL(kobject_move); void kobject_del(struct kobject *kobj) { struct kernfs_node *sd; - const struct kobj_type *ktype = get_ktype(kobj); + const struct kobj_type *ktype; if (!kobj) return; sd = kobj->sd; + ktype = get_ktype(kobj); if (ktype) sysfs_remove_groups(kobj, ktype->default_groups); -- cgit From 923abb9d797ba078f4e9eb3734dd71be5f567a2a Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 1 May 2019 13:48:13 +0300 Subject: RDMA/core: Introduce RDMA subsystem ibdev_* print functions Similarly to dev/netdev/etc printk helpers, add standard printk helpers for the RDMA subsystem. Example output: efa 0000:00:06.0 efa_0: Hello World! efa_0: Hello World! (no parent device set) (NULL ib_device): Hello World! (ibdev is NULL) Cc: Jason Baron Suggested-by: Jason Gunthorpe Suggested-by: Leon Romanovsky Signed-off-by: Gal Pressman Reviewed-by: Leon Romanovsky Reviewed-by: Shiraz Saleem Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- lib/dynamic_debug.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 7bdf98c37e91..8a16c2d498e9 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -37,6 +37,8 @@ #include #include +#include + extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; @@ -636,6 +638,41 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg); #endif +#if IS_ENABLED(CONFIG_INFINIBAND) + +void __dynamic_ibdev_dbg(struct _ddebug *descriptor, + const struct ib_device *ibdev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (ibdev && ibdev->dev.parent) { + char buf[PREFIX_SIZE]; + + dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent, + "%s%s %s %s: %pV", + dynamic_emit_prefix(descriptor, buf), + dev_driver_string(ibdev->dev.parent), + dev_name(ibdev->dev.parent), + dev_name(&ibdev->dev), + &vaf); + } else if (ibdev) { + printk(KERN_DEBUG "%s: %pV", dev_name(&ibdev->dev), &vaf); + } else { + printk(KERN_DEBUG "(NULL ib_device): %pV", &vaf); + } + + va_end(args); +} +EXPORT_SYMBOL(__dynamic_ibdev_dbg); + +#endif + #define DDEBUG_STRING_SIZE 1024 static __initdata char ddebug_setup_string[DDEBUG_STRING_SIZE]; -- cgit From 8fd7c302b37099670b5d793375da10a40da7edf5 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 2 May 2019 12:31:39 +1000 Subject: kobject: Remove docstring reference to kset Currently the docstring for kobject_get_path() mentions 'kset'. The kset is not used in the function callchain starting from this function. Remove docstring reference to kset from the function kobject_get_path(). Signed-off-by: Tobin C. 
Harding Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 095bcb55c2ba..09e3ac329e92 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -162,12 +162,11 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) } /** - * kobject_get_path - generate and return the path associated with a given kobj and kset pair. - * + * kobject_get_path() - Allocate memory and fill in the path for @kobj. * @kobj: kobject in question, with which to build the path * @gfp_mask: the allocation type used to allocate the path * - * The result must be freed by the caller with kfree(). + * Return: The newly allocated memory, caller must free with kfree(). */ char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) { -- cgit From ed856349dc0886a97e91fd4ce6ba5ff5312fc0f1 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Thu, 2 May 2019 12:31:40 +1000 Subject: kobject: Fix kernel-doc comment first line kernel-doc comments have a prescribed format. This includes parenthesis on the function name. To be _particularly_ correct we should also capitalise the brief description and terminate it with a period. In preparation for adding/updating kernel-doc function comments clean up the ones currently present. Signed-off-by: Tobin C. Harding Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 09e3ac329e92..3f4b7e95b0c2 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -18,7 +18,7 @@ #include /** - * kobject_namespace - return @kobj's namespace tag + * kobject_namespace() - Return @kobj's namespace tag. * @kobj: kobject in question * * Returns namespace tag of @kobj if its parent has namespace ops enabled @@ -36,7 +36,7 @@ const void *kobject_namespace(struct kobject *kobj) } /** - * kobject_get_ownership - get sysfs ownership data for @kobj + * kobject_get_ownership() - Get sysfs ownership data for @kobj. * @kobj: kobject in question * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects @@ -273,7 +273,7 @@ static int kobject_add_internal(struct kobject *kobj) } /** - * kobject_set_name_vargs - Set the name of an kobject + * kobject_set_name_vargs() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @vargs: vargs to format the string. @@ -313,7 +313,7 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, } /** - * kobject_set_name - Set the name of a kobject + * kobject_set_name() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @@ -335,7 +335,7 @@ int kobject_set_name(struct kobject *kobj, const char *fmt, ...) EXPORT_SYMBOL(kobject_set_name); /** - * kobject_init - initialize a kobject structure + * kobject_init() - Initialize a kobject structure. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @@ -391,7 +391,7 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, } /** - * kobject_add - the main kobject add function + * kobject_add() - The main kobject add function. * @kobj: the kobject to add * @parent: pointer to the parent of the kobject. * @fmt: format to name the kobject with. 
@@ -443,7 +443,8 @@ int kobject_add(struct kobject *kobj, struct kobject *parent, EXPORT_SYMBOL(kobject_add); /** - * kobject_init_and_add - initialize a kobject structure and add it to the kobject hierarchy + * kobject_init_and_add() - Initialize a kobject structure and add it to + * the kobject hierarchy. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @parent: pointer to the parent of this kobject. @@ -473,7 +474,7 @@ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, EXPORT_SYMBOL_GPL(kobject_init_and_add); /** - * kobject_rename - change the name of an object + * kobject_rename() - Change the name of an object. * @kobj: object in question. * @new_name: object's new name * @@ -540,7 +541,7 @@ out: EXPORT_SYMBOL_GPL(kobject_rename); /** - * kobject_move - move object to another parent + * kobject_move() - Move object to another parent. * @kobj: object in question. * @new_parent: object's new parent (can be NULL) */ @@ -593,7 +594,7 @@ out: EXPORT_SYMBOL_GPL(kobject_move); /** - * kobject_del - unlink kobject from hierarchy. + * kobject_del() - Unlink kobject from hierarchy. * @kobj: object. * * This is the function that should be called to delete an object @@ -624,7 +625,7 @@ void kobject_del(struct kobject *kobj) EXPORT_SYMBOL(kobject_del); /** - * kobject_get - increment refcount for object. + * kobject_get() - Increment refcount for object. * @kobj: object. */ struct kobject *kobject_get(struct kobject *kobj) @@ -717,7 +718,7 @@ static void kobject_release(struct kref *kref) } /** - * kobject_put - decrement refcount for object. + * kobject_put() - Decrement refcount for object. * @kobj: object. * * Decrement the refcount, and if 0, call kobject_cleanup(). @@ -746,7 +747,7 @@ static struct kobj_type dynamic_kobj_ktype = { }; /** - * kobject_create - create a struct kobject dynamically + * kobject_create() - Create a struct kobject dynamically. * * This function creates a kobject structure dynamically and sets it up * to be a "dynamic" kobject with a default release function set up. @@ -769,8 +770,8 @@ struct kobject *kobject_create(void) } /** - * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs - * + * kobject_create_and_add() - Create a struct kobject dynamically and + * register it with sysfs. * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * @@ -801,7 +802,7 @@ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) EXPORT_SYMBOL_GPL(kobject_create_and_add); /** - * kset_init - initialize a kset for use + * kset_init() - Initialize a kset for use. * @k: kset */ void kset_init(struct kset *k) @@ -843,7 +844,7 @@ const struct sysfs_ops kobj_sysfs_ops = { EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** - * kset_register - initialize and add a kset. + * kset_register() - Initialize and add a kset. * @k: kset. */ int kset_register(struct kset *k) @@ -863,7 +864,7 @@ int kset_register(struct kset *k) EXPORT_SYMBOL(kset_register); /** - * kset_unregister - remove a kset. + * kset_unregister() - Remove a kset. * @k: kset. */ void kset_unregister(struct kset *k) @@ -876,7 +877,7 @@ void kset_unregister(struct kset *k) EXPORT_SYMBOL(kset_unregister); /** - * kset_find_obj - search for object in kset. + * kset_find_obj() - Search for object in kset. * @kset: kset we're looking in. * @name: object's name. 
* @@ -924,7 +925,7 @@ static struct kobj_type kset_ktype = { }; /** - * kset_create - create a struct kset dynamically + * kset_create() - Create a struct kset dynamically. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset @@ -968,7 +969,7 @@ static struct kset *kset_create(const char *name, } /** - * kset_create_and_add - create a struct kset dynamically and add it to sysfs + * kset_create_and_add() - Create a struct kset dynamically and add it to sysfs. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset -- cgit From 70e16a620e075cb916644e06012766639b58b2fb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 May 2019 12:22:24 +0200 Subject: kobject: clean up the kobject add documentation a bit more Commit 1fd7c3b438a2 ("kobject: Improve doc clarity kobject_init_and_add()") tried to provide more clarity, but the reference to kobject_del() was incorrect. Fix that up by removing that line, and hopefully be more explicit as to exactly what needs to happen here once you register a kobject with the kobject core. Acked-by: Tobin C. Harding Fixes: 1fd7c3b438a2 ("kobject: Improve doc clarity kobject_init_and_add()") Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 3f4b7e95b0c2..f2ccdbac8ed9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -416,8 +416,12 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, * to this function be directly freed with a call to kfree(), * that can leak memory. * - * If this call returns successfully and you later need to unwind - * kobject_add() for the error path you should call kobject_del(). + * If this function returns success, kobject_put() must also be called + * in order to properly clean up the memory associated with the object. + * + * In short, once this function is called, kobject_put() MUST be called + * when the use of the object is finished in order to properly free + * everything. */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) -- cgit From 554aae35007e49f533d3d10e788295f7141725bc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 2 May 2019 23:23:29 +0300 Subject: lib: Add support for generic packing operations This provides an unified API for accessing register bit fields regardless of memory layout. The basic unit of data for these API functions is the u64. The process of transforming an u64 from native CPU encoding into the peripheral's encoding is called 'pack', and transforming it from peripheral to native CPU encoding is 'unpack'. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- lib/Kconfig | 17 +++++ lib/Makefile | 1 + lib/packing.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 lib/packing.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index a9e56539bd11..ac1fcf06d8ea 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -18,6 +18,23 @@ config RAID6_PQ_BENCHMARK Benchmark all available RAID6 PQ functions on init and choose the fastest one. 
+config PACKING + bool "Generic bitfield packing and unpacking" + default n + help + This option provides the packing() helper function, which permits + converting bitfields between a CPU-usable representation and a + memory representation that can have any combination of these quirks: + - Is little endian (bytes are reversed within a 32-bit group) + - The least-significant 32-bit word comes first (within a 64-bit + group) + - The most significant bit of a byte is at its right (bit 0 of a + register description is numerically 2^7). + Drivers may use these helpers to match the bit indices as described + in the data sheets of the peripherals they are in control of. + + When in doubt, say N. + config BITREVERSE tristate diff --git a/lib/Makefile b/lib/Makefile index 3b08673e8881..7d4db18fabf1 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -108,6 +108,7 @@ obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o +obj-$(CONFIG_PACKING) += packing.o obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o diff --git a/lib/packing.c b/lib/packing.c new file mode 100644 index 000000000000..50d1e9f2f5a7 --- /dev/null +++ b/lib/packing.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2016-2018, NXP Semiconductors + * Copyright (c) 2018-2019, Vladimir Oltean + */ +#include +#include +#include +#include +#include + +static int get_le_offset(int offset) +{ + int closest_multiple_of_4; + + closest_multiple_of_4 = (offset / 4) * 4; + offset -= closest_multiple_of_4; + return closest_multiple_of_4 + (3 - offset); +} + +static int get_reverse_lsw32_offset(int offset, size_t len) +{ + int closest_multiple_of_4; + int word_index; + + word_index = offset / 4; + closest_multiple_of_4 = word_index * 4; + offset -= closest_multiple_of_4; + word_index = (len / 4) - word_index - 1; + return word_index * 4 + offset; +} + +static u64 bit_reverse(u64 val, unsigned int width) +{ + u64 new_val = 0; + unsigned int bit; + unsigned int i; + + for (i = 0; i < width; i++) { + bit = (val & (1 << i)) != 0; + new_val |= (bit << (width - i - 1)); + } + return new_val; +} + +static void adjust_for_msb_right_quirk(u64 *to_write, int *box_start_bit, + int *box_end_bit, u8 *box_mask) +{ + int box_bit_width = *box_start_bit - *box_end_bit + 1; + int new_box_start_bit, new_box_end_bit; + + *to_write >>= *box_end_bit; + *to_write = bit_reverse(*to_write, box_bit_width); + *to_write <<= *box_end_bit; + + new_box_end_bit = box_bit_width - *box_start_bit - 1; + new_box_start_bit = box_bit_width - *box_end_bit - 1; + *box_mask = GENMASK_ULL(new_box_start_bit, new_box_end_bit); + *box_start_bit = new_box_start_bit; + *box_end_bit = new_box_end_bit; +} + +/** + * packing - Convert numbers (currently u64) between a packed and an unpacked + * format. Unpacked means laid out in memory in the CPU's native + * understanding of integers, while packed means anything else that + * requires translation. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. 
+ * @op: If PACK, then uval will be treated as const pointer and copied (packed) + * into pbuf, between startbit and endbit. + * If UNPACK, then pbuf will be treated as const pointer and the logical + * value between startbit and endbit will be copied (unpacked) to uval. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. + * If op is PACK, pbuf is modified. + * If op is UNPACK, uval is modified. + */ +int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, + enum packing_op op, u8 quirks) +{ + /* Number of bits for storing "uval" + * also width of the field to access in the pbuf + */ + u64 value_width; + /* Logical byte indices corresponding to the + * start and end of the field. + */ + int plogical_first_u8, plogical_last_u8, box; + + /* startbit is expected to be larger than endbit */ + if (startbit < endbit) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (value_width > 64) + return -ERANGE; + + /* Check if "uval" fits in "value_width" bits. + * If value_width is 64, the check will fail, but any + * 64-bit uval will surely fit. + */ + if (op == PACK && value_width < 64 && (*uval >= (1ull << value_width))) + /* Cannot store "uval" inside "value_width" bits. + * Truncating "uval" is most certainly not desirable, + * so simply erroring out is appropriate. + */ + return -ERANGE; + + /* Initialize parameter */ + if (op == UNPACK) + *uval = 0; + + /* Iterate through an idealistic view of the pbuf as an u64 with + * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low + * logical bit significance. "box" denotes the current logical u8. + */ + plogical_first_u8 = startbit / 8; + plogical_last_u8 = endbit / 8; + + for (box = plogical_first_u8; box >= plogical_last_u8; box--) { + /* Bit indices into the currently accessed 8-bit box */ + int box_start_bit, box_end_bit, box_addr; + u8 box_mask; + /* Corresponding bits from the unpacked u64 parameter */ + int proj_start_bit, proj_end_bit; + u64 proj_mask; + + /* This u8 may need to be accessed in its entirety + * (from bit 7 to bit 0), or not, depending on the + * input arguments startbit and endbit. + */ + if (box == plogical_first_u8) + box_start_bit = startbit % 8; + else + box_start_bit = 7; + if (box == plogical_last_u8) + box_end_bit = endbit % 8; + else + box_end_bit = 0; + + /* We have determined the box bit start and end. + * Now we calculate where this (masked) u8 box would fit + * in the unpacked (CPU-readable) u64 - the u8 box's + * projection onto the unpacked u64. Though the + * box is u8, the projection is u64 because it may fall + * anywhere within the unpacked u64. + */ + proj_start_bit = ((box * 8) + box_start_bit) - endbit; + proj_end_bit = ((box * 8) + box_end_bit) - endbit; + proj_mask = GENMASK_ULL(proj_start_bit, proj_end_bit); + box_mask = GENMASK_ULL(box_start_bit, box_end_bit); + + /* Determine the offset of the u8 box inside the pbuf, + * adjusted for quirks. The adjusted box_addr will be used for + * effective addressing inside the pbuf (so it's not + * logical any longer). 
+ */ + box_addr = pbuflen - box - 1; + if (quirks & QUIRK_LITTLE_ENDIAN) + box_addr = get_le_offset(box_addr); + if (quirks & QUIRK_LSW32_IS_FIRST) + box_addr = get_reverse_lsw32_offset(box_addr, + pbuflen); + + if (op == UNPACK) { + u64 pval; + + /* Read from pbuf, write to uval */ + pval = ((u8 *)pbuf)[box_addr] & box_mask; + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + adjust_for_msb_right_quirk(&pval, + &box_start_bit, + &box_end_bit, + &box_mask); + + pval >>= box_end_bit; + pval <<= proj_end_bit; + *uval &= ~proj_mask; + *uval |= pval; + } else { + u64 pval; + + /* Write to pbuf, read from uval */ + pval = (*uval) & proj_mask; + pval >>= proj_end_bit; + if (quirks & QUIRK_MSB_ON_THE_RIGHT) + adjust_for_msb_right_quirk(&pval, + &box_start_bit, + &box_end_bit, + &box_mask); + + pval <<= box_end_bit; + ((u8 *)pbuf)[box_addr] &= ~box_mask; + ((u8 *)pbuf)[box_addr] |= pval; + } + } + return 0; +} +EXPORT_SYMBOL(packing); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); -- cgit From d54a16b20157ce300298eb4a1169bf9acfda3d08 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 2 May 2019 16:15:10 +0200 Subject: netlink: set bad attribute also on maxtype check The check that attribute type is within 0...maxtype range in __nla_validate_parse() sets only error message but not bad_attr in extack. Set also bad_attr to tell userspace which attribute failed validation. Signed-off-by: Michal Kubecek Reviewed-by: Johannes Berg Reviewed-by: David Ahern Signed-off-by: David S. Miller --- lib/nlattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index 29f6336e2422..adc919b32bf9 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -356,7 +356,8 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, if (type == 0 || type > maxtype) { if (validate & NL_VALIDATE_MAXTYPE) { - NL_SET_ERR_MSG(extack, "Unknown attribute type"); + NL_SET_ERR_MSG_ATTR(extack, nla, + "Unknown attribute type"); return -EINVAL; } continue; -- cgit From b424e432e770d6dd572765459d5b6a96a19c5286 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 2 May 2019 16:15:10 +0200 Subject: netlink: add validation of NLA_F_NESTED flag Add new validation flag NL_VALIDATE_NESTED which adds three consistency checks of NLA_F_NESTED_FLAG: - the flag is set on attributes with NLA_NESTED{,_ARRAY} policy - the flag is not set on attributes with other policies except NLA_UNSPEC - the flag is set on attribute passed to nla_parse_nested() Signed-off-by: Michal Kubecek v2: change error messages to mention NLA_F_NESTED explicitly Reviewed-by: Johannes Berg Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- lib/nlattr.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index adc919b32bf9..cace9b307781 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -184,6 +184,21 @@ static int validate_nla(const struct nlattr *nla, int maxtype, } } + if (validate & NL_VALIDATE_NESTED) { + if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) && + !(nla->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "NLA_F_NESTED is missing"); + return -EINVAL; + } + if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY && + pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "NLA_F_NESTED not expected"); + return -EINVAL; + } + } + switch (pt->type) { case NLA_EXACT_LEN: if (attrlen != pt->len) -- cgit From 2ac5a3bf7042a1c4abbcce1b6f0ec61e5d3786c2 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 10 May 2019 10:42:13 +0200 Subject: vsprintf: Do not break early boot with probing addresses The commit 3e5903eb9cff70730 ("vsprintf: Prevent crash when dereferencing invalid pointers") broke boot on several architectures. The common pattern is that probe_kernel_read() is not working during early boot because the userspace access framework is not ready. It is a generic problem. We have to avoid any complex external functions in vsprintf() code, especially in the common path. They might break printk() easily and are hard to debug. Replace probe_kernel_read() with some simple checks for obvious problems. Details: 1. Report on Power: Kernel crashes very early during boot with CONFIG_PPC_KUAP and CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG The problem is the combination of some new code called via printk(), check_pointer() which calls probe_kernel_read(). That then calls allow_user_access() (PPC_KUAP) and that uses mmu_has_feature() too early (before we've patched features). With the JUMP_LABEL debug enabled that causes us to call printk() & dump_stack() and we end up recursing and overflowing the stack. Because it happens so early you don't get any output, just an apparently dead system. The stack trace (which you don't see) is something like: ... dump_stack+0xdc probe_kernel_read+0x1a4 check_pointer+0x58 string+0x3c vsnprintf+0x1bc vscnprintf+0x20 printk_safe_log_store+0x7c printk+0x40 dump_stack_print_info+0xbc dump_stack+0x8 probe_kernel_read+0x1a4 probe_kernel_read+0x19c check_pointer+0x58 string+0x3c vsnprintf+0x1bc vscnprintf+0x20 vprintk_store+0x6c vprintk_emit+0xec vprintk_func+0xd4 printk+0x40 cpufeatures_process_feature+0xc8 scan_cpufeatures_subnodes+0x380 of_scan_flat_dt_subnodes+0xb4 dt_cpu_ftrs_scan_callback+0x158 of_scan_flat_dt+0xf0 dt_cpu_ftrs_scan+0x3c early_init_devtree+0x360 early_setup+0x9c 2. Report on s390: vsnprintf invocations are broken on s390. For example, the early boot output now looks like this where the first (efault) should be the linux_banner: [ 0.099985] (efault) [ 0.099985] setup: Linux is running as a z/VM guest operating system in 64-bit mode [ 0.100066] setup: The maximum memory size is 8192MB [ 0.100070] cma: Reserved 4 MiB at (efault) [ 0.100100] numa: NUMA mode: (efault) The reason for this is that the code assumes that probe_kernel_address() works very early. This however is not true on at least s390. Uaccess on KERNEL_DS works only after page tables have been setup on s390, which happens with setup_arch()->paging_init(). Any probe_kernel_address() invocation before that will return -EFAULT.
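The effect of the simple checks, in miniature (an illustration, not part of the patch):

	printk("%s\n", (char *)NULL);			/* prints "(null)" */
	printk("%s\n", (char *)16);			/* below PAGE_SIZE: prints "(efault)" */
	printk("%s\n", (char *)ERR_PTR(-ENOMEM));	/* IS_ERR_VALUE: prints "(efault)" */

A pointer into ordinary unmapped kernel memory is no longer caught, but as the removed comment below notes, that was always the rare case compared to NULL, small offsets from NULL, and ERR_PTR values.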
Fixes: 3e5903eb9cff70730 ("vsprintf: Prevent crash when dereferencing invalid pointers") Link: http://lkml.kernel.org/r/20190510084213.22149-1-pmladek@suse.com Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: "Tobin C . Harding" Cc: Michal Hocko Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: Russell Currey Cc: Christophe Leroy Cc: Stephen Rothwell Cc: Heiko Carstens Cc: linux-arch@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: Martin Schwidefsky Cc: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- lib/vsprintf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7b0a6140bfad..2f003cfe340e 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -628,19 +628,16 @@ static char *error_string(char *buf, char *end, const char *s, } /* - * This is not a fool-proof test. 99% of the time that this will fault is - * due to a bad pointer, not one that crosses into bad memory. Just test - * the address to make sure it doesn't fault due to a poorly added printk - * during debugging. + * Do not call any complex external code here. Nested printk()/vsprintf() + * might cause infinite loops. Failures might break printk() and would + * be hard to debug. */ static const char *check_pointer_msg(const void *ptr) { - char byte; - if (!ptr) return "(null)"; - if (probe_kernel_address(ptr, byte)) + if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr)) return "(efault)"; return NULL; -- cgit From 73b0140bf0fe9df90fb267c00673c4b9bf285430 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 13 May 2019 17:17:11 -0700 Subject: mm/gup: change GUP fast to use flags rather than a write 'bool' To facilitate additional options to get_user_pages_fast() change the singular write parameter to be gup_flags. This patch does not change any functionality. New functionality will follow in subsequent patches. Some of the get_user_pages_fast() call sites were unchanged because they already passed FOLL_WRITE or 0 for the write parameter. NOTE: It was suggested to change the ordering of the get_user_pages_fast() arguments to ensure that callers were converted. This breaks the current GUP call site convention of having the returned pages be the final parameter. So the suggestion was rejected. Link: http://lkml.kernel.org/r/20190328084422.29911-4-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190317183438.2057-4-ira.weiny@intel.com Signed-off-by: Ira Weiny Reviewed-by: Mike Marshall Cc: Aneesh Kumar K.V Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dan Williams Cc: "David S. Miller" Cc: Heiko Carstens Cc: Ingo Molnar Cc: James Hogan Cc: Jason Gunthorpe Cc: John Hubbard Cc: "Kirill A. Shutemov" Cc: Martin Schwidefsky Cc: Michal Hocko Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/iov_iter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index b396d328a764..f74fa832f3aa 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1293,7 +1293,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, len = maxpages * PAGE_SIZE; addr &= ~(PAGE_SIZE - 1); n = DIV_ROUND_UP(len, PAGE_SIZE); - res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages); + res = get_user_pages_fast(addr, n, + iov_iter_rw(i) != WRITE ? 
FOLL_WRITE : 0, + pages); if (unlikely(res < 0)) return res; return (res == n ? len : res * PAGE_SIZE) - *start; @@ -1374,7 +1376,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, p = get_pages_array(n); if (!p) return -ENOMEM; - res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p); + res = get_user_pages_fast(addr, n, + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); if (unlikely(res < 0)) { kvfree(p); return res; -- cgit From 9012d011660ea5cf2a623e1de207a2bc0ca6936d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 14 May 2019 15:42:25 -0700 Subject: compiler: allow all arches to enable CONFIG_OPTIMIZE_INLINING Commit 60a3cdd06394 ("x86: add optimized inlining") introduced CONFIG_OPTIMIZE_INLINING, but it has been available only for x86. The idea is obviously arch-agnostic. This commit moves the config entry from arch/x86/Kconfig.debug to lib/Kconfig.debug so that all architectures can benefit from it. This can make a huge difference in kernel image size especially when CONFIG_OPTIMIZE_FOR_SIZE is enabled. For example, I got 3.5% smaller arm64 kernel for v5.1-rc1. dec file 18983424 arch/arm64/boot/Image.before 18321920 arch/arm64/boot/Image.after This also slightly improves the "Kernel hacking" Kconfig menu as e61aca5158a8 ("Merge branch 'kconfig-diet' from Dave Hansen') suggested; this config option would be a good fit in the "compiler option" menu. Link: http://lkml.kernel.org/r/20190423034959.13525-12-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Borislav Petkov Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boris Brezillon Cc: Brian Norris Cc: Christophe Leroy Cc: David Woodhouse Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Marek Vasut Cc: Mark Rutland Cc: Mathieu Malaterre Cc: Miquel Raynal Cc: Paul Mackerras Cc: Ralf Baechle Cc: Richard Weinberger Cc: Russell King Cc: Stefan Agner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d695ec1477f3..d5411a7484f6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -318,6 +318,20 @@ config HEADERS_CHECK exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in your build tree), to make sure they're suitable. +config OPTIMIZE_INLINING + bool "Allow compiler to uninline functions marked 'inline'" + help + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of + compilers. The gcc 4.x series have a rewritten inlining algorithm and + enabling this option will generate a smaller kernel there. Hopefully + this algorithm is so good that allowing gcc 4.x and above to make the + decision will become the default in the future. Until then this option + is there to test gcc for this. + + If unsure, say N. + config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" help -- cgit From 5f239f655a7e67a972ee1fa17045a08e640d28da Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 14 May 2019 15:42:40 -0700 Subject: lib/bitmap.c: remove unused EXPORT_SYMBOLs AFAICT, there have never been any callers of these functions outside mm/mempolicy.c (via their nodemask.h wrappers). In particular, no modular code has ever used them, and given their somewhat exotic semantics, I highly doubt they will ever find such a use. 
In any case, no need to export them currently. Link: http://lkml.kernel.org/r/20190329205353.6010-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Andy Shevchenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 98872e9025da..66421f304f7d 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -757,7 +757,6 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src, set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst); } } -EXPORT_SYMBOL(bitmap_remap); /** * bitmap_bitremap - Apply map defined by a pair of bitmaps to a single bit @@ -795,7 +794,6 @@ int bitmap_bitremap(int oldbit, const unsigned long *old, else return bitmap_ord_to_pos(new, n % w, bits); } -EXPORT_SYMBOL(bitmap_bitremap); /** * bitmap_onto - translate one bitmap relative to another @@ -930,7 +928,6 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig, m++; } } -EXPORT_SYMBOL(bitmap_onto); /** * bitmap_fold - fold larger bitmap into smaller, modulo specified size @@ -955,7 +952,6 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, for_each_set_bit(oldbit, orig, nbits) set_bit(oldbit % sz, dst); } -EXPORT_SYMBOL(bitmap_fold); /* * Common code for bitmap_*_region() routines. -- cgit

From cdc90a1871d6e64080f4506e900c6ef88e6fb39f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 14 May 2019 15:42:43 -0700 Subject: lib/bitmap.c: guard exotic bitmap functions by CONFIG_NUMA

The bitmap_remap, _bitremap, _onto and _fold functions are only used, via their node_ wrappers, in mm/mempolicy.c, which is only built for CONFIG_NUMA. The helper bitmap_ord_to_pos used by these functions is global, but its only external caller is node_random() in lib/nodemask.c, which is also guarded by CONFIG_NUMA.

For !CONFIG_NUMA:
add/remove: 0/6 grow/shrink: 0/0 up/down: 0/-621 (-621)
Function            old   new   delta
bitmap_pos_to_ord    20     -     -20
bitmap_ord_to_pos    70     -     -70
bitmap_bitremap      81     -     -81
bitmap_fold         113     -    -113
bitmap_onto         123     -    -123
bitmap_remap        214     -    -214
Total: Before=4776, After=4155, chg -13.00%

Link: http://lkml.kernel.org/r/20190329205353.6010-2-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Cc: Andy Shevchenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 66421f304f7d..3f3b8051f342 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -649,6 +649,7 @@ int bitmap_parselist_user(const char __user *ubuf, EXPORT_SYMBOL(bitmap_parselist_user); +#ifdef CONFIG_NUMA /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap @@ -952,6 +953,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, for_each_set_bit(oldbit, orig, nbits) set_bit(oldbit % sz, dst); } +#endif /* CONFIG_NUMA */ /* * Common code for bitmap_*_region() routines. -- cgit

From 8e18faeac3e4d4b3ff3d705cd46d0fcb710b09d0 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 14 May 2019 15:42:46 -0700 Subject: lib/plist: rename DEBUG_PI_LIST to DEBUG_PLIST

This is a lot more appropriate than PI_LIST: in the kernel, that name suggests priority inheritance, which this code is not specific to. Furthermore, futexes make use of plists, so the old name can be even more confusing, even though this is only a debug config option.
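For readers who have not met plists: a plist is a descending-priority-sorted doubly-linked list (include/linux/plist.h), used by futexes and other core code, and with CONFIG_DEBUG_PLIST enabled the add/del operations run plist_check_head() consistency checks. A minimal usage sketch follows; the struct waiter wrapper and both helpers are hypothetical, invented purely for illustration, but the plist_* calls are the real API:

    #include <linux/plist.h>

    struct waiter {
            struct plist_node node; /* sorted by node.prio, lowest value first */
            int id;
    };

    static struct plist_head wait_list = PLIST_HEAD_INIT(wait_list);

    /* Queue @w behind all waiters of higher or equal priority. */
    static void enqueue_waiter(struct waiter *w, int prio)
    {
            plist_node_init(&w->node, prio);
            plist_add(&w->node, &wait_list);
    }

    /* Highest-priority waiter (lowest prio value), or NULL if none. */
    static struct waiter *pick_waiter(void)
    {
            if (plist_head_empty(&wait_list))
                    return NULL;
            return container_of(plist_first(&wait_list), struct waiter, node);
    }

Nothing in this is specific to priority inheritance, which is exactly why DEBUG_PLIST is the better name for the debug knob.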
Link: http://lkml.kernel.org/r/20190317185434.1626-1-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 2 +- lib/plist.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d5411a7484f6..181bd56238b0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1372,7 +1372,7 @@ config DEBUG_LIST If unsure, say N. -config DEBUG_PI_LIST +config DEBUG_PLIST bool "Debug priority linked list manipulation" depends on DEBUG_KERNEL help diff --git a/lib/plist.c b/lib/plist.c index 199408f91057..d3bd8827186f 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -26,7 +26,7 @@ #include #include -#ifdef CONFIG_DEBUG_PI_LIST +#ifdef CONFIG_DEBUG_PLIST static struct plist_head test_head; @@ -173,7 +173,7 @@ void plist_requeue(struct plist_node *node, struct plist_head *head) plist_check_head(head); } -#ifdef CONFIG_DEBUG_PI_LIST +#ifdef CONFIG_DEBUG_PLIST #include #include #include -- cgit

From 37d0ec34d111acfdb82b24e3de00d926c0aece4d Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:42:49 -0700 Subject: lib/sort: make swap functions more generic

Patch series "lib/sort & lib/list_sort: faster and smaller", v2.

Because CONFIG_RETPOLINE has made indirect calls much more expensive, I thought I'd try to reduce the number made by the library sort functions.

The first three patches apply to lib/sort.c.

Patch #1 is a simple optimization. The built-in swap has special cases for aligned 4- and 8-byte objects. But those are almost never used; most calls to sort() work on larger structures, which fall back to the byte-at-a-time loop. This generalizes them to aligned *multiples* of 4 and 8 bytes. (If nothing else, it saves an awful lot of energy by not thrashing the store buffers as much.)

Patch #2 grabs a juicy piece of low-hanging fruit. I agree that nice simple solid heapsort is preferable to more complex algorithms (sorry, Andrey), but it's possible to implement heapsort with far fewer comparisons (50% asymptotically, 25-40% reduction for realistic sizes) than the way it's been done up to now. And with some care, the code ends up smaller, as well. This is the "big win" patch.

Patch #3 adds the same sort of indirect call bypass that has been added to the net code of late. The great majority of the callers use the builtin swap functions, so replace the indirect call to sort_func with a (highly predictable) series of if() statements. Rather surprisingly, this decreased code size, as the swap functions were inlined and their prologue & epilogue code eliminated.

lib/list_sort.c is a bit trickier, as merge sort is already close to optimal, and we don't want to introduce triumphs of theory over practicality like the Ford-Johnson merge-insertion sort.

Patch #4, without changing the algorithm, chops 32% off the code size and removes the part[MAX_LIST_LENGTH_BITS+1] pointer array (and the corresponding upper limit on efficiently sortable input size).

Patch #5 improves the algorithm. The previous code is already optimal for power-of-two (or slightly smaller) size inputs, but when the input size is just over a power of 2, there's a very unbalanced final merge. There are, in the literature, several algorithms which solve this, but they all depend on the "breadth-first" merge order which was replaced by commit 835cc0c8477f with a more cache-friendly "depth-first" order.
Some hard thinking came up with a depth-first algorithm which defers merges as little as possible while avoiding bad merges. This saves 0.2*n compares, averaged over all sizes. The code size increase is minimal (64 bytes on x86-64, reducing the net savings to 26%), but the comments expanded significantly to document the clever algorithm. TESTING NOTES: I have some ugly user-space benchmarking code which I used for testing before moving this code into the kernel. Shout if you want a copy. I'm running this code right now, with CONFIG_TEST_SORT and CONFIG_TEST_LIST_SORT, but I confess I haven't rebooted since the last round of minor edits to quell checkpatch. I figure there will be at least one round of comments and final testing. This patch (of 5): Rather than having special-case swap functions for 4- and 8-byte objects, special-case aligned multiples of 4 or 8 bytes. This speeds up most users of sort() by avoiding fallback to the byte copy loop. Despite what ca96ab859ab4 ("lib/sort: Add 64 bit swap function") claims, very few users of sort() sort pointers (or pointer-sized objects); most sort structures containing at least two words. (E.g. drivers/acpi/fan.c:acpi_fan_get_fps() sorts an array of 40-byte struct acpi_fan_fps.) The functions also got renamed to reflect the fact that they support multiple words. In the great tradition of bikeshedding, the names were by far the most contentious issue during review of this patch series. x86-64 code size 872 -> 886 bytes (+14) With feedback from Andy Shevchenko, Rasmus Villemoes and Geert Uytterhoeven. Link: http://lkml.kernel.org/r/f24f932df3a7fa1973c1084154f1cea596bcf341.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Geert Uytterhoeven Cc: Daniel Wagner Cc: Don Mullis Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/sort.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 99 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/sort.c b/lib/sort.c index d6b7a202b0b6..ec79eac85e21 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -11,35 +11,108 @@ #include #include -static int alignment_ok(const void *base, int align) +/** + * is_aligned - is this pointer & size okay for word-wide copying? + * @base: pointer to data + * @size: size of each element + * @align: required aignment (typically 4 or 8) + * + * Returns true if elements can be copied using word loads and stores. + * The size must be a multiple of the alignment, and the base address must + * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + * + * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)" + * to "if ((a | b) & mask)", so we do that by hand. + */ +__attribute_const__ __always_inline +static bool is_aligned(const void *base, size_t size, unsigned char align) { - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || - ((unsigned long)base & (align - 1)) == 0; + unsigned char lsbits = (unsigned char)size; + + (void)base; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + lsbits |= (unsigned char)(uintptr_t)base; +#endif + return (lsbits & (align - 1)) == 0; } -static void u32_swap(void *a, void *b, int size) +/** + * swap_words_32 - swap two elements in 32-bit chunks + * @a, @b: pointers to the elements + * @size: element size (must be a multiple of 4) + * + * Exchange the two objects in memory. 
This exploits base+index addressing, + * which basically all CPUs have, to minimize loop overhead computations. + * + * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the + * bottom of the loop, even though the zero flag is stil valid from the + * subtract (since the intervening mov instructions don't alter the flags). + * Gcc 8.1.0 doesn't have that problem. + */ +static void swap_words_32(void *a, void *b, int size) { - u32 t = *(u32 *)a; - *(u32 *)a = *(u32 *)b; - *(u32 *)b = t; + size_t n = (unsigned int)size; + + do { + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + } while (n); } -static void u64_swap(void *a, void *b, int size) +/** + * swap_words_64 - swap two elements in 64-bit chunks + * @a, @b: pointers to the elements + * @size: element size (must be a multiple of 8) + * + * Exchange the two objects in memory. This exploits base+index + * addressing, which basically all CPUs have, to minimize loop overhead + * computations. + * + * We'd like to use 64-bit loads if possible. If they're not, emulating + * one requires base+index+4 addressing which x86 has but most other + * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads, + * but it's possible to have 64-bit loads without 64-bit pointers (e.g. + * x32 ABI). Are there any cases the kernel needs to worry about? + */ +static void swap_words_64(void *a, void *b, int size) { - u64 t = *(u64 *)a; - *(u64 *)a = *(u64 *)b; - *(u64 *)b = t; + size_t n = (unsigned int)size; + + do { +#ifdef CONFIG_64BIT + u64 t = *(u64 *)(a + (n -= 8)); + *(u64 *)(a + n) = *(u64 *)(b + n); + *(u64 *)(b + n) = t; +#else + /* Use two 32-bit transfers to avoid base+index+4 addressing */ + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + + t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; +#endif + } while (n); } -static void generic_swap(void *a, void *b, int size) +/** + * swap_bytes - swap two elements a byte at a time + * @a, @b: pointers to the elements + * @size: element size + * + * This is the fallback if alignment doesn't allow using larger chunks. + */ +static void swap_bytes(void *a, void *b, int size) { - char t; + size_t n = (unsigned int)size; do { - t = *(char *)a; - *(char *)a++ = *(char *)b; - *(char *)b++ = t; - } while (--size > 0); + char t = ((char *)a)[--n]; + ((char *)a)[n] = ((char *)b)[n]; + ((char *)b)[n] = t; + } while (n); } /** @@ -50,8 +123,10 @@ static void generic_swap(void *a, void *b, int size) * @cmp_func: pointer to comparison function * @swap_func: pointer to swap function or NULL * - * This function does a heapsort on the given array. You may provide a - * swap_func function optimized to your element type. + * This function does a heapsort on the given array. You may provide + * a swap_func function if you need to do something more than a memory + * copy (e.g. fix up pointers or auxiliary data), but the built-in swap + * isn't usually a bottleneck. * * Sorting time is O(n log n) both on average and worst-case. 
While * qsort is about 20% faster on average, it suffers from exploitable @@ -67,12 +142,12 @@ void sort(void *base, size_t num, size_t size, int i = (num/2 - 1) * size, n = num * size, c, r; if (!swap_func) { - if (size == 4 && alignment_ok(base, 4)) - swap_func = u32_swap; - else if (size == 8 && alignment_ok(base, 8)) - swap_func = u64_swap; + if (is_aligned(base, size, 8)) + swap_func = swap_words_64; + else if (is_aligned(base, size, 4)) + swap_func = swap_words_32; else - swap_func = generic_swap; + swap_func = swap_bytes; } /* heapify */ -- cgit From 22a241ccb2c19962a0fb02c98154aa93d3fc1862 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:42:52 -0700 Subject: lib/sort: use more efficient bottom-up heapsort variant This uses fewer comparisons than the previous code (approaching half as many for large random inputs), but produces identical results; it actually performs the exact same series of swap operations. Specifically, it reduces the average number of compares from 2*n*log2(n) - 3*n + o(n) to n*log2(n) + 0.37*n + o(n). This is still 1.63*n worse than glibc qsort() which manages n*log2(n) - 1.26*n, but at least the leading coefficient is correct. Standard heapsort, when sifting down, performs two comparisons per level: one to find the greater child, and a second to see if the current node should be exchanged with that child. Bottom-up heapsort observes that it's better to postpone the second comparison and search for the leaf where -infinity would be sent to, then search back *up* for the current node's destination. Since sifting down usually proceeds to the leaf level (that's where half the nodes are), this does O(1) second comparisons rather than log2(n). That saves a lot of (expensive since Spectre) indirect function calls. The one time it's worse than the previous code is if there are large numbers of duplicate keys, when the top-down algorithm is O(n) and bottom-up is O(n log n). For distinct keys, it's provably always better, doing 1.5*n*log2(n) + O(n) in the worst case. (The code is not significantly more complex. This patch also merges the heap-building and -extracting sift-down loops, resulting in a net code size savings.) x86-64 code size 885 -> 767 bytes (-118) (I see the checkpatch complaint about "else if (n -= size)". The alternative is significantly uglier.) Link: http://lkml.kernel.org/r/2de8348635a1a421a72620677898c7fd5bd4b19d.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Daniel Wagner Cc: Dave Chinner Cc: Don Mullis Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/sort.c | 112 ++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 81 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/sort.c b/lib/sort.c index ec79eac85e21..0d24d0c5c0fc 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -1,8 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * A fast, small, non-recursive O(nlog n) sort for the Linux kernel + * A fast, small, non-recursive O(n log n) sort for the Linux kernel * - * Jan 23 2005 Matt Mackall + * This performs n*log2(n) + 0.37*n + o(n) comparisons on average, + * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case. + * + * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n + * better) at the expense of stack usage and much larger code to avoid + * quicksort's O(n^2) worst case. 
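+ * (At n = 1024, for example, that average is about 10240 + 379 = 10619
+ * compares, where the previous implementation's 2*n*log2(n) - 3*n came
+ * to roughly 17408.)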
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -15,7 +20,7 @@ * is_aligned - is this pointer & size okay for word-wide copying? * @base: pointer to data * @size: size of each element - * @align: required aignment (typically 4 or 8) + * @align: required alignment (typically 4 or 8) * * Returns true if elements can be copied using word loads and stores. * The size must be a multiple of the alignment, and the base address must @@ -115,6 +120,32 @@ static void swap_bytes(void *a, void *b, int size) } while (n); } +/** + * parent - given the offset of the child, find the offset of the parent. + * @i: the offset of the heap element whose parent is sought. Non-zero. + * @lsbit: a precomputed 1-bit mask, equal to "size & -size" + * @size: size of each element + * + * In terms of array indexes, the parent of element j = @i/@size is simply + * (j-1)/2. But when working in byte offsets, we can't use implicit + * truncation of integer divides. + * + * Fortunately, we only need one bit of the quotient, not the full divide. + * @size has a least significant bit. That bit will be clear if @i is + * an even multiple of @size, and set if it's an odd multiple. + * + * Logically, we're doing "if (i & lsbit) i -= size;", but since the + * branch is unpredictable, it's done with a bit of clever branch-free + * code instead. + */ +__attribute_const__ __always_inline +static size_t parent(size_t i, unsigned int lsbit, size_t size) +{ + i -= size; + i -= size & -(i & lsbit); + return i / 2; +} + /** * sort - sort an array of elements * @base: pointer to data to sort @@ -129,17 +160,20 @@ static void swap_bytes(void *a, void *b, int size) * isn't usually a bottleneck. * * Sorting time is O(n log n) both on average and worst-case. While - * qsort is about 20% faster on average, it suffers from exploitable + * quicksort is slightly faster on average, it suffers from exploitable * O(n*n) worst-case behavior and extra memory requirements that make * it less suitable for kernel use. */ - void sort(void *base, size_t num, size_t size, int (*cmp_func)(const void *, const void *), void (*swap_func)(void *, void *, int size)) { /* pre-scale counters for performance */ - int i = (num/2 - 1) * size, n = num * size, c, r; + size_t n = num * size, a = (num/2) * size; + const unsigned int lsbit = size & -size; /* Used to find parent */ + + if (!a) /* num < 2 || size == 0 */ + return; if (!swap_func) { if (is_aligned(base, size, 8)) @@ -150,32 +184,48 @@ void sort(void *base, size_t num, size_t size, swap_func = swap_bytes; } - /* heapify */ - for ( ; i >= 0; i -= size) { - for (r = i; r * 2 + size < n; r = c) { - c = r * 2 + size; - if (c < n - size && - cmp_func(base + c, base + c + size) < 0) - c += size; - if (cmp_func(base + r, base + c) >= 0) - break; - swap_func(base + r, base + c, size); - } - } - - /* sort */ - for (i = n - size; i > 0; i -= size) { - swap_func(base, base + i, size); - for (r = 0; r * 2 + size < i; r = c) { - c = r * 2 + size; - if (c < i - size && - cmp_func(base + c, base + c + size) < 0) - c += size; - if (cmp_func(base + r, base + c) >= 0) - break; - swap_func(base + r, base + c, size); + /* + * Loop invariants: + * 1. elements [a,n) satisfy the heap property (compare greater than + * all of their children), + * 2. elements [n,num*size) are sorted, and + * 3. a <= b <= c <= d <= n (whenever they are valid). 
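+	 * (Here "a" is the offset of the element currently being sifted
+	 * down, "b" walks the sift-down path with "c" and "d" as the
+	 * offsets of b's two children, and "n" bounds the not-yet-sorted
+	 * region.)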
+ */ + for (;;) { + size_t b, c, d; + + if (a) /* Building heap: sift down --a */ + a -= size; + else if (n -= size) /* Sorting: Extract root to --n */ + swap_func(base, base + n, size); + else /* Sort complete */ + break; + + /* + * Sift element at "a" down into heap. This is the + * "bottom-up" variant, which significantly reduces + * calls to cmp_func(): we find the sift-down path all + * the way to the leaves (one compare per level), then + * backtrack to find where to insert the target element. + * + * Because elements tend to sift down close to the leaves, + * this uses fewer compares than doing two per level + * on the way down. (A bit more than half as many on + * average, 3/4 worst-case.) + */ + for (b = a; c = 2*b + size, (d = c + size) < n;) + b = cmp_func(base + c, base + d) >= 0 ? c : d; + if (d == n) /* Special case last leaf with no sibling */ + b = c; + + /* Now backtrack from "b" to the correct location for "a" */ + while (b != a && cmp_func(base + a, base + b) >= 0) + b = parent(b, lsbit, size); + c = b; /* Where "a" belongs */ + while (b != a) { /* Shift it into place */ + b = parent(b, lsbit, size); + swap_func(base + b, base + c, size); } } } - EXPORT_SYMBOL(sort); -- cgit From 8fb583c4258d08f0aff105aa2ae5157b7d414ea2 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:42:55 -0700 Subject: lib/sort: avoid indirect calls to built-in swap Similar to what's being done in the net code, this takes advantage of the fact that most invocations use only a few common swap functions, and replaces indirect calls to them with (highly predictable) conditional branches. (The downside, of course, is that if you *do* use a custom swap function, there are a few extra predicted branches on the code path.) This actually *shrinks* the x86-64 code, because it inlines the various swap functions inside do_swap, eliding function prologues & epilogues. x86-64 code size 767 -> 703 bytes (-64) Link: http://lkml.kernel.org/r/d10c5d4b393a1847f32f5b26f4bbaa2857140e1e.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Daniel Wagner Cc: Dave Chinner Cc: Don Mullis Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/sort.c | 51 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/sort.c b/lib/sort.c index 0d24d0c5c0fc..50855ea8c262 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -54,10 +54,8 @@ static bool is_aligned(const void *base, size_t size, unsigned char align) * subtract (since the intervening mov instructions don't alter the flags). * Gcc 8.1.0 doesn't have that problem. */ -static void swap_words_32(void *a, void *b, int size) +static void swap_words_32(void *a, void *b, size_t n) { - size_t n = (unsigned int)size; - do { u32 t = *(u32 *)(a + (n -= 4)); *(u32 *)(a + n) = *(u32 *)(b + n); @@ -80,10 +78,8 @@ static void swap_words_32(void *a, void *b, int size) * but it's possible to have 64-bit loads without 64-bit pointers (e.g. * x32 ABI). Are there any cases the kernel needs to worry about? */ -static void swap_words_64(void *a, void *b, int size) +static void swap_words_64(void *a, void *b, size_t n) { - size_t n = (unsigned int)size; - do { #ifdef CONFIG_64BIT u64 t = *(u64 *)(a + (n -= 8)); @@ -109,10 +105,8 @@ static void swap_words_64(void *a, void *b, int size) * * This is the fallback if alignment doesn't allow using larger chunks. 
*/ -static void swap_bytes(void *a, void *b, int size) +static void swap_bytes(void *a, void *b, size_t n) { - size_t n = (unsigned int)size; - do { char t = ((char *)a)[--n]; ((char *)a)[n] = ((char *)b)[n]; @@ -120,6 +114,33 @@ static void swap_bytes(void *a, void *b, int size) } while (n); } +typedef void (*swap_func_t)(void *a, void *b, int size); + +/* + * The values are arbitrary as long as they can't be confused with + * a pointer, but small integers make for the smallest compare + * instructions. + */ +#define SWAP_WORDS_64 (swap_func_t)0 +#define SWAP_WORDS_32 (swap_func_t)1 +#define SWAP_BYTES (swap_func_t)2 + +/* + * The function pointer is last to make tail calls most efficient if the + * compiler decides not to inline this function. + */ +static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func) +{ + if (swap_func == SWAP_WORDS_64) + swap_words_64(a, b, size); + else if (swap_func == SWAP_WORDS_32) + swap_words_32(a, b, size); + else if (swap_func == SWAP_BYTES) + swap_bytes(a, b, size); + else + swap_func(a, b, (int)size); +} + /** * parent - given the offset of the child, find the offset of the parent. * @i: the offset of the heap element whose parent is sought. Non-zero. @@ -157,7 +178,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size) * This function does a heapsort on the given array. You may provide * a swap_func function if you need to do something more than a memory * copy (e.g. fix up pointers or auxiliary data), but the built-in swap - * isn't usually a bottleneck. + * avoids a slow retpoline and so is significantly faster. * * Sorting time is O(n log n) both on average and worst-case. While * quicksort is slightly faster on average, it suffers from exploitable @@ -177,11 +198,11 @@ void sort(void *base, size_t num, size_t size, if (!swap_func) { if (is_aligned(base, size, 8)) - swap_func = swap_words_64; + swap_func = SWAP_WORDS_64; else if (is_aligned(base, size, 4)) - swap_func = swap_words_32; + swap_func = SWAP_WORDS_32; else - swap_func = swap_bytes; + swap_func = SWAP_BYTES; } /* @@ -197,7 +218,7 @@ void sort(void *base, size_t num, size_t size, if (a) /* Building heap: sift down --a */ a -= size; else if (n -= size) /* Sorting: Extract root to --n */ - swap_func(base, base + n, size); + do_swap(base, base + n, size, swap_func); else /* Sort complete */ break; @@ -224,7 +245,7 @@ void sort(void *base, size_t num, size_t size, c = b; /* Where "a" belongs */ while (b != a) { /* Shift it into place */ b = parent(b, lsbit, size); - swap_func(base + b, base + c, size); + do_swap(base + b, base + c, size, swap_func); } } } -- cgit From 043b3f7b6388fca6be86ca82979f66c5723a0d10 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:42:58 -0700 Subject: lib/list_sort: simplify and remove MAX_LIST_LENGTH_BITS Rather than a fixed-size array of pending sorted runs, use the ->prev links to keep track of things. This reduces stack usage, eliminates some ugly overflow handling, and reduces the code size. Also: * merge() no longer needs to handle NULL inputs, so simplify. * The same applies to merge_and_restore_back_links(), which is renamed to the less ponderous merge_final(). (It's a static helper function, so we don't need a super-descriptive name; comments will do.) * Document the actual return value requirements on the (*cmp)() function; some callers are already using this feature. 
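To make the newly documented contract concrete, a comparison callback written against the relaxed rules could look like the sketch below; struct item and its fields are hypothetical, and the multi-word pattern deliberately mirrors the example added to the kernel-doc in this patch:

    #include <linux/list.h>
    #include <linux/list_sort.h>

    struct item {
            struct list_head list;
            u64 high, low;
    };

    /* Return > 0 iff @a sorts after @b; <= 0 keeps the existing order. */
    static int item_cmp(void *priv, struct list_head *a, struct list_head *b)
    {
            const struct item *x = list_entry(a, struct item, list);
            const struct item *y = list_entry(b, struct item, list);

            if (x->high != y->high)
                    return x->high > y->high;
            return x->low > y->low;
    }

A caller then just does list_sort(NULL, &item_list, item_cmp) on some hypothetical item_list; because the comparison returns a 0/1 boolean rather than a full three-way -1/0/+1 result, the callback can save a few cycles per call.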
x86-64 code size 1086 -> 739 bytes (-347) (Yes, I see checkpatch complaining about no space after comma in "__attribute__((nonnull(2,3,4,5)))". Checkpatch is wrong.) Feedback from Rasmus Villemoes, Andy Shevchenko and Geert Uytterhoeven. [akpm@linux-foundation.org: remove __pure usage due to mysterious warning] Link: http://lkml.kernel.org/r/f63c410e0ff76009c9b58e01027e751ff7fdb749.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Geert Uytterhoeven Cc: Daniel Wagner Cc: Dave Chinner Cc: Don Mullis Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/list_sort.c | 165 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 103 insertions(+), 62 deletions(-) (limited to 'lib') diff --git a/lib/list_sort.c b/lib/list_sort.c index 85759928215b..ba9431bcac0b 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -7,33 +7,41 @@ #include #include -#define MAX_LIST_LENGTH_BITS 20 +typedef int __attribute__((nonnull(2,3))) (*cmp_func)(void *, + struct list_head const *, struct list_head const *); /* * Returns a list organized in an intermediate format suited * to chaining of merge() calls: null-terminated, no reserved or * sentinel head node, "prev" links not maintained. */ -static struct list_head *merge(void *priv, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b), +__attribute__((nonnull(2,3,4))) +static struct list_head *merge(void *priv, cmp_func cmp, struct list_head *a, struct list_head *b) { - struct list_head head, *tail = &head; + struct list_head *head, **tail = &head; - while (a && b) { + for (;;) { /* if equal, take 'a' -- important for sort stability */ - if ((*cmp)(priv, a, b) <= 0) { - tail->next = a; + if (cmp(priv, a, b) <= 0) { + *tail = a; + tail = &a->next; a = a->next; + if (!a) { + *tail = b; + break; + } } else { - tail->next = b; + *tail = b; + tail = &b->next; b = b->next; + if (!b) { + *tail = a; + break; + } } - tail = tail->next; } - tail->next = a?:b; - return head.next; + return head; } /* @@ -43,44 +51,52 @@ static struct list_head *merge(void *priv, * prev-link restoration pass, or maintaining the prev links * throughout. */ -static void merge_and_restore_back_links(void *priv, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b), - struct list_head *head, - struct list_head *a, struct list_head *b) +__attribute__((nonnull(2,3,4,5))) +static void merge_final(void *priv, cmp_func cmp, struct list_head *head, + struct list_head *a, struct list_head *b) { struct list_head *tail = head; u8 count = 0; - while (a && b) { + for (;;) { /* if equal, take 'a' -- important for sort stability */ - if ((*cmp)(priv, a, b) <= 0) { + if (cmp(priv, a, b) <= 0) { tail->next = a; a->prev = tail; + tail = a; a = a->next; + if (!a) + break; } else { tail->next = b; b->prev = tail; + tail = b; b = b->next; + if (!b) { + b = a; + break; + } } - tail = tail->next; } - tail->next = a ? : b; + /* Finish linking remainder of list b on to tail */ + tail->next = b; do { /* - * In worst cases this loop may run many iterations. + * If the merge is highly unbalanced (e.g. the input is + * already sorted), this loop may run many iterations. * Continue callbacks to the client even though no * element comparison is needed, so the client's cmp() * routine can invoke cond_resched() periodically. 
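	 * (count is a u8, so the dummy cmp() call below fires once every
	 * 256 elements, which bounds the stretch between cond_resched()
	 * opportunities.)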
*/ - if (unlikely(!(++count))) - (*cmp)(priv, tail->next, tail->next); - - tail->next->prev = tail; - tail = tail->next; - } while (tail->next); - + if (unlikely(!++count)) + cmp(priv, b, b); + b->prev = tail; + tail = b; + b = b->next; + } while (b); + + /* And the final links to make a circular doubly-linked list */ tail->next = head; head->prev = tail; } @@ -91,55 +107,80 @@ static void merge_and_restore_back_links(void *priv, * @head: the list to sort * @cmp: the elements comparison function * - * This function implements "merge sort", which has O(nlog(n)) - * complexity. + * This function implements a bottom-up merge sort, which has O(nlog(n)) + * complexity. We use depth-first order to take advantage of cacheing. + * (E.g. when we get to the fourth element, we immediately merge the + * first two 2-element lists.) + * + * The comparison funtion @cmp must return > 0 if @a should sort after + * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should + * sort before @b *or* their original order should be preserved. It is + * always called with the element that came first in the input in @a, + * and list_sort is a stable sort, so it is not necessary to distinguish + * the @a < @b and @a == @b cases. * - * The comparison function @cmp must return a negative value if @a - * should sort before @b, and a positive value if @a should sort after - * @b. If @a and @b are equivalent, and their original relative - * ordering is to be preserved, @cmp must return 0. + * This is compatible with two styles of @cmp function: + * - The traditional style which returns <0 / =0 / >0, or + * - Returning a boolean 0/1. + * The latter offers a chance to save a few cycles in the comparison + * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c). + * + * A good way to write a multi-word comparison is + * if (a->high != b->high) + * return a->high > b->high; + * if (a->middle != b->middle) + * return a->middle > b->middle; + * return a->low > b->low; */ +__attribute__((nonnull(2,3))) void list_sort(void *priv, struct list_head *head, int (*cmp)(void *priv, struct list_head *a, struct list_head *b)) { - struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists - -- last slot is a sentinel */ - int lev; /* index into part[] */ - int max_lev = 0; - struct list_head *list; + struct list_head *list = head->next, *pending = NULL; + size_t count = 0; /* Count of pending */ - if (list_empty(head)) + if (list == head->prev) /* Zero or one elements */ return; - memset(part, 0, sizeof(part)); - + /* Convert to a null-terminated singly-linked list. */ head->prev->next = NULL; - list = head->next; - while (list) { + /* + * Data structure invariants: + * - All lists are singly linked and null-terminated; prev + * pointers are not maintained. + * - pending is a prev-linked "list of lists" of sorted + * sublists awaiting further merging. + * - Each of the sorted sublists is power-of-two in size, + * corresponding to bits set in "count". + * - Sublists are sorted by size and age, smallest & newest at front. 
+ */ + do { + size_t bits; struct list_head *cur = list; + + /* Extract the head of "list" as a single-element list "cur" */ list = list->next; cur->next = NULL; - for (lev = 0; part[lev]; lev++) { - cur = merge(priv, cmp, part[lev], cur); - part[lev] = NULL; + /* Do merges corresponding to set lsbits in count */ + for (bits = count; bits & 1; bits >>= 1) { + cur = merge(priv, (cmp_func)cmp, pending, cur); + pending = pending->prev; /* Untouched by merge() */ } - if (lev > max_lev) { - if (unlikely(lev >= ARRAY_SIZE(part)-1)) { - printk_once(KERN_DEBUG "list too long for efficiency\n"); - lev--; - } - max_lev = lev; - } - part[lev] = cur; + /* And place the result at the head of "pending" */ + cur->prev = pending; + pending = cur; + count++; + } while (list->next); + + /* Now merge together last element with all pending lists */ + while (pending->prev) { + list = merge(priv, (cmp_func)cmp, pending, list); + pending = pending->prev; } - - for (lev = 0; lev < max_lev; lev++) - if (part[lev]) - list = merge(priv, cmp, part[lev], list); - - merge_and_restore_back_links(priv, cmp, head, part[max_lev], list); + /* The final merge, rebuilding prev links */ + merge_final(priv, (cmp_func)cmp, head, pending, list); } EXPORT_SYMBOL(list_sort); -- cgit From b5c56e0cdd62979dd538e5363b06be5bdf735a09 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Tue, 14 May 2019 15:43:02 -0700 Subject: lib/list_sort: optimize number of calls to comparison function CONFIG_RETPOLINE has severely degraded indirect function call performance, so it's worth putting some effort into reducing the number of times cmp() is called. This patch avoids badly unbalanced merges on unlucky input sizes. It slightly increases the code size, but saves an average of 0.2*n calls to cmp(). x86-64 code size 739 -> 803 bytes (+64) Unfortunately, there's not a lot of low-hanging fruit in a merge sort; it already performs only n*log2(n) - K*n + O(1) compares. The leading coefficient is already at the theoretical limit (log2(n!) corresponds to K=1.4427), so we're fighting over the linear term, and the best mergesort can do is K=1.2645, achieved when n is a power of 2. The differences between mergesort variants appear when n is *not* a power of 2; K is a function of the fractional part of log2(n). Top-down mergesort does best of all, achieving a minimum K=1.2408, and an average (over all sizes) K=1.248. However, that requires knowing the number of entries to be sorted ahead of time, and making a full pass over the input to count it conflicts with a second performance goal, which is cache blocking. Obviously, we have to read the entire list into L1 cache at some point, and performance is best if it fits. But if it doesn't fit, each full pass over the input causes a cache miss per element, which is undesirable. While textbooks explain bottom-up mergesort as a succession of merging passes, practical implementations do merging in depth-first order: as soon as two lists of the same size are available, they are merged. This allows as many merge passes as possible to fit into L1; only the final few merges force cache misses. This cache-friendly depth-first merge order depends on us merging the beginning of the input as much as possible before we've even seen the end of the input (and thus know its size). The simple eager merge pattern causes bad performance when n is just over a power of 2. If n=1028, the final merge is between 1024- and 4-element lists, which is wasteful of comparisons. 
(This is actually worse on average than n=1025, because a 1024:1 merge will, on average, end after 512 compares, while 1024:4 will walk 4/5 of the list.) Because of this, bottom-up mergesort achieves K < 0.5 for such sizes, and has an average (over all sizes) K of around 1. (My experiments show K=1.01, while theory predicts K=0.965.)

There are "worst-case optimal" variants of bottom-up mergesort which avoid this bad performance, but the algorithms given in the literature, such as queue-mergesort and boustrophedonic mergesort, depend on the breadth-first multi-pass structure that we are trying to avoid.

This implementation is as eager as possible while ensuring that all merge passes are at worst 1:2 unbalanced. This achieves the same average K=1.207 as queue-mergesort, which is 0.2*n better than bottom-up, and only 0.04*n behind top-down mergesort.

Specifically, it defers merging two lists of size 2^k until it is known that there are 2^k additional inputs following. This ensures that the final uneven merges triggered by reaching the end of the input will be at worst 2:1. This will avoid cache misses as long as 3*2^k elements fit into the cache.

(I confess to being more than a little bit proud of how clean this code turned out. It took a lot of thinking, but the resultant inner loop is very simple and efficient.)

Refs:
  Bottom-up Mergesort: A Detailed Analysis
  Wolfgang Panny, Helmut Prodinger
  Algorithmica 14(4):340--354, October 1995
  https://doi.org/10.1007/BF01294131
  https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.6.5260

  The cost distribution of queue-mergesort, optimal mergesorts, and power-of-two rules
  Wei-Mei Chen, Hsien-Kuei Hwang, Gen-Huey Chen
  Journal of Algorithms 30(2); Pages 423--448, February 1999
  https://doi.org/10.1006/jagm.1998.0986
  https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.5380

  Queue-Mergesort
  Mordecai J. Golin, Robert Sedgewick
  Information Processing Letters, 48(5):253--259, 10 December 1993
  https://doi.org/10.1016/0020-0190(93)90088-q
  https://sci-hub.tw/10.1016/0020-0190(93)90088-Q

Feedback from Rasmus Villemoes.

Link: http://lkml.kernel.org/r/fd560853cc4dca0d0f02184ffa888b4c1be89abc.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin Acked-by: Andrey Abramov Acked-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Daniel Wagner Cc: Dave Chinner Cc: Don Mullis Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/list_sort.c | 113 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/list_sort.c b/lib/list_sort.c index ba9431bcac0b..06e900c5587b 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -107,11 +107,6 @@ static void merge_final(void *priv, cmp_func cmp, struct list_head *head, * @head: the list to sort * @cmp: the elements comparison function * - * This function implements a bottom-up merge sort, which has O(nlog(n)) - * complexity. We use depth-first order to take advantage of cacheing. - * (E.g. when we get to the fourth element, we immediately merge the - * first two 2-element lists.) - * * The comparison funtion @cmp must return > 0 if @a should sort after * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should * sort before @b *or* their original order should be preserved.
It is @@ -131,6 +126,60 @@ static void merge_final(void *priv, cmp_func cmp, struct list_head *head, * if (a->middle != b->middle) * return a->middle > b->middle; * return a->low > b->low; + * + * + * This mergesort is as eager as possible while always performing at least + * 2:1 balanced merges. Given two pending sublists of size 2^k, they are + * merged to a size-2^(k+1) list as soon as we have 2^k following elements. + * + * Thus, it will avoid cache thrashing as long as 3*2^k elements can + * fit into the cache. Not quite as good as a fully-eager bottom-up + * mergesort, but it does use 0.2*n fewer comparisons, so is faster in + * the common case that everything fits into L1. + * + * + * The merging is controlled by "count", the number of elements in the + * pending lists. This is beautiully simple code, but rather subtle. + * + * Each time we increment "count", we set one bit (bit k) and clear + * bits k-1 .. 0. Each time this happens (except the very first time + * for each bit, when count increments to 2^k), we merge two lists of + * size 2^k into one list of size 2^(k+1). + * + * This merge happens exactly when the count reaches an odd multiple of + * 2^k, which is when we have 2^k elements pending in smaller lists, + * so it's safe to merge away two lists of size 2^k. + * + * After this happens twice, we have created two lists of size 2^(k+1), + * which will be merged into a list of size 2^(k+2) before we create + * a third list of size 2^(k+1), so there are never more than two pending. + * + * The number of pending lists of size 2^k is determined by the + * state of bit k of "count" plus two extra pieces of information: + * - The state of bit k-1 (when k == 0, consider bit -1 always set), and + * - Whether the higher-order bits are zero or non-zero (i.e. + * is count >= 2^(k+1)). + * There are six states we distinguish. "x" represents some arbitrary + * bits, and "y" represents some arbitrary non-zero bits: + * 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k + * 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k + * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k + * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * (merge and loop back to state 2) + * + * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because + * bit k-1 is set while the more significant bits are non-zero) and + * merge them away in the 5->2 transition. Note in particular that just + * before the 5->2 transition, all lower-order bits are 11 (state 3), + * so there is one list of each smaller size. + * + * When we reach the end of the input, we merge all the pending + * lists, from smallest to largest. If you work through cases 2 to + * 5 above, you can see that the number of elements we merge with a list + * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to + * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1). */ __attribute__((nonnull(2,3))) void list_sort(void *priv, struct list_head *head, @@ -152,33 +201,53 @@ void list_sort(void *priv, struct list_head *head, * pointers are not maintained. * - pending is a prev-linked "list of lists" of sorted * sublists awaiting further merging. - * - Each of the sorted sublists is power-of-two in size, - * corresponding to bits set in "count". + * - Each of the sorted sublists is power-of-two in size. 
* - Sublists are sorted by size and age, smallest & newest at front. + * - There are zero to two sublists of each size. + * - A pair of pending sublists are merged as soon as the number + * of following pending elements equals their size (i.e. + * each time count reaches an odd multiple of that size). + * That ensures each later final merge will be at worst 2:1. + * - Each round consists of: + * - Merging the two sublists selected by the highest bit + * which flips when count is incremented, and + * - Adding an element from the input as a size-1 sublist. */ do { size_t bits; - struct list_head *cur = list; + struct list_head **tail = &pending; - /* Extract the head of "list" as a single-element list "cur" */ - list = list->next; - cur->next = NULL; + /* Find the least-significant clear bit in count */ + for (bits = count; bits & 1; bits >>= 1) + tail = &(*tail)->prev; + /* Do the indicated merge */ + if (likely(bits)) { + struct list_head *a = *tail, *b = a->prev; - /* Do merges corresponding to set lsbits in count */ - for (bits = count; bits & 1; bits >>= 1) { - cur = merge(priv, (cmp_func)cmp, pending, cur); - pending = pending->prev; /* Untouched by merge() */ + a = merge(priv, (cmp_func)cmp, b, a); + /* Install the merged result in place of the inputs */ + a->prev = b->prev; + *tail = a; } - /* And place the result at the head of "pending" */ - cur->prev = pending; - pending = cur; + + /* Move one element from input list to pending */ + list->prev = pending; + pending = list; + list = list->next; + pending->next = NULL; count++; - } while (list->next); + } while (list); + + /* End of input; merge together all the pending lists. */ + list = pending; + pending = pending->prev; + for (;;) { + struct list_head *next = pending->prev; - /* Now merge together last element with all pending lists */ - while (pending->prev) { + if (!next) + break; list = merge(priv, (cmp_func)cmp, pending, list); - pending = pending->prev; + pending = next; } /* The final merge, rebuilding prev links */ merge_final(priv, (cmp_func)cmp, head, pending, list); -- cgit From 2c64e9cb0b6b858901e9a386860d7d929d1cbaeb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 May 2019 15:43:05 -0700 Subject: lib: Move mathematic helpers to separate folder For better maintenance and expansion move the mathematic helpers to the separate folder. No functional change intended. Note, the int_sqrt() is not used as a part of lib, so, moved to regular obj. 
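Since the move promises no functional change, the implementations travel verbatim (their full bodies are visible in the diff below). One quick way to convince yourself a helper survived the move intact is to lift it into userspace and brute-force it against its documented contract; the harness below is hypothetical and not part of the patch, with the kernel's __fls() open-coded via a GCC builtin:

    #include <assert.h>
    #include <stdio.h>

    /* Same shift-and-subtract algorithm as lib/math/int_sqrt.c. */
    static unsigned long int_sqrt(unsigned long x)
    {
            unsigned long b, m, y = 0;

            if (x <= 1)
                    return x;

            /* __fls(x) & ~1UL: highest even bit position at or below the MSB */
            m = 1UL << ((8 * sizeof(long) - 1 - __builtin_clzl(x)) & ~1UL);
            while (m != 0) {
                    b = y + m;
                    y >>= 1;
                    if (x >= b) {
                            x -= b;
                            y += m;
                    }
                    m >>= 2;
            }
            return y;
    }

    int main(void)
    {
            unsigned long x;

            for (x = 0; x < 1UL << 20; x++) {
                    unsigned long r = int_sqrt(x);

                    /* Contract: r == floor(sqrt(x)), i.e. r^2 <= x < (r+1)^2 */
                    assert(r * r <= x && x < (r + 1) * (r + 1));
            }
            printf("int_sqrt: ok over [0, 2^20)\n");
            return 0;
    }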
Link: http://lkml.kernel.org/r/20190323172531.80025-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Mauro Carvalho Chehab Cc: Randy Dunlap Cc: Thierry Reding Cc: Lee Jones Cc: Daniel Thompson Cc: Ray Jui [mchehab+samsung@kernel.org: fix broken doc references for div64.c and gcd.c] Link: http://lkml.kernel.org/r/734f49bae5d4052b3c25691dfefad59bea2e5843.1555580999.git.mchehab+samsung@kernel.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 14 +-- lib/Makefile | 15 +-- lib/cordic.c | 92 -------------- lib/div64.c | 192 ---------------------------- lib/gcd.c | 84 ------------- lib/int_sqrt.c | 70 ----------- lib/lcm.c | 25 ---- lib/math/Kconfig | 11 ++ lib/math/Makefile | 5 + lib/math/cordic.c | 92 ++++++++++++++ lib/math/div64.c | 192 ++++++++++++++++++++++++++++ lib/math/gcd.c | 84 +++++++++++++ lib/math/int_sqrt.c | 70 +++++++++++ lib/math/lcm.c | 25 ++++ lib/math/prime_numbers.c | 315 ++++++++++++++++++++++++++++++++++++++++++++++ lib/math/rational.c | 65 ++++++++++ lib/math/reciprocal_div.c | 69 ++++++++++ lib/prime_numbers.c | 315 ---------------------------------------------- lib/rational.c | 65 ---------- lib/reciprocal_div.c | 69 ---------- 20 files changed, 936 insertions(+), 933 deletions(-) delete mode 100644 lib/cordic.c delete mode 100644 lib/div64.c delete mode 100644 lib/gcd.c delete mode 100644 lib/int_sqrt.c delete mode 100644 lib/lcm.c create mode 100644 lib/math/Kconfig create mode 100644 lib/math/Makefile create mode 100644 lib/math/cordic.c create mode 100644 lib/math/div64.c create mode 100644 lib/math/gcd.c create mode 100644 lib/math/int_sqrt.c create mode 100644 lib/math/lcm.c create mode 100644 lib/math/prime_numbers.c create mode 100644 lib/math/rational.c create mode 100644 lib/math/reciprocal_div.c delete mode 100644 lib/prime_numbers.c delete mode 100644 lib/rational.c delete mode 100644 lib/reciprocal_div.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index f323b85ad11c..3577609b61be 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -46,9 +46,6 @@ config HAVE_ARCH_BITREVERSE This option enables the use of hardware bit-reversal instructions on architectures which support such operations. -config RATIONAL - bool - config GENERIC_STRNCPY_FROM_USER bool @@ -61,6 +58,8 @@ config GENERIC_NET_UTILS config GENERIC_FIND_FIRST_BIT bool +source "lib/math/Kconfig" + config NO_GENERIC_PCI_IOPORT_MAP bool @@ -531,12 +530,6 @@ config LRU_CACHE config CLZ_TAB bool -config CORDIC - tristate "CORDIC algorithm" - help - This option provides an implementation of the CORDIC algorithm; - calculations are in fixed point. Module will be called cordic. 
- config DDR bool "JEDEC DDR data" help @@ -628,9 +621,6 @@ config SBITMAP config PARMAN tristate "parman" if COMPILE_TEST -config PRIME_NUMBERS - tristate - config STRING_SELFTEST tristate "Test string functions" diff --git a/lib/Makefile b/lib/Makefile index 83d7df2661ff..fb7697031a79 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,7 +30,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ - idr.o int_sqrt.o extable.o \ + idr.o extable.o \ sha1.o chacha.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ @@ -44,11 +44,11 @@ lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o obj-y += lockref.o -obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ +obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o iov_iter.o clz_ctz.o \ + list_sort.o uuid.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o rhashtable.o reciprocal_div.o \ + percpu-refcount.o rhashtable.o \ once.o refcount.o usercopy.o errseq.o bucket_locks.o \ generic-radix-tree.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o @@ -102,6 +102,8 @@ endif obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any) +obj-y += math/ + obj-$(CONFIG_GENERIC_IOMAP) += iomap.o obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o @@ -121,7 +123,6 @@ obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_PACKING) += packing.o -obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o @@ -195,8 +196,6 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o -obj-$(CONFIG_CORDIC) += cordic.o - obj-$(CONFIG_DQL) += dynamic_queue_limits.o obj-$(CONFIG_GLOB) += glob.o @@ -238,8 +237,6 @@ obj-$(CONFIG_ASN1) += asn1_decoder.o obj-$(CONFIG_FONT_SUPPORT) += fonts/ -obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o - hostprogs-y := gen_crc32table hostprogs-y += gen_crc64table clean-files := crc32table.h diff --git a/lib/cordic.c b/lib/cordic.c deleted file mode 100644 index 8ef27c12956f..000000000000 --- a/lib/cordic.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2011 Broadcom Corporation - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ -#include -#include - -static const s32 arctan_table[] = { - 2949120, - 1740967, - 919879, - 466945, - 234379, - 117304, - 58666, - 29335, - 14668, - 7334, - 3667, - 1833, - 917, - 458, - 229, - 115, - 57, - 29 -}; - -/* - * cordic_calc_iq() - calculates the i/q coordinate for given angle - * - * theta: angle in degrees for which i/q coordinate is to be calculated - * coord: function output parameter holding the i/q coordinate - */ -struct cordic_iq cordic_calc_iq(s32 theta) -{ - struct cordic_iq coord; - s32 angle, valtmp; - unsigned iter; - int signx = 1; - int signtheta; - - coord.i = CORDIC_ANGLE_GEN; - coord.q = 0; - angle = 0; - - theta = CORDIC_FIXED(theta); - signtheta = (theta < 0) ? -1 : 1; - theta = ((theta + CORDIC_FIXED(180) * signtheta) % CORDIC_FIXED(360)) - - CORDIC_FIXED(180) * signtheta; - - if (CORDIC_FLOAT(theta) > 90) { - theta -= CORDIC_FIXED(180); - signx = -1; - } else if (CORDIC_FLOAT(theta) < -90) { - theta += CORDIC_FIXED(180); - signx = -1; - } - - for (iter = 0; iter < CORDIC_NUM_ITER; iter++) { - if (theta > angle) { - valtmp = coord.i - (coord.q >> iter); - coord.q += (coord.i >> iter); - angle += arctan_table[iter]; - } else { - valtmp = coord.i + (coord.q >> iter); - coord.q -= (coord.i >> iter); - angle -= arctan_table[iter]; - } - coord.i = valtmp; - } - - coord.i *= signx; - coord.q *= signx; - return coord; -} -EXPORT_SYMBOL(cordic_calc_iq); - -MODULE_DESCRIPTION("CORDIC algorithm"); -MODULE_AUTHOR("Broadcom Corporation"); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/div64.c b/lib/div64.c deleted file mode 100644 index ee146bb4c558..000000000000 --- a/lib/div64.c +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2003 Bernardo Innocenti - * - * Based on former do_div() implementation from asm-parisc/div64.h: - * Copyright (C) 1999 Hewlett-Packard Co - * Copyright (C) 1999 David Mosberger-Tang - * - * - * Generic C version of 64bit/32bit division and modulo, with - * 64bit result and 32bit remainder. - * - * The fast case for (n>>32 == 0) is handled inline by do_div(). - * - * Code generated for this function might be very inefficient - * for some CPUs. __div64_32() can be overridden by linking arch-specific - * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S - * or by defining a preprocessor macro in arch/include/asm/div64.h. 
- */ - -#include -#include -#include - -/* Not needed on 64bit architectures */ -#if BITS_PER_LONG == 32 - -#ifndef __div64_32 -uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) -{ - uint64_t rem = *n; - uint64_t b = base; - uint64_t res, d = 1; - uint32_t high = rem >> 32; - - /* Reduce the thing a bit first */ - res = 0; - if (high >= base) { - high /= base; - res = (uint64_t) high << 32; - rem -= (uint64_t) (high*base) << 32; - } - - while ((int64_t)b > 0 && b < rem) { - b = b+b; - d = d+d; - } - - do { - if (rem >= b) { - rem -= b; - res += d; - } - b >>= 1; - d >>= 1; - } while (d); - - *n = res; - return rem; -} -EXPORT_SYMBOL(__div64_32); -#endif - -/** - * div_s64_rem - signed 64bit divide with 64bit divisor and remainder - * @dividend: 64bit dividend - * @divisor: 64bit divisor - * @remainder: 64bit remainder - */ -#ifndef div_s64_rem -s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) -{ - u64 quotient; - - if (dividend < 0) { - quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); - *remainder = -*remainder; - if (divisor > 0) - quotient = -quotient; - } else { - quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); - if (divisor < 0) - quotient = -quotient; - } - return quotient; -} -EXPORT_SYMBOL(div_s64_rem); -#endif - -/** - * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder - * @dividend: 64bit dividend - * @divisor: 64bit divisor - * @remainder: 64bit remainder - * - * This implementation is a comparable to algorithm used by div64_u64. - * But this operation, which includes math for calculating the remainder, - * is kept distinct to avoid slowing down the div64_u64 operation on 32bit - * systems. - */ -#ifndef div64_u64_rem -u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) -{ - u32 high = divisor >> 32; - u64 quot; - - if (high == 0) { - u32 rem32; - quot = div_u64_rem(dividend, divisor, &rem32); - *remainder = rem32; - } else { - int n = fls(high); - quot = div_u64(dividend >> n, divisor >> n); - - if (quot != 0) - quot--; - - *remainder = dividend - quot * divisor; - if (*remainder >= divisor) { - quot++; - *remainder -= divisor; - } - } - - return quot; -} -EXPORT_SYMBOL(div64_u64_rem); -#endif - -/** - * div64_u64 - unsigned 64bit divide with 64bit divisor - * @dividend: 64bit dividend - * @divisor: 64bit divisor - * - * This implementation is a modified version of the algorithm proposed - * by the book 'Hacker's Delight'. The original source and full proof - * can be found here and is available for use without restriction. - * - * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' - */ -#ifndef div64_u64 -u64 div64_u64(u64 dividend, u64 divisor) -{ - u32 high = divisor >> 32; - u64 quot; - - if (high == 0) { - quot = div_u64(dividend, divisor); - } else { - int n = fls(high); - quot = div_u64(dividend >> n, divisor >> n); - - if (quot != 0) - quot--; - if ((dividend - quot * divisor) >= divisor) - quot++; - } - - return quot; -} -EXPORT_SYMBOL(div64_u64); -#endif - -/** - * div64_s64 - signed 64bit divide with 64bit divisor - * @dividend: 64bit dividend - * @divisor: 64bit divisor - */ -#ifndef div64_s64 -s64 div64_s64(s64 dividend, s64 divisor) -{ - s64 quot, t; - - quot = div64_u64(abs(dividend), abs(divisor)); - t = (dividend ^ divisor) >> 63; - - return (quot ^ t) - t; -} -EXPORT_SYMBOL(div64_s64); -#endif - -#endif /* BITS_PER_LONG == 32 */ - -/* - * Iterative div/mod for use when dividend is not expected to be much - * bigger than divisor. 
- */ -u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) -{ - return __iter_div_u64_rem(dividend, divisor, remainder); -} -EXPORT_SYMBOL(iter_div_u64_rem); diff --git a/lib/gcd.c b/lib/gcd.c deleted file mode 100644 index 7948ab27f0a4..000000000000 --- a/lib/gcd.c +++ /dev/null @@ -1,84 +0,0 @@ -#include -#include -#include - -/* - * This implements the binary GCD algorithm. (Often attributed to Stein, - * but as Knuth has noted, appears in a first-century Chinese math text.) - * - * This is faster than the division-based algorithm even on x86, which - * has decent hardware division. - */ - -#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) - -/* If __ffs is available, the even/odd algorithm benchmarks slower. */ - -/** - * gcd - calculate and return the greatest common divisor of 2 unsigned longs - * @a: first value - * @b: second value - */ -unsigned long gcd(unsigned long a, unsigned long b) -{ - unsigned long r = a | b; - - if (!a || !b) - return r; - - b >>= __ffs(b); - if (b == 1) - return r & -r; - - for (;;) { - a >>= __ffs(a); - if (a == 1) - return r & -r; - if (a == b) - return a << __ffs(r); - - if (a < b) - swap(a, b); - a -= b; - } -} - -#else - -/* If normalization is done by loops, the even/odd algorithm is a win. */ -unsigned long gcd(unsigned long a, unsigned long b) -{ - unsigned long r = a | b; - - if (!a || !b) - return r; - - /* Isolate lsbit of r */ - r &= -r; - - while (!(b & r)) - b >>= 1; - if (b == r) - return r; - - for (;;) { - while (!(a & r)) - a >>= 1; - if (a == r) - return r; - if (a == b) - return a; - - if (a < b) - swap(a, b); - a -= b; - a >>= 1; - if (a & r) - a += b; - a >>= 1; - } -} - -#endif - -EXPORT_SYMBOL_GPL(gcd); diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c deleted file mode 100644 index 30e0f9770f88..000000000000 --- a/lib/int_sqrt.c +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2013 Davidlohr Bueso - * - * Based on the shift-and-subtract algorithm for computing integer - * square root from Guy L. Steele. - */ - -#include -#include -#include - -/** - * int_sqrt - computes the integer square root - * @x: integer of which to calculate the sqrt - * - * Computes: floor(sqrt(x)) - */ -unsigned long int_sqrt(unsigned long x) -{ - unsigned long b, m, y = 0; - - if (x <= 1) - return x; - - m = 1UL << (__fls(x) & ~1UL); - while (m != 0) { - b = y + m; - y >>= 1; - - if (x >= b) { - x -= b; - y += m; - } - m >>= 2; - } - - return y; -} -EXPORT_SYMBOL(int_sqrt); - -#if BITS_PER_LONG < 64 -/** - * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input - * is expected. - * @x: 64bit integer of which to calculate the sqrt - */ -u32 int_sqrt64(u64 x) -{ - u64 b, m, y = 0; - - if (x <= ULONG_MAX) - return int_sqrt((unsigned long) x); - - m = 1ULL << ((fls64(x) - 1) & ~1ULL); - while (m != 0) { - b = y + m; - y >>= 1; - - if (x >= b) { - x -= b; - y += m; - } - m >>= 2; - } - - return y; -} -EXPORT_SYMBOL(int_sqrt64); -#endif diff --git a/lib/lcm.c b/lib/lcm.c deleted file mode 100644 index 03d7fcb420b5..000000000000 --- a/lib/lcm.c +++ /dev/null @@ -1,25 +0,0 @@ -#include -#include -#include -#include - -/* Lowest common multiple */ -unsigned long lcm(unsigned long a, unsigned long b) -{ - if (a && b) - return (a / gcd(a, b)) * b; - else - return 0; -} -EXPORT_SYMBOL_GPL(lcm); - -unsigned long lcm_not_zero(unsigned long a, unsigned long b) -{ - unsigned long l = lcm(a, b); - - if (l) - return l; - - return (b ? 
: a); -} -EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/math/Kconfig b/lib/math/Kconfig new file mode 100644 index 000000000000..73bdf37178d1 --- /dev/null +++ b/lib/math/Kconfig @@ -0,0 +1,11 @@ +config CORDIC + tristate "CORDIC algorithm" + help + This option provides an implementation of the CORDIC algorithm; + calculations are in fixed point. Module will be called cordic. + +config PRIME_NUMBERS + tristate + +config RATIONAL + bool diff --git a/lib/math/Makefile b/lib/math/Makefile new file mode 100644 index 000000000000..b75878420da6 --- /dev/null +++ b/lib/math/Makefile @@ -0,0 +1,5 @@ +obj-y += div64.o gcd.o lcm.o int_sqrt.o reciprocal_div.o + +obj-$(CONFIG_CORDIC) += cordic.o +obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o +obj-$(CONFIG_RATIONAL) += rational.o diff --git a/lib/math/cordic.c b/lib/math/cordic.c new file mode 100644 index 000000000000..8ef27c12956f --- /dev/null +++ b/lib/math/cordic.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011 Broadcom Corporation + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include +#include + +static const s32 arctan_table[] = { + 2949120, + 1740967, + 919879, + 466945, + 234379, + 117304, + 58666, + 29335, + 14668, + 7334, + 3667, + 1833, + 917, + 458, + 229, + 115, + 57, + 29 +}; + +/* + * cordic_calc_iq() - calculates the i/q coordinate for given angle + * + * theta: angle in degrees for which i/q coordinate is to be calculated + * coord: function output parameter holding the i/q coordinate + */ +struct cordic_iq cordic_calc_iq(s32 theta) +{ + struct cordic_iq coord; + s32 angle, valtmp; + unsigned iter; + int signx = 1; + int signtheta; + + coord.i = CORDIC_ANGLE_GEN; + coord.q = 0; + angle = 0; + + theta = CORDIC_FIXED(theta); + signtheta = (theta < 0) ? 
-1 : 1; + theta = ((theta + CORDIC_FIXED(180) * signtheta) % CORDIC_FIXED(360)) - + CORDIC_FIXED(180) * signtheta; + + if (CORDIC_FLOAT(theta) > 90) { + theta -= CORDIC_FIXED(180); + signx = -1; + } else if (CORDIC_FLOAT(theta) < -90) { + theta += CORDIC_FIXED(180); + signx = -1; + } + + for (iter = 0; iter < CORDIC_NUM_ITER; iter++) { + if (theta > angle) { + valtmp = coord.i - (coord.q >> iter); + coord.q += (coord.i >> iter); + angle += arctan_table[iter]; + } else { + valtmp = coord.i + (coord.q >> iter); + coord.q -= (coord.i >> iter); + angle -= arctan_table[iter]; + } + coord.i = valtmp; + } + + coord.i *= signx; + coord.q *= signx; + return coord; +} +EXPORT_SYMBOL(cordic_calc_iq); + +MODULE_DESCRIPTION("CORDIC algorithm"); +MODULE_AUTHOR("Broadcom Corporation"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/math/div64.c b/lib/math/div64.c new file mode 100644 index 000000000000..368ca7fd0d82 --- /dev/null +++ b/lib/math/div64.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2003 Bernardo Innocenti + * + * Based on former do_div() implementation from asm-parisc/div64.h: + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang + * + * + * Generic C version of 64bit/32bit division and modulo, with + * 64bit result and 32bit remainder. + * + * The fast case for (n>>32 == 0) is handled inline by do_div(). + * + * Code generated for this function might be very inefficient + * for some CPUs. __div64_32() can be overridden by linking arch-specific + * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S + * or by defining a preprocessor macro in arch/include/asm/div64.h. + */ + +#include +#include +#include + +/* Not needed on 64bit architectures */ +#if BITS_PER_LONG == 32 + +#ifndef __div64_32 +uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) +{ + uint64_t rem = *n; + uint64_t b = base; + uint64_t res, d = 1; + uint32_t high = rem >> 32; + + /* Reduce the thing a bit first */ + res = 0; + if (high >= base) { + high /= base; + res = (uint64_t) high << 32; + rem -= (uint64_t) (high*base) << 32; + } + + while ((int64_t)b > 0 && b < rem) { + b = b+b; + d = d+d; + } + + do { + if (rem >= b) { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + *n = res; + return rem; +} +EXPORT_SYMBOL(__div64_32); +#endif + +/** + * div_s64_rem - signed 64bit divide with 64bit divisor and remainder + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * @remainder: 64bit remainder + */ +#ifndef div_s64_rem +s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) +{ + u64 quotient; + + if (dividend < 0) { + quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); + *remainder = -*remainder; + if (divisor > 0) + quotient = -quotient; + } else { + quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); + if (divisor < 0) + quotient = -quotient; + } + return quotient; +} +EXPORT_SYMBOL(div_s64_rem); +#endif + +/** + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * @remainder: 64bit remainder + * + * This implementation is a comparable to algorithm used by div64_u64. + * But this operation, which includes math for calculating the remainder, + * is kept distinct to avoid slowing down the div64_u64 operation on 32bit + * systems. 
+ */ +#ifndef div64_u64_rem +u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) +{ + u32 high = divisor >> 32; + u64 quot; + + if (high == 0) { + u32 rem32; + quot = div_u64_rem(dividend, divisor, &rem32); + *remainder = rem32; + } else { + int n = fls(high); + quot = div_u64(dividend >> n, divisor >> n); + + if (quot != 0) + quot--; + + *remainder = dividend - quot * divisor; + if (*remainder >= divisor) { + quot++; + *remainder -= divisor; + } + } + + return quot; +} +EXPORT_SYMBOL(div64_u64_rem); +#endif + +/** + * div64_u64 - unsigned 64bit divide with 64bit divisor + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * + * This implementation is a modified version of the algorithm proposed + * by the book 'Hacker's Delight'. The original source and full proof + * can be found here and is available for use without restriction. + * + * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' + */ +#ifndef div64_u64 +u64 div64_u64(u64 dividend, u64 divisor) +{ + u32 high = divisor >> 32; + u64 quot; + + if (high == 0) { + quot = div_u64(dividend, divisor); + } else { + int n = fls(high); + quot = div_u64(dividend >> n, divisor >> n); + + if (quot != 0) + quot--; + if ((dividend - quot * divisor) >= divisor) + quot++; + } + + return quot; +} +EXPORT_SYMBOL(div64_u64); +#endif + +/** + * div64_s64 - signed 64bit divide with 64bit divisor + * @dividend: 64bit dividend + * @divisor: 64bit divisor + */ +#ifndef div64_s64 +s64 div64_s64(s64 dividend, s64 divisor) +{ + s64 quot, t; + + quot = div64_u64(abs(dividend), abs(divisor)); + t = (dividend ^ divisor) >> 63; + + return (quot ^ t) - t; +} +EXPORT_SYMBOL(div64_s64); +#endif + +#endif /* BITS_PER_LONG == 32 */ + +/* + * Iterative div/mod for use when dividend is not expected to be much + * bigger than divisor. + */ +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + return __iter_div_u64_rem(dividend, divisor, remainder); +} +EXPORT_SYMBOL(iter_div_u64_rem); diff --git a/lib/math/gcd.c b/lib/math/gcd.c new file mode 100644 index 000000000000..7948ab27f0a4 --- /dev/null +++ b/lib/math/gcd.c @@ -0,0 +1,84 @@ +#include +#include +#include + +/* + * This implements the binary GCD algorithm. (Often attributed to Stein, + * but as Knuth has noted, appears in a first-century Chinese math text.) + * + * This is faster than the division-based algorithm even on x86, which + * has decent hardware division. + */ + +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + +/* If __ffs is available, the even/odd algorithm benchmarks slower. */ + +/** + * gcd - calculate and return the greatest common divisor of 2 unsigned longs + * @a: first value + * @b: second value + */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + b >>= __ffs(b); + if (b == 1) + return r & -r; + + for (;;) { + a >>= __ffs(a); + if (a == 1) + return r & -r; + if (a == b) + return a << __ffs(r); + + if (a < b) + swap(a, b); + a -= b; + } +} + +#else + +/* If normalization is done by loops, the even/odd algorithm is a win. 
*/ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + /* Isolate lsbit of r */ + r &= -r; + + while (!(b & r)) + b >>= 1; + if (b == r) + return r; + + for (;;) { + while (!(a & r)) + a >>= 1; + if (a == r) + return r; + if (a == b) + return a; + + if (a < b) + swap(a, b); + a -= b; + a >>= 1; + if (a & r) + a += b; + a >>= 1; + } +} + +#endif + +EXPORT_SYMBOL_GPL(gcd); diff --git a/lib/math/int_sqrt.c b/lib/math/int_sqrt.c new file mode 100644 index 000000000000..30e0f9770f88 --- /dev/null +++ b/lib/math/int_sqrt.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * Based on the shift-and-subtract algorithm for computing integer + * square root from Guy L. Steele. + */ + +#include +#include +#include + +/** + * int_sqrt - computes the integer square root + * @x: integer of which to calculate the sqrt + * + * Computes: floor(sqrt(x)) + */ +unsigned long int_sqrt(unsigned long x) +{ + unsigned long b, m, y = 0; + + if (x <= 1) + return x; + + m = 1UL << (__fls(x) & ~1UL); + while (m != 0) { + b = y + m; + y >>= 1; + + if (x >= b) { + x -= b; + y += m; + } + m >>= 2; + } + + return y; +} +EXPORT_SYMBOL(int_sqrt); + +#if BITS_PER_LONG < 64 +/** + * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input + * is expected. + * @x: 64bit integer of which to calculate the sqrt + */ +u32 int_sqrt64(u64 x) +{ + u64 b, m, y = 0; + + if (x <= ULONG_MAX) + return int_sqrt((unsigned long) x); + + m = 1ULL << ((fls64(x) - 1) & ~1ULL); + while (m != 0) { + b = y + m; + y >>= 1; + + if (x >= b) { + x -= b; + y += m; + } + m >>= 2; + } + + return y; +} +EXPORT_SYMBOL(int_sqrt64); +#endif diff --git a/lib/math/lcm.c b/lib/math/lcm.c new file mode 100644 index 000000000000..03d7fcb420b5 --- /dev/null +++ b/lib/math/lcm.c @@ -0,0 +1,25 @@ +#include +#include +#include +#include + +/* Lowest common multiple */ +unsigned long lcm(unsigned long a, unsigned long b) +{ + if (a && b) + return (a / gcd(a, b)) * b; + else + return 0; +} +EXPORT_SYMBOL_GPL(lcm); + +unsigned long lcm_not_zero(unsigned long a, unsigned long b) +{ + unsigned long l = lcm(a, b); + + if (l) + return l; + + return (b ? 
: a); +} +EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/math/prime_numbers.c b/lib/math/prime_numbers.c new file mode 100644 index 000000000000..550eec457c2e --- /dev/null +++ b/lib/math/prime_numbers.c @@ -0,0 +1,315 @@ +#define pr_fmt(fmt) "prime numbers: " fmt "\n" + +#include +#include +#include +#include + +#define bitmap_size(nbits) (BITS_TO_LONGS(nbits) * sizeof(unsigned long)) + +struct primes { + struct rcu_head rcu; + unsigned long last, sz; + unsigned long primes[]; +}; + +#if BITS_PER_LONG == 64 +static const struct primes small_primes = { + .last = 61, + .sz = 64, + .primes = { + BIT(2) | + BIT(3) | + BIT(5) | + BIT(7) | + BIT(11) | + BIT(13) | + BIT(17) | + BIT(19) | + BIT(23) | + BIT(29) | + BIT(31) | + BIT(37) | + BIT(41) | + BIT(43) | + BIT(47) | + BIT(53) | + BIT(59) | + BIT(61) + } +}; +#elif BITS_PER_LONG == 32 +static const struct primes small_primes = { + .last = 31, + .sz = 32, + .primes = { + BIT(2) | + BIT(3) | + BIT(5) | + BIT(7) | + BIT(11) | + BIT(13) | + BIT(17) | + BIT(19) | + BIT(23) | + BIT(29) | + BIT(31) + } +}; +#else +#error "unhandled BITS_PER_LONG" +#endif + +static DEFINE_MUTEX(lock); +static const struct primes __rcu *primes = RCU_INITIALIZER(&small_primes); + +static unsigned long selftest_max; + +static bool slow_is_prime_number(unsigned long x) +{ + unsigned long y = int_sqrt(x); + + while (y > 1) { + if ((x % y) == 0) + break; + y--; + } + + return y == 1; +} + +static unsigned long slow_next_prime_number(unsigned long x) +{ + while (x < ULONG_MAX && !slow_is_prime_number(++x)) + ; + + return x; +} + +static unsigned long clear_multiples(unsigned long x, + unsigned long *p, + unsigned long start, + unsigned long end) +{ + unsigned long m; + + m = 2 * x; + if (m < start) + m = roundup(start, x); + + while (m < end) { + __clear_bit(m, p); + m += x; + } + + return x; +} + +static bool expand_to_next_prime(unsigned long x) +{ + const struct primes *p; + struct primes *new; + unsigned long sz, y; + + /* Betrand's Postulate (or Chebyshev's theorem) states that if n > 3, + * there is always at least one prime p between n and 2n - 2. + * Equivalently, if n > 1, then there is always at least one prime p + * such that n < p < 2n. + * + * http://mathworld.wolfram.com/BertrandsPostulate.html + * https://en.wikipedia.org/wiki/Bertrand's_postulate + */ + sz = 2 * x; + if (sz < x) + return false; + + sz = round_up(sz, BITS_PER_LONG); + new = kmalloc(sizeof(*new) + bitmap_size(sz), + GFP_KERNEL | __GFP_NOWARN); + if (!new) + return false; + + mutex_lock(&lock); + p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); + if (x < p->last) { + kfree(new); + goto unlock; + } + + /* Where memory permits, track the primes using the + * Sieve of Eratosthenes. The sieve is to remove all multiples of known + * primes from the set, what remains in the set is therefore prime. 
+ */ + bitmap_fill(new->primes, sz); + bitmap_copy(new->primes, p->primes, p->sz); + for (y = 2UL; y < sz; y = find_next_bit(new->primes, sz, y + 1)) + new->last = clear_multiples(y, new->primes, p->sz, sz); + new->sz = sz; + + BUG_ON(new->last <= x); + + rcu_assign_pointer(primes, new); + if (p != &small_primes) + kfree_rcu((struct primes *)p, rcu); + +unlock: + mutex_unlock(&lock); + return true; +} + +static void free_primes(void) +{ + const struct primes *p; + + mutex_lock(&lock); + p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); + if (p != &small_primes) { + rcu_assign_pointer(primes, &small_primes); + kfree_rcu((struct primes *)p, rcu); + } + mutex_unlock(&lock); +} + +/** + * next_prime_number - return the next prime number + * @x: the starting point for searching to test + * + * A prime number is an integer greater than 1 that is only divisible by + * itself and 1. The set of prime numbers is computed using the Sieve of + * Eratoshenes (on finding a prime, all multiples of that prime are removed + * from the set) enabling a fast lookup of the next prime number larger than + * @x. If the sieve fails (memory limitation), the search falls back to using + * slow trial-divison, up to the value of ULONG_MAX (which is reported as the + * final prime as a sentinel). + * + * Returns: the next prime number larger than @x + */ +unsigned long next_prime_number(unsigned long x) +{ + const struct primes *p; + + rcu_read_lock(); + p = rcu_dereference(primes); + while (x >= p->last) { + rcu_read_unlock(); + + if (!expand_to_next_prime(x)) + return slow_next_prime_number(x); + + rcu_read_lock(); + p = rcu_dereference(primes); + } + x = find_next_bit(p->primes, p->last, x + 1); + rcu_read_unlock(); + + return x; +} +EXPORT_SYMBOL(next_prime_number); + +/** + * is_prime_number - test whether the given number is prime + * @x: the number to test + * + * A prime number is an integer greater than 1 that is only divisible by + * itself and 1. Internally a cache of prime numbers is kept (to speed up + * searching for sequential primes, see next_prime_number()), but if the number + * falls outside of that cache, its primality is tested using trial-divison. + * + * Returns: true if @x is prime, false for composite numbers. + */ +bool is_prime_number(unsigned long x) +{ + const struct primes *p; + bool result; + + rcu_read_lock(); + p = rcu_dereference(primes); + while (x >= p->sz) { + rcu_read_unlock(); + + if (!expand_to_next_prime(x)) + return slow_is_prime_number(x); + + rcu_read_lock(); + p = rcu_dereference(primes); + } + result = test_bit(x, p->primes); + rcu_read_unlock(); + + return result; +} +EXPORT_SYMBOL(is_prime_number); + +static void dump_primes(void) +{ + const struct primes *p; + char *buf; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + + rcu_read_lock(); + p = rcu_dereference(primes); + + if (buf) + bitmap_print_to_pagebuf(true, buf, p->primes, p->sz); + pr_info("primes.{last=%lu, .sz=%lu, .primes[]=...x%lx} = %s", + p->last, p->sz, p->primes[BITS_TO_LONGS(p->sz) - 1], buf); + + rcu_read_unlock(); + + kfree(buf); +} + +static int selftest(unsigned long max) +{ + unsigned long x, last; + + if (!max) + return 0; + + for (last = 0, x = 2; x < max; x++) { + bool slow = slow_is_prime_number(x); + bool fast = is_prime_number(x); + + if (slow != fast) { + pr_err("inconsistent result for is-prime(%lu): slow=%s, fast=%s!", + x, slow ? "yes" : "no", fast ? 
"yes" : "no"); + goto err; + } + + if (!slow) + continue; + + if (next_prime_number(last) != x) { + pr_err("incorrect result for next-prime(%lu): expected %lu, got %lu", + last, x, next_prime_number(last)); + goto err; + } + last = x; + } + + pr_info("selftest(%lu) passed, last prime was %lu", x, last); + return 0; + +err: + dump_primes(); + return -EINVAL; +} + +static int __init primes_init(void) +{ + return selftest(selftest_max); +} + +static void __exit primes_exit(void) +{ + free_primes(); +} + +module_init(primes_init); +module_exit(primes_exit); + +module_param_named(selftest, selftest_max, ulong, 0400); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/lib/math/rational.c b/lib/math/rational.c new file mode 100644 index 000000000000..ba7443677c90 --- /dev/null +++ b/lib/math/rational.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer + * + * helper functions when coping with rational numbers + */ + +#include +#include +#include + +/* + * calculate best rational approximation for a given fraction + * taking into account restricted register size, e.g. to find + * appropriate values for a pll with 5 bit denominator and + * 8 bit numerator register fields, trying to set up with a + * frequency ratio of 3.1415, one would say: + * + * rational_best_approximation(31415, 10000, + * (1 << 8) - 1, (1 << 5) - 1, &n, &d); + * + * you may look at given_numerator as a fixed point number, + * with the fractional part size described in given_denominator. + * + * for theoretical background, see: + * http://en.wikipedia.org/wiki/Continued_fraction + */ + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator) +{ + unsigned long n, d, n0, d0, n1, d1; + n = given_numerator; + d = given_denominator; + n0 = d1 = 0; + n1 = d0 = 1; + for (;;) { + unsigned long t, a; + if ((n1 > max_numerator) || (d1 > max_denominator)) { + n1 = n0; + d1 = d0; + break; + } + if (d == 0) + break; + t = d; + a = n / d; + d = n % d; + n = t; + t = n0 + a * n1; + n0 = n1; + n1 = t; + t = d0 + a * d1; + d0 = d1; + d1 = t; + } + *best_numerator = n1; + *best_denominator = d1; +} + +EXPORT_SYMBOL(rational_best_approximation); diff --git a/lib/math/reciprocal_div.c b/lib/math/reciprocal_div.c new file mode 100644 index 000000000000..bf043258fa00 --- /dev/null +++ b/lib/math/reciprocal_div.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) +{ + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; +} +EXPORT_SYMBOL(reciprocal_value); + +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) +{ + struct reciprocal_value_adv R; + u32 l, post_shift; + u64 mhigh, mlow; + + /* ceil(log2(d)) */ + l = fls(d - 1); + /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to + * be handled before calling "reciprocal_value_adv", please see the + * comment at include/linux/reciprocal_div.h. 
+ */ + WARN(l == 32, + "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor", + d, __func__); + post_shift = l; + mlow = 1ULL << (32 + l); + do_div(mlow, d); + mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec)); + do_div(mhigh, d); + + for (; post_shift > 0; post_shift--) { + u64 lo = mlow >> 1, hi = mhigh >> 1; + + if (lo >= hi) + break; + + mlow = lo; + mhigh = hi; + } + + R.m = (u32)mhigh; + R.sh = post_shift; + R.exp = l; + R.is_wide_m = mhigh > U32_MAX; + + return R; +} +EXPORT_SYMBOL(reciprocal_value_adv); diff --git a/lib/prime_numbers.c b/lib/prime_numbers.c deleted file mode 100644 index 550eec457c2e..000000000000 --- a/lib/prime_numbers.c +++ /dev/null @@ -1,315 +0,0 @@ -#define pr_fmt(fmt) "prime numbers: " fmt "\n" - -#include -#include -#include -#include - -#define bitmap_size(nbits) (BITS_TO_LONGS(nbits) * sizeof(unsigned long)) - -struct primes { - struct rcu_head rcu; - unsigned long last, sz; - unsigned long primes[]; -}; - -#if BITS_PER_LONG == 64 -static const struct primes small_primes = { - .last = 61, - .sz = 64, - .primes = { - BIT(2) | - BIT(3) | - BIT(5) | - BIT(7) | - BIT(11) | - BIT(13) | - BIT(17) | - BIT(19) | - BIT(23) | - BIT(29) | - BIT(31) | - BIT(37) | - BIT(41) | - BIT(43) | - BIT(47) | - BIT(53) | - BIT(59) | - BIT(61) - } -}; -#elif BITS_PER_LONG == 32 -static const struct primes small_primes = { - .last = 31, - .sz = 32, - .primes = { - BIT(2) | - BIT(3) | - BIT(5) | - BIT(7) | - BIT(11) | - BIT(13) | - BIT(17) | - BIT(19) | - BIT(23) | - BIT(29) | - BIT(31) - } -}; -#else -#error "unhandled BITS_PER_LONG" -#endif - -static DEFINE_MUTEX(lock); -static const struct primes __rcu *primes = RCU_INITIALIZER(&small_primes); - -static unsigned long selftest_max; - -static bool slow_is_prime_number(unsigned long x) -{ - unsigned long y = int_sqrt(x); - - while (y > 1) { - if ((x % y) == 0) - break; - y--; - } - - return y == 1; -} - -static unsigned long slow_next_prime_number(unsigned long x) -{ - while (x < ULONG_MAX && !slow_is_prime_number(++x)) - ; - - return x; -} - -static unsigned long clear_multiples(unsigned long x, - unsigned long *p, - unsigned long start, - unsigned long end) -{ - unsigned long m; - - m = 2 * x; - if (m < start) - m = roundup(start, x); - - while (m < end) { - __clear_bit(m, p); - m += x; - } - - return x; -} - -static bool expand_to_next_prime(unsigned long x) -{ - const struct primes *p; - struct primes *new; - unsigned long sz, y; - - /* Betrand's Postulate (or Chebyshev's theorem) states that if n > 3, - * there is always at least one prime p between n and 2n - 2. - * Equivalently, if n > 1, then there is always at least one prime p - * such that n < p < 2n. - * - * http://mathworld.wolfram.com/BertrandsPostulate.html - * https://en.wikipedia.org/wiki/Bertrand's_postulate - */ - sz = 2 * x; - if (sz < x) - return false; - - sz = round_up(sz, BITS_PER_LONG); - new = kmalloc(sizeof(*new) + bitmap_size(sz), - GFP_KERNEL | __GFP_NOWARN); - if (!new) - return false; - - mutex_lock(&lock); - p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); - if (x < p->last) { - kfree(new); - goto unlock; - } - - /* Where memory permits, track the primes using the - * Sieve of Eratosthenes. The sieve is to remove all multiples of known - * primes from the set, what remains in the set is therefore prime. 
- */ - bitmap_fill(new->primes, sz); - bitmap_copy(new->primes, p->primes, p->sz); - for (y = 2UL; y < sz; y = find_next_bit(new->primes, sz, y + 1)) - new->last = clear_multiples(y, new->primes, p->sz, sz); - new->sz = sz; - - BUG_ON(new->last <= x); - - rcu_assign_pointer(primes, new); - if (p != &small_primes) - kfree_rcu((struct primes *)p, rcu); - -unlock: - mutex_unlock(&lock); - return true; -} - -static void free_primes(void) -{ - const struct primes *p; - - mutex_lock(&lock); - p = rcu_dereference_protected(primes, lockdep_is_held(&lock)); - if (p != &small_primes) { - rcu_assign_pointer(primes, &small_primes); - kfree_rcu((struct primes *)p, rcu); - } - mutex_unlock(&lock); -} - -/** - * next_prime_number - return the next prime number - * @x: the starting point for searching to test - * - * A prime number is an integer greater than 1 that is only divisible by - * itself and 1. The set of prime numbers is computed using the Sieve of - * Eratoshenes (on finding a prime, all multiples of that prime are removed - * from the set) enabling a fast lookup of the next prime number larger than - * @x. If the sieve fails (memory limitation), the search falls back to using - * slow trial-divison, up to the value of ULONG_MAX (which is reported as the - * final prime as a sentinel). - * - * Returns: the next prime number larger than @x - */ -unsigned long next_prime_number(unsigned long x) -{ - const struct primes *p; - - rcu_read_lock(); - p = rcu_dereference(primes); - while (x >= p->last) { - rcu_read_unlock(); - - if (!expand_to_next_prime(x)) - return slow_next_prime_number(x); - - rcu_read_lock(); - p = rcu_dereference(primes); - } - x = find_next_bit(p->primes, p->last, x + 1); - rcu_read_unlock(); - - return x; -} -EXPORT_SYMBOL(next_prime_number); - -/** - * is_prime_number - test whether the given number is prime - * @x: the number to test - * - * A prime number is an integer greater than 1 that is only divisible by - * itself and 1. Internally a cache of prime numbers is kept (to speed up - * searching for sequential primes, see next_prime_number()), but if the number - * falls outside of that cache, its primality is tested using trial-divison. - * - * Returns: true if @x is prime, false for composite numbers. - */ -bool is_prime_number(unsigned long x) -{ - const struct primes *p; - bool result; - - rcu_read_lock(); - p = rcu_dereference(primes); - while (x >= p->sz) { - rcu_read_unlock(); - - if (!expand_to_next_prime(x)) - return slow_is_prime_number(x); - - rcu_read_lock(); - p = rcu_dereference(primes); - } - result = test_bit(x, p->primes); - rcu_read_unlock(); - - return result; -} -EXPORT_SYMBOL(is_prime_number); - -static void dump_primes(void) -{ - const struct primes *p; - char *buf; - - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - - rcu_read_lock(); - p = rcu_dereference(primes); - - if (buf) - bitmap_print_to_pagebuf(true, buf, p->primes, p->sz); - pr_info("primes.{last=%lu, .sz=%lu, .primes[]=...x%lx} = %s", - p->last, p->sz, p->primes[BITS_TO_LONGS(p->sz) - 1], buf); - - rcu_read_unlock(); - - kfree(buf); -} - -static int selftest(unsigned long max) -{ - unsigned long x, last; - - if (!max) - return 0; - - for (last = 0, x = 2; x < max; x++) { - bool slow = slow_is_prime_number(x); - bool fast = is_prime_number(x); - - if (slow != fast) { - pr_err("inconsistent result for is-prime(%lu): slow=%s, fast=%s!", - x, slow ? "yes" : "no", fast ? 
"yes" : "no"); - goto err; - } - - if (!slow) - continue; - - if (next_prime_number(last) != x) { - pr_err("incorrect result for next-prime(%lu): expected %lu, got %lu", - last, x, next_prime_number(last)); - goto err; - } - last = x; - } - - pr_info("selftest(%lu) passed, last prime was %lu", x, last); - return 0; - -err: - dump_primes(); - return -EINVAL; -} - -static int __init primes_init(void) -{ - return selftest(selftest_max); -} - -static void __exit primes_exit(void) -{ - free_primes(); -} - -module_init(primes_init); -module_exit(primes_exit); - -module_param_named(selftest, selftest_max, ulong, 0400); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); diff --git a/lib/rational.c b/lib/rational.c deleted file mode 100644 index ba7443677c90..000000000000 --- a/lib/rational.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * rational fractions - * - * Copyright (C) 2009 emlix GmbH, Oskar Schirmer - * - * helper functions when coping with rational numbers - */ - -#include -#include -#include - -/* - * calculate best rational approximation for a given fraction - * taking into account restricted register size, e.g. to find - * appropriate values for a pll with 5 bit denominator and - * 8 bit numerator register fields, trying to set up with a - * frequency ratio of 3.1415, one would say: - * - * rational_best_approximation(31415, 10000, - * (1 << 8) - 1, (1 << 5) - 1, &n, &d); - * - * you may look at given_numerator as a fixed point number, - * with the fractional part size described in given_denominator. - * - * for theoretical background, see: - * http://en.wikipedia.org/wiki/Continued_fraction - */ - -void rational_best_approximation( - unsigned long given_numerator, unsigned long given_denominator, - unsigned long max_numerator, unsigned long max_denominator, - unsigned long *best_numerator, unsigned long *best_denominator) -{ - unsigned long n, d, n0, d0, n1, d1; - n = given_numerator; - d = given_denominator; - n0 = d1 = 0; - n1 = d0 = 1; - for (;;) { - unsigned long t, a; - if ((n1 > max_numerator) || (d1 > max_denominator)) { - n1 = n0; - d1 = d0; - break; - } - if (d == 0) - break; - t = d; - a = n / d; - d = n % d; - n = t; - t = n0 + a * n1; - n0 = n1; - n1 = t; - t = d0 + a * d1; - d0 = d1; - d1 = t; - } - *best_numerator = n1; - *best_denominator = d1; -} - -EXPORT_SYMBOL(rational_best_approximation); diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c deleted file mode 100644 index bf043258fa00..000000000000 --- a/lib/reciprocal_div.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include - -/* - * For a description of the algorithm please have a look at - * include/linux/reciprocal_div.h - */ - -struct reciprocal_value reciprocal_value(u32 d) -{ - struct reciprocal_value R; - u64 m; - int l; - - l = fls(d - 1); - m = ((1ULL << 32) * ((1ULL << l) - d)); - do_div(m, d); - ++m; - R.m = (u32)m; - R.sh1 = min(l, 1); - R.sh2 = max(l - 1, 0); - - return R; -} -EXPORT_SYMBOL(reciprocal_value); - -struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) -{ - struct reciprocal_value_adv R; - u32 l, post_shift; - u64 mhigh, mlow; - - /* ceil(log2(d)) */ - l = fls(d - 1); - /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to - * be handled before calling "reciprocal_value_adv", please see the - * comment at include/linux/reciprocal_div.h. 
- */
-	WARN(l == 32,
-	     "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor",
-	     d, __func__);
-	post_shift = l;
-	mlow = 1ULL << (32 + l);
-	do_div(mlow, d);
-	mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
-	do_div(mhigh, d);
-
-	for (; post_shift > 0; post_shift--) {
-		u64 lo = mlow >> 1, hi = mhigh >> 1;
-
-		if (lo >= hi)
-			break;
-
-		mlow = lo;
-		mhigh = hi;
-	}
-
-	R.m = (u32)mhigh;
-	R.sh = post_shift;
-	R.exp = l;
-	R.is_wide_m = mhigh > U32_MAX;
-
-	return R;
-}
-EXPORT_SYMBOL(reciprocal_value_adv);
-- cgit

From 9f6158946987a5ce3f16da097d18f240a89db417 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Tue, 14 May 2019 15:43:08 -0700
Subject: lib/math: move int_pow() from pwm_bl.c for wider use

The integer exponentiation is used in a few places and might be used in
the future by other call sites. Move it to lib/math/ for wider use.

Link: http://lkml.kernel.org/r/20190323172531.80025-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko
Cc: Daniel Thompson
Cc: Lee Jones
Cc: Ray Jui
Cc: Thierry Reding
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/math/Makefile  |  2 +-
 lib/math/int_pow.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 lib/math/int_pow.c

(limited to 'lib')

diff --git a/lib/math/Makefile b/lib/math/Makefile
index b75878420da6..583bbfebfc09 100644
--- a/lib/math/Makefile
+++ b/lib/math/Makefile
@@ -1,4 +1,4 @@
-obj-y += div64.o gcd.o lcm.o int_sqrt.o reciprocal_div.o
+obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o reciprocal_div.o

 obj-$(CONFIG_CORDIC)		+= cordic.o
 obj-$(CONFIG_PRIME_NUMBERS)	+= prime_numbers.o
diff --git a/lib/math/int_pow.c b/lib/math/int_pow.c
new file mode 100644
index 000000000000..622fc1ab3c74
--- /dev/null
+++ b/lib/math/int_pow.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * An integer based power function
+ *
+ * Derived from drivers/video/backlight/pwm_bl.c
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+/**
+ * int_pow - computes the exponentiation of the given base and exponent
+ * @base: base which will be raised to the given power
+ * @exp: power to be raised to
+ *
+ * Computes: pow(base, exp), i.e. @base raised to the @exp power
+ */
+u64 int_pow(u64 base, unsigned int exp)
+{
+	u64 result = 1;
+
+	while (exp) {
+		if (exp & 1)
+			result *= base;
+		exp >>= 1;
+		base *= base;
+	}
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(int_pow);
-- cgit

From 281327c99bcaa8dbd4fe49cc32178dc59b0e61b8 Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Tue, 14 May 2019 15:43:11 -0700
Subject: lib: make bitmap_parselist_user() a wrapper on bitmap_parselist()

Patch series "lib: rework bitmap_parselist and tests", v5.

bitmap_parselist() has evolved from a pretty simple idea over a long
time and is now overdue for refactoring. It is not structured, has
nested loops and a set of opaquely named variables. Things are more
complicated because bitmap_parselist() is part of the user interface,
and its behavior must not change.

In this patchset
- bitmap_parselist_user() is made a wrapper on bitmap_parselist();
- bitmap_parselist() is reworked (patch 2);
- time measurement in test_bitmap_parselist is switched to ktime_get
  (patch 3);
- new tests are introduced (patch 4), and
- bitmap_parselist_user() testing is enabled with the same testset as
  bitmap_parselist() (patch 5).

This patch (of 5):

Currently we parse user data byte after byte, which leads to
overcomplication of the parsing algorithm.
The only user of bitmap_parselist_user() is not performance-critical, and so we can duplicate user data to kernel buffer and simply call bitmap_parselist(). This rework lets us unify and simplify bitmap_parselist() and bitmap_parselist_user(), which is done in the following patch. Link: http://lkml.kernel.org/r/20190405173211.11373-2-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Tetsuo Handa Cc: Mike Travis Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 3f3b8051f342..c63ddd06a5da 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -632,19 +632,22 @@ EXPORT_SYMBOL(bitmap_parselist); * @nmaskbits: size of bitmap, in bits. * * Wrapper for bitmap_parselist(), providing it with user buffer. - * - * We cannot have this as an inline function in bitmap.h because it needs - * linux/uaccess.h to get the access_ok() declaration and this causes - * cyclic dependencies. */ int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, unsigned long *maskp, int nmaskbits) { - if (!access_ok(ubuf, ulen)) - return -EFAULT; - return __bitmap_parselist((const char __force *)ubuf, - ulen, 1, maskp, nmaskbits); + char *buf; + int ret; + + buf = memdup_user_nul(ubuf, ulen); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + ret = bitmap_parselist(buf, maskp, nmaskbits); + + kfree(buf); + return ret; } EXPORT_SYMBOL(bitmap_parselist_user); -- cgit From e371c481d89cd6b9db72e077efd64059dfc87711 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 14 May 2019 15:43:14 -0700 Subject: lib: rework bitmap_parselist Remove __bitmap_parselist helper and split the function to logical parts. [ynorov@marvell.com: v5] Link: http://lkml.kernel.org/r/20190416063801.20134-3-ynorov@marvell.com Link: http://lkml.kernel.org/r/20190405173211.11373-3-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Mike Travis Cc: Rasmus Villemoes Cc: Tetsuo Handa Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 255 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 142 insertions(+), 113 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index c63ddd06a5da..f235434df87b 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -20,6 +20,8 @@ #include +#include "kstrtox.h" + /** * DOC: bitmap introduction * @@ -477,12 +479,128 @@ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, } EXPORT_SYMBOL(bitmap_print_to_pagebuf); +/* + * Region 9-38:4/10 describes the following bitmap structure: + * 0 9 12 18 38 + * .........****......****......****...... 
+ * ^ ^ ^ ^ + * start off group_len end + */ +struct region { + unsigned int start; + unsigned int off; + unsigned int group_len; + unsigned int end; +}; + +static int bitmap_set_region(const struct region *r, + unsigned long *bitmap, int nbits) +{ + unsigned int start; + + if (r->end >= nbits) + return -ERANGE; + + for (start = r->start; start <= r->end; start += r->group_len) + bitmap_set(bitmap, start, min(r->end - start + 1, r->off)); + + return 0; +} + +static int bitmap_check_region(const struct region *r) +{ + if (r->start > r->end || r->group_len == 0 || r->off > r->group_len) + return -EINVAL; + + return 0; +} + +static const char *bitmap_getnum(const char *str, unsigned int *num) +{ + unsigned long long n; + unsigned int len; + + len = _parse_integer(str, 10, &n); + if (!len) + return ERR_PTR(-EINVAL); + if (len & KSTRTOX_OVERFLOW || n != (unsigned int)n) + return ERR_PTR(-EOVERFLOW); + + *num = n; + return str + len; +} + +static inline bool end_of_str(char c) +{ + return c == '\0' || c == '\n'; +} + +static inline bool __end_of_region(char c) +{ + return isspace(c) || c == ','; +} + +static inline bool end_of_region(char c) +{ + return __end_of_region(c) || end_of_str(c); +} + +/* + * The format allows commas and whitespases at the beginning + * of the region. + */ +static const char *bitmap_find_region(const char *str) +{ + while (__end_of_region(*str)) + str++; + + return end_of_str(*str) ? NULL : str; +} + +static const char *bitmap_parse_region(const char *str, struct region *r) +{ + str = bitmap_getnum(str, &r->start); + if (IS_ERR(str)) + return str; + + if (end_of_region(*str)) + goto no_end; + + if (*str != '-') + return ERR_PTR(-EINVAL); + + str = bitmap_getnum(str + 1, &r->end); + if (IS_ERR(str)) + return str; + + if (end_of_region(*str)) + goto no_pattern; + + if (*str != ':') + return ERR_PTR(-EINVAL); + + str = bitmap_getnum(str + 1, &r->off); + if (IS_ERR(str)) + return str; + + if (*str != '/') + return ERR_PTR(-EINVAL); + + return bitmap_getnum(str + 1, &r->group_len); + +no_end: + r->end = r->start; +no_pattern: + r->off = r->end + 1; + r->group_len = r->end + 1; + + return end_of_str(*str) ? NULL : str; +} + /** - * __bitmap_parselist - convert list format ASCII string to bitmap - * @buf: read nul-terminated user string from this buffer - * @buflen: buffer size in bytes. If string is smaller than this - * then it must be terminated with a \0. - * @is_user: location of buffer, 0 indicates kernel space + * bitmap_parselist - convert list format ASCII string to bitmap + * @buf: read user string from this buffer; must be terminated + * with a \0 or \n. * @maskp: write resulting mask here * @nmaskbits: number of bits in mask to be written * @@ -498,127 +616,38 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf); * * Returns: 0 on success, -errno on invalid input strings. 
Error values: * - * - ``-EINVAL``: second number in range smaller than first + * - ``-EINVAL``: wrong region format * - ``-EINVAL``: invalid character in string * - ``-ERANGE``: bit number specified too large for mask + * - ``-EOVERFLOW``: integer overflow in the input parameters */ -static int __bitmap_parselist(const char *buf, unsigned int buflen, - int is_user, unsigned long *maskp, - int nmaskbits) +int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits) { - unsigned int a, b, old_a, old_b; - unsigned int group_size, used_size, off; - int c, old_c, totaldigits, ndigits; - const char __user __force *ubuf = (const char __user __force *)buf; - int at_start, in_range, in_partial_range; + struct region r; + long ret; - totaldigits = c = 0; - old_a = old_b = 0; - group_size = used_size = 0; bitmap_zero(maskp, nmaskbits); - do { - at_start = 1; - in_range = 0; - in_partial_range = 0; - a = b = 0; - ndigits = totaldigits; - - /* Get the next cpu# or a range of cpu#'s */ - while (buflen) { - old_c = c; - if (is_user) { - if (__get_user(c, ubuf++)) - return -EFAULT; - } else - c = *buf++; - buflen--; - if (isspace(c)) - continue; - /* A '\0' or a ',' signal the end of a cpu# or range */ - if (c == '\0' || c == ',') - break; - /* - * whitespaces between digits are not allowed, - * but it's ok if whitespaces are on head or tail. - * when old_c is whilespace, - * if totaldigits == ndigits, whitespace is on head. - * if whitespace is on tail, it should not run here. - * as c was ',' or '\0', - * the last code line has broken the current loop. - */ - if ((totaldigits != ndigits) && isspace(old_c)) - return -EINVAL; - - if (c == '/') { - used_size = a; - at_start = 1; - in_range = 0; - a = b = 0; - continue; - } + while (buf) { + buf = bitmap_find_region(buf); + if (buf == NULL) + return 0; - if (c == ':') { - old_a = a; - old_b = b; - at_start = 1; - in_range = 0; - in_partial_range = 1; - a = b = 0; - continue; - } + buf = bitmap_parse_region(buf, &r); + if (IS_ERR(buf)) + return PTR_ERR(buf); - if (c == '-') { - if (at_start || in_range) - return -EINVAL; - b = 0; - in_range = 1; - at_start = 1; - continue; - } + ret = bitmap_check_region(&r); + if (ret) + return ret; - if (!isdigit(c)) - return -EINVAL; + ret = bitmap_set_region(&r, maskp, nmaskbits); + if (ret) + return ret; + } - b = b * 10 + (c - '0'); - if (!in_range) - a = b; - at_start = 0; - totaldigits++; - } - if (ndigits == totaldigits) - continue; - if (in_partial_range) { - group_size = a; - a = old_a; - b = old_b; - old_a = old_b = 0; - } else { - used_size = group_size = b - a + 1; - } - /* if no digit is after '-', it's wrong*/ - if (at_start && in_range) - return -EINVAL; - if (!(a <= b) || group_size == 0 || !(used_size <= group_size)) - return -EINVAL; - if (b >= nmaskbits) - return -ERANGE; - while (a <= b) { - off = min(b - a + 1, used_size); - bitmap_set(maskp, a, off); - a += group_size; - } - } while (buflen && c == ','); return 0; } - -int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) -{ - char *nl = strchrnul(bp, '\n'); - int len = nl - bp; - - return __bitmap_parselist(bp, len, 0, maskp, nmaskbits); -} EXPORT_SYMBOL(bitmap_parselist); -- cgit From 0c2111a5c852aa0ded7c5644a58058df80ed58bb Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 14 May 2019 15:43:18 -0700 Subject: lib/test_bitmap: switch test_bitmap_parselist to ktime_get() test_bitmap_parselist currently uses get_cycles which is not implemented on some platforms, so use ktime_get() instead. 
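In isolation, the portable timing idiom the test switches to looks like
this (a minimal sketch distilled from the diff below; variable names
follow the test code):

	ktime_t time;

	time = ktime_get();
	err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
	time = ktime_get() - time;	/* elapsed time, in nanoseconds */

ktime_get() is implemented on every platform, so the measured delta is
portable where a raw cycle counter is not.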
Link: http://lkml.kernel.org/r/20190405173211.11373-4-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Mike Travis Cc: Rasmus Villemoes Cc: Tetsuo Handa Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 792d90608052..63d4a21955f0 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -268,15 +268,15 @@ static void __init test_bitmap_parselist(void) { int i; int err; - cycles_t cycles; + ktime_t time; DECLARE_BITMAP(bmap, 2048); for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { #define ptest parselist_tests[i] - cycles = get_cycles(); + time = ktime_get(); err = bitmap_parselist(ptest.in, bmap, ptest.nbits); - cycles = get_cycles() - cycles; + time = ktime_get() - time; if (err != ptest.errno) { pr_err("test %d: input is %s, errno is %d, expected %d\n", @@ -293,8 +293,7 @@ static void __init test_bitmap_parselist(void) if (ptest.flags & PARSE_TIME) pr_err("test %d: input is '%s' OK, Time: %llu\n", - i, ptest.in, - (unsigned long long)cycles); + i, ptest.in, time); } } -- cgit From a4ab50509c76a03d338f434a271494b450250f0d Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 14 May 2019 15:43:21 -0700 Subject: lib/test_bitmap: add testcases for bitmap_parselist() Add tests for non-number character, empty regions, integer overflow. [ynorov@marvell.com: v5] Link: http://lkml.kernel.org/r/20190416063801.20134-5-ynorov@marvell.com Link: http://lkml.kernel.org/r/20190405173211.11373-5-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Mike Travis Cc: Rasmus Villemoes Cc: Tetsuo Handa Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 63d4a21955f0..6640a82ad44b 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -226,7 +226,8 @@ static const unsigned long exp[] __initconst = { BITMAP_FROM_U64(0xffffffff), BITMAP_FROM_U64(0xfffffffe), BITMAP_FROM_U64(0x3333333311111111ULL), - BITMAP_FROM_U64(0xffffffff77777777ULL) + BITMAP_FROM_U64(0xffffffff77777777ULL), + BITMAP_FROM_U64(0), }; static const unsigned long exp2[] __initconst = { @@ -249,19 +250,34 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { {0, "1-31:4/4", &exp[9 * step], 32, 0}, {0, "0-31:1/4,32-63:2/4", &exp[10 * step], 64, 0}, {0, "0-31:3/4,32-63:4/4", &exp[11 * step], 64, 0}, + {0, " ,, 0-31:3/4 ,, 32-63:4/4 ,, ", &exp[11 * step], 64, 0}, {0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4", exp2, 128, 0}, {0, "0-2047:128/256", NULL, 2048, PARSE_TIME}, + {0, "", &exp[12 * step], 8, 0}, + {0, "\n", &exp[12 * step], 8, 0}, + {0, ",, ,, , , ,", &exp[12 * step], 8, 0}, + {0, " , ,, , , ", &exp[12 * step], 8, 0}, + {0, " , ,, , , \n", &exp[12 * step], 8, 0}, + {-EINVAL, "-1", NULL, 8, 0}, {-EINVAL, "-0", NULL, 8, 0}, {-EINVAL, "10-1", NULL, 8, 0}, {-EINVAL, "0-31:", NULL, 8, 0}, {-EINVAL, "0-31:0", NULL, 8, 0}, + {-EINVAL, "0-31:0/", NULL, 8, 0}, {-EINVAL, "0-31:0/0", NULL, 8, 0}, {-EINVAL, "0-31:1/0", NULL, 8, 0}, {-EINVAL, "0-31:10/1", NULL, 8, 0}, + {-EOVERFLOW, "0-98765432123456789:10/1", NULL, 8, 0}, + + {-EINVAL, "a-31", NULL, 8, 0}, + {-EINVAL, "0-a1", NULL, 8, 0}, + 
{-EINVAL, "a-31:10/1", NULL, 8, 0}, + {-EINVAL, "0-31:a/1", NULL, 8, 0}, + {-EINVAL, "0-\n", NULL, 8, 0}, }; static void __init test_bitmap_parselist(void) -- cgit From 6ea86bdfc169ba9df8484e9d2bb250a874b03c47 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 14 May 2019 15:43:24 -0700 Subject: lib/test_bitmap: add tests for bitmap_parselist_user() Propagate existing bitmap_parselist() tests to bitmap_parselist_user(). Link: http://lkml.kernel.org/r/20190405173211.11373-6-ynorov@marvell.com Signed-off-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Kees Cook Cc: Matthew Wilcox Cc: Mike Travis Cc: Rasmus Villemoes Cc: Tetsuo Handa Cc: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6640a82ad44b..d3a501f2a81a 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "../tools/testing/selftests/kselftest_module.h" @@ -280,39 +281,63 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { {-EINVAL, "0-\n", NULL, 8, 0}, }; -static void __init test_bitmap_parselist(void) +static void __init __test_bitmap_parselist(int is_user) { int i; int err; ktime_t time; DECLARE_BITMAP(bmap, 2048); + char *mode = is_user ? "_user" : ""; for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { #define ptest parselist_tests[i] - time = ktime_get(); - err = bitmap_parselist(ptest.in, bmap, ptest.nbits); - time = ktime_get() - time; + if (is_user) { + mm_segment_t orig_fs = get_fs(); + size_t len = strlen(ptest.in); + + set_fs(KERNEL_DS); + time = ktime_get(); + err = bitmap_parselist_user(ptest.in, len, + bmap, ptest.nbits); + time = ktime_get() - time; + set_fs(orig_fs); + } else { + time = ktime_get(); + err = bitmap_parselist(ptest.in, bmap, ptest.nbits); + time = ktime_get() - time; + } if (err != ptest.errno) { - pr_err("test %d: input is %s, errno is %d, expected %d\n", - i, ptest.in, err, ptest.errno); + pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n", + mode, i, ptest.in, err, ptest.errno); continue; } if (!err && ptest.expected && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) { - pr_err("test %d: input is %s, result is 0x%lx, expected 0x%lx\n", - i, ptest.in, bmap[0], *ptest.expected); + pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n", + mode, i, ptest.in, bmap[0], + *ptest.expected); continue; } if (ptest.flags & PARSE_TIME) - pr_err("test %d: input is '%s' OK, Time: %llu\n", - i, ptest.in, time); + pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n", + mode, i, ptest.in, time); } } +static void __init test_bitmap_parselist(void) +{ + __test_bitmap_parselist(0); +} + +static void __init test_bitmap_parselist_user(void) +{ + __test_bitmap_parselist(1); +} + #define EXP_BYTES (sizeof(exp) * 8) static void __init test_bitmap_arr32(void) @@ -385,6 +410,7 @@ static void __init selftest(void) test_copy(); test_bitmap_arr32(); test_bitmap_parselist(); + test_bitmap_parselist_user(); test_mem_optimisations(); } -- cgit From 7507c40258726ea7a07374db00e8b55a138de88c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 14 May 2019 15:43:30 -0700 Subject: lib/test_vmalloc.c:test_func(): eliminate local `ret' Local 'ret' is unneeded and was poorly named: the variable `ret' generally means the "the value which this function will return". 
Cc: Roman Gushchin
Cc: Uladzislau Rezki
Cc: Michal Hocko
Cc: Matthew Wilcox
Cc: Thomas Garnier
Cc: Oleksiy Avramchenko
Cc: Steven Rostedt
Cc: Joel Fernandes
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Tejun Heo
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/test_vmalloc.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index f832b095afba..8bbefcaddfe8 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -384,12 +384,11 @@ static int test_func(void *private)
 {
 	struct test_driver *t = private;
 	int random_array[ARRAY_SIZE(test_case_array)];
-	int index, i, j, ret;
+	int index, i, j;
 	ktime_t kt;
 	u64 delta;

-	ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu));
-	if (ret < 0)
+	if (set_cpus_allowed_ptr(current, cpumask_of(t->cpu)) < 0)
 		pr_err("Failed to set affinity to %d CPU\n", t->cpu);

 	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
@@ -415,8 +414,7 @@ static int test_func(void *private)

 		kt = ktime_get();
 		for (j = 0; j < test_repeat_count; j++) {
-			ret = test_case_array[index].test_func();
-			if (!ret)
+			if (!test_case_array[index].test_func())
 				per_cpu_test_data[t->cpu][index].test_passed++;
 			else
 				per_cpu_test_data[t->cpu][index].test_failed++;
-- cgit

From c66d7a27b794b43d19a96df03822dd476b03b0a3 Mon Sep 17 00:00:00 2001
From: Sinan Kaya
Date: Tue, 14 May 2019 15:44:00 -0700
Subject: init: introduce DEBUG_MISC option

Patch series "init: Do not select DEBUG_KERNEL by default", v5.

CONFIG_DEBUG_KERNEL has been designed to just enable Kconfig options.
Kernel code generation should not depend on CONFIG_DEBUG_KERNEL.

Proposed alternative plan: let's add a new symbol, something like
DEBUG_MISC ("Miscellaneous debug code that should be under a more
specific debug option but isn't"), make it depend on DEBUG_KERNEL and be
"default DEBUG_KERNEL" but allow itself to be turned off, and then
mechanically change the small handful of "#ifdef CONFIG_DEBUG_KERNEL" to
"#ifdef CONFIG_DEBUG_MISC".

This patch (of 5):

Introduce DEBUG_MISC ("Miscellaneous debug code that should be under a
more specific debug option but isn't"), make it depend on DEBUG_KERNEL
and be "default DEBUG_KERNEL" but allow itself to be turned off, and
then mechanically change the small handful of "#ifdef
CONFIG_DEBUG_KERNEL" to "#ifdef CONFIG_DEBUG_MISC".

Link: http://lkml.kernel.org/r/20190413224438.10802-2-okaya@kernel.org
Signed-off-by: Sinan Kaya
Reviewed-by: Josh Triplett
Reviewed-by: Kees Cook
Cc: Anders Roxell
Cc: Benjamin Herrenschmidt
Cc: Christophe Leroy
Cc: Chris Zankel
Cc: "David S. Miller"
Cc: Florian Westphal
Cc: Greg Kroah-Hartman
Cc: James Hogan
Cc: Jozsef Kadlecsik
Cc: Max Filippov
Cc: Michael Ellerman
Cc: Michal Hocko
Cc: Mike Rapoport
Cc: Pablo Neira Ayuso
Cc: Paul Burton
Cc: Paul Mackerras
Cc: Ralf Baechle
Cc: Thomas Bogendoerfer
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/Kconfig.debug | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 181bd56238b0..fdfa173651eb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -460,6 +460,15 @@ config DEBUG_KERNEL
 	  Say Y here if you are developing drivers or trying to debug and
 	  identify kernel problems.

+config DEBUG_MISC
+	bool "Miscellaneous debug code"
+	default DEBUG_KERNEL
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here if you need to enable miscellaneous debug code that should
+	  be under a more specific debug option but isn't.
+
+
 menu "Memory Debugging"

 source "mm/Kconfig.debug"
-- cgit

From 2ea622b887e74497ce5aac5bfe247502b5786f56 Mon Sep 17 00:00:00 2001
From: Eric Sandeen
Date: Tue, 14 May 2019 15:45:10 -0700
Subject: tools/testing/selftests/sysctl/sysctl.sh: add proc_do_large_bitmap() test case

The kernel has only two users of proc_do_large_bitmap(), the kernel CPU
watchdog, and the ip_local_reserved_ports. Refer to watchdog_cpumask and
ip_local_reserved_ports in Documentation for further details on these.
When you feed these an input buffer larger than PAGE_SIZE - 1, the input
data gets misparsed, and the user is incorrectly informed that the
desired input value was set.

This commit implements a test which mimics and exploits that use case;
it uses a bitmap, as in the watchdog case. The bitmap is used to test
the bitmap proc handler, proc_do_large_bitmap(). The next commit fixes
this issue.

[akpm@linux-foundation.org: move proc_do_large_bitmap() export to EOF]
[mcgrof@kernel.org: use new target description for backward compatibility]
[mcgrof@kernel.org: augment test number to 50, ran into issues with bash string comparisons when testing up to 50 cases.]
[mcgrof@kernel.org: introduce and use verify_diff_proc_file() to use diff]
[mcgrof@kernel.org: use mktemp for tmp file]
[mcgrof@kernel.org: merge shell test and C code]
[mcgrof@kernel.org: commit log love]
[mcgrof@kernel.org: export proc_do_large_bitmap() to allow for the test]
[mcgrof@kernel.org: check for the return value when writing to the proc file]
Link: http://lkml.kernel.org/r/20190320222831.8243-6-mcgrof@kernel.org
Signed-off-by: Eric Sandeen
Signed-off-by: Luis Chamberlain
Acked-by: Kees Cook
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 lib/test_sysctl.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 3dd801c1c85b..566dad3f4196 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -47,6 +47,9 @@ struct test_sysctl_data {
 	unsigned int uint_0001;

 	char string_0001[65];
+
+#define SYSCTL_TEST_BITMAP_SIZE	65536
+	unsigned long *bitmap_0001;
 };

 static struct test_sysctl_data test_data = {
@@ -102,6 +105,13 @@ static struct ctl_table test_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dostring,
 	},
+	{
+		.procname	= "bitmap_0001",
+		.data		= &test_data.bitmap_0001,
+		.maxlen		= SYSCTL_TEST_BITMAP_SIZE,
+		.mode		= 0644,
+		.proc_handler	= proc_do_large_bitmap,
+	},
 	{ }
 };

@@ -129,15 +139,21 @@ static struct ctl_table_header *test_sysctl_header;

 static int __init test_sysctl_init(void)
 {
+	test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL);
+	if (!test_data.bitmap_0001)
+		return -ENOMEM;
 	test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
-	if (!test_sysctl_header)
+	if (!test_sysctl_header) {
+		kfree(test_data.bitmap_0001);
 		return -ENOMEM;
+	}
 	return 0;
 }
 late_initcall(test_sysctl_init);

 static void __exit test_sysctl_exit(void)
 {
+	kfree(test_data.bitmap_0001);
 	if (test_sysctl_header)
 		unregister_sysctl_table(test_sysctl_header);
 }
-- cgit
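For context, the parsing bug this test targets can be reproduced from
userspace by writing a bit list longer than PAGE_SIZE - 1 to the new
proc file. A minimal sketch follows (it assumes the test_sysctl module
is loaded; the proc path and the generated list are illustrative, this
is not the selftest itself):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* ~24 KiB of "0,2,4,...": well past PAGE_SIZE - 1 */
		static char buf[64 * 1024];
		size_t len = 0;
		int fd, i;

		for (i = 0; i < 10000; i += 2)
			len += snprintf(buf + len, sizeof(buf) - len, "%d,", i);
		buf[len - 1] = '\n';	/* replace the trailing comma */

		/* Path assumed from the sysctl table above. */
		fd = open("/proc/sys/debug/test_sysctl/bitmap_0001", O_WRONLY);
		if (fd < 0 || write(fd, buf, len) != (ssize_t)len)
			perror("bitmap_0001");
		if (fd >= 0)
			close(fd);
		return 0;
	}

Before the fix referenced above, such a write could appear to succeed
even though only part of the list was actually parsed.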