From 97a6ec4ac021f7fbec05c15a3aa0c4aaf0461af5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:41 -0800 Subject: rhashtable: Change rhashtable_walk_start to return void Most callers of rhashtable_walk_start don't care about a resize event which is indicated by a return value of -EAGAIN. So calls to rhashtable_walk_start are wrapped wih code to ignore -EAGAIN. Something like this is common: ret = rhashtable_walk_start(rhiter); if (ret && ret != -EAGAIN) goto out; Since zero and -EAGAIN are the only possible return values from the function this check is pointless. The condition never evaluates to true. This patch changes rhashtable_walk_start to return void. This simplifies code for the callers that ignore -EAGAIN. For the few cases where the caller cares about the resize event, particularly where the table can be walked in mulitple parts for netlink or seq file dump, the function rhashtable_walk_start_check has been added that returns -EAGAIN on a resize event. Signed-off-by: Tom Herbert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/rhashtable.c | 10 +++++++--- lib/test_rhashtable.c | 6 +----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ddd7dde87c3c..1935e86ed477 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -732,7 +732,7 @@ void rhashtable_walk_exit(struct rhashtable_iter *iter) EXPORT_SYMBOL_GPL(rhashtable_walk_exit); /** - * rhashtable_walk_start - Start a hash table walk + * rhashtable_walk_start_check - Start a hash table walk * @iter: Hash table iterator * * Start a hash table walk at the current iterator position. Note that we take @@ -744,8 +744,12 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit); * Returns -EAGAIN if resize event occured. Note that the iterator * will rewind back to the beginning and you may use it immediately * by calling rhashtable_walk_next. + * + * rhashtable_walk_start is defined as an inline variant that returns + * void. This is preferred in cases where the caller would ignore + * resize events and always continue. */ -int rhashtable_walk_start(struct rhashtable_iter *iter) +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; @@ -764,7 +768,7 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) return 0; } -EXPORT_SYMBOL_GPL(rhashtable_walk_start); +EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); /** * rhashtable_walk_next - Return the next object and advance the iterator diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 8e83cbdc049c..76d3667fdea2 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -162,11 +162,7 @@ static void test_bucket_stats(struct rhashtable *ht, unsigned int entries) return; } - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) { - pr_warn("Test failed: iterator failed: %d\n", err); - return; - } + rhashtable_walk_start(&hti); while ((pos = rhashtable_walk_next(&hti))) { if (PTR_ERR(pos) == -EAGAIN) { -- cgit From 2db54b475ae918d274bfc276416c384ba95e9f94 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:42 -0800 Subject: rhashtable: Add rhastable_walk_peek This function is like rhashtable_walk_next except that it only returns the current element in the inter and does not advance the iter. This patch also creates __rhashtable_walk_find_next. It finds the next element in the table when the entry cached in iter is NULL or at the end of a slot. __rhashtable_walk_find_next is called from rhashtable_walk_next and rhastable_walk_peek. end_of_table is an added field to the iter structure. This indicates that the end of table was reached (walker.tbl being NULL is not a sufficient condition for end of table). Signed-off-by: Tom Herbert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/rhashtable.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 1935e86ed477..6fc52d82efe6 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -707,6 +707,7 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) iter->p = NULL; iter->slot = 0; iter->skip = 0; + iter->end_of_table = 0; spin_lock(&ht->lock); iter->walker.tbl = @@ -761,7 +762,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker.tbl) { + if (!iter->walker.tbl && !iter->end_of_table) { iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); return -EAGAIN; } @@ -771,18 +772,16 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); /** - * rhashtable_walk_next - Return the next object and advance the iterator + * __rhashtable_walk_find_next - Find the next element in a table (or the first + * one in case of a new walk). + * * @iter: Hash table iterator * - * Note that you must call rhashtable_walk_stop when you are finished - * with the walk. + * Returns the found object or NULL when the end of the table is reached. * - * Returns the next object or NULL when the end of the table is reached. - * - * Returns -EAGAIN if resize event occured. Note that the iterator - * will rewind back to the beginning and you may continue to use it. + * Returns -EAGAIN if resize event occurred. */ -void *rhashtable_walk_next(struct rhashtable_iter *iter) +static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; struct rhlist_head *list = iter->list; @@ -790,13 +789,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) struct rhash_head *p = iter->p; bool rhlist = ht->rhlist; - if (p) { - if (!rhlist || !(list = rcu_dereference(list->next))) { - p = rcu_dereference(p->next); - list = container_of(p, struct rhlist_head, rhead); - } - goto next; - } + if (!tbl) + return NULL; for (; iter->slot < tbl->size; iter->slot++) { int skip = iter->skip; @@ -840,12 +834,89 @@ next: iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); + } else { + iter->end_of_table = true; } return NULL; } + +/** + * rhashtable_walk_next - Return the next object and advance the iterator + * @iter: Hash table iterator + * + * Note that you must call rhashtable_walk_stop when you are finished + * with the walk. + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occurred. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_next(struct rhashtable_iter *iter) +{ + struct rhlist_head *list = iter->list; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + bool rhlist = ht->rhlist; + + if (p) { + if (!rhlist || !(list = rcu_dereference(list->next))) { + p = rcu_dereference(p->next); + list = container_of(p, struct rhlist_head, rhead); + } + if (!rht_is_a_nulls(p)) { + iter->skip++; + iter->p = p; + iter->list = list; + return rht_obj(ht, rhlist ? &list->rhead : p); + } + + /* At the end of this slot, switch to next one and then find + * next entry from that point. + */ + iter->skip = 0; + iter->slot++; + } + + return __rhashtable_walk_find_next(iter); +} EXPORT_SYMBOL_GPL(rhashtable_walk_next); +/** + * rhashtable_walk_peek - Return the next object but don't advance the iterator + * @iter: Hash table iterator + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occurred. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_peek(struct rhashtable_iter *iter) +{ + struct rhlist_head *list = iter->list; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + + if (p) + return rht_obj(ht, ht->rhlist ? &list->rhead : p); + + /* No object found in current iter, find next one in the table. */ + + if (iter->skip) { + /* A nonzero skip value points to the next entry in the table + * beyond that last one that was found. Decrement skip so + * we find the current value. __rhashtable_walk_find_next + * will restore the original value of skip assuming that + * the table hasn't changed. + */ + iter->skip--; + } + + return __rhashtable_walk_find_next(iter); +} +EXPORT_SYMBOL_GPL(rhashtable_walk_peek); + /** * rhashtable_walk_stop - Finish a hash table walk * @iter: Hash table iterator -- cgit From 92f36cca5773cbaa78c46ccf49503964a52da294 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:44 -0800 Subject: spinlock: Add library function to allocate spinlock buckets array Add two new library functions: alloc_bucket_spinlocks and free_bucket_spinlocks. These are used to allocate and free an array of spinlocks that are useful as locks for hash buckets. The interface specifies the maximum number of spinlocks in the array as well as a CPU multiplier to derive the number of spinlocks to allocate. The number allocated is rounded up to a power of two to make the array amenable to hash lookup. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- lib/Makefile | 2 +- lib/bucket_locks.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 lib/bucket_locks.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index d11c48ec8ffd..a6c8529dd9b2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -39,7 +39,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o errseq.o + once.o refcount.o usercopy.o errseq.o bucket_locks.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c new file mode 100644 index 000000000000..266a97c5708b --- /dev/null +++ b/lib/bucket_locks.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include + +/* Allocate an array of spinlocks to be accessed by a hash. Two arguments + * indicate the number of elements to allocate in the array. max_size + * gives the maximum number of elements to allocate. cpu_mult gives + * the number of locks per CPU to allocate. The size is rounded up + * to a power of 2 to be suitable as a hash table. + */ + +int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, + size_t max_size, unsigned int cpu_mult, gfp_t gfp) +{ + spinlock_t *tlocks = NULL; + unsigned int i, size; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + + if (cpu_mult) { + nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); + size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size); + } else { + size = max_size; + } + + if (sizeof(spinlock_t) != 0) { + if (gfpflags_allow_blocking(gfp)) + tlocks = kvmalloc(size * sizeof(spinlock_t), gfp); + else + tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp); + if (!tlocks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&tlocks[i]); + } + + *locks = tlocks; + *locks_mask = size - 1; + + return 0; +} +EXPORT_SYMBOL(alloc_bucket_spinlocks); + +void free_bucket_spinlocks(spinlock_t *locks) +{ + kvfree(locks); +} +EXPORT_SYMBOL(free_bucket_spinlocks); -- cgit From 64e0cd0d3540dbbdf6661943025409e6b31d5178 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:45 -0800 Subject: rhashtable: Call library function alloc_bucket_locks To allocate the array of bucket locks for the hash table we now call library function alloc_bucket_spinlocks. This function is based on the old alloc_bucket_locks in rhashtable and should produce the same effect. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- lib/rhashtable.c | 47 ++++++++--------------------------------------- 1 file changed, 8 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6fc52d82efe6..3825c30aaa36 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -65,42 +65,6 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #define ASSERT_RHT_MUTEX(HT) #endif - -static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, - gfp_t gfp) -{ - unsigned int i, size; -#if defined(CONFIG_PROVE_LOCKING) - unsigned int nr_pcpus = 2; -#else - unsigned int nr_pcpus = num_possible_cpus(); -#endif - - nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); - size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); - - /* Never allocate more than 0.5 locks per bucket */ - size = min_t(unsigned int, size, tbl->size >> 1); - - if (tbl->nest) - size = min(size, 1U << tbl->nest); - - if (sizeof(spinlock_t) != 0) { - if (gfpflags_allow_blocking(gfp)) - tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp); - else - tbl->locks = kmalloc_array(size, sizeof(spinlock_t), - gfp); - if (!tbl->locks) - return -ENOMEM; - for (i = 0; i < size; i++) - spin_lock_init(&tbl->locks[i]); - } - tbl->locks_mask = size - 1; - - return 0; -} - static void nested_table_free(union nested_table *ntbl, unsigned int size) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); @@ -140,7 +104,7 @@ static void bucket_table_free(const struct bucket_table *tbl) if (tbl->nest) nested_bucket_table_free(tbl); - kvfree(tbl->locks); + free_bucket_spinlocks(tbl->locks); kvfree(tbl); } @@ -207,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, gfp_t gfp) { struct bucket_table *tbl = NULL; - size_t size; + size_t size, max_locks; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); @@ -227,7 +191,12 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->size = size; - if (alloc_bucket_locks(ht, tbl, gfp) < 0) { + max_locks = size >> 1; + if (tbl->nest) + max_locks = min_t(size_t, max_locks, 1U << tbl->nest); + + if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks, + ht->p.locks_mul, gfp) < 0) { bucket_table_free(tbl); return NULL; } -- cgit From 540adea3809f61115d2a1ea4ed6e627613452ba1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:55:03 +0900 Subject: error-injection: Separate error-injection from kprobe Since error-injection framework is not limited to be used by kprobes, nor bpf. Other kernel subsystems can use it freely for checking safeness of error-injection, e.g. livepatch, ftrace etc. So this separate error-injection framework from kprobes. Some differences has been made: - "kprobe" word is removed from any APIs/structures. - BPF_ALLOW_ERROR_INJECTION() is renamed to ALLOW_ERROR_INJECTION() since it is not limited for BPF too. - CONFIG_FUNCTION_ERROR_INJECTION is the config item of this feature. It is automatically enabled if the arch supports error injection feature for kprobe or ftrace etc. Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- lib/Kconfig.debug | 4 + lib/Makefile | 1 + lib/error-inject.c | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 218 insertions(+) create mode 100644 lib/error-inject.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9d5b78aad4c5..2a33efdd1fea 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1500,6 +1500,10 @@ config FAULT_INJECTION Provide fault-injection framework. For more details, see Documentation/fault-injection/. +config FUNCTION_ERROR_INJECTION + def_bool y + depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + config FAILSLAB bool "Fault-injection capability for kmalloc" depends on FAULT_INJECTION diff --git a/lib/Makefile b/lib/Makefile index a6c8529dd9b2..75ec13778cd8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -149,6 +149,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o diff --git a/lib/error-inject.c b/lib/error-inject.c new file mode 100644 index 000000000000..bccadcf3c981 --- /dev/null +++ b/lib/error-inject.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +// error-inject.c: Function-level error injection table +#include +#include +#include +#include +#include +#include +#include +#include + +/* Whitelist of symbols that can be overridden for error injection. */ +static LIST_HEAD(error_injection_list); +static DEFINE_MUTEX(ei_mutex); +struct ei_entry { + struct list_head list; + unsigned long start_addr; + unsigned long end_addr; + void *priv; +}; + +bool within_error_injection_list(unsigned long addr) +{ + struct ei_entry *ent; + bool ret = false; + + mutex_lock(&ei_mutex); + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) { + ret = true; + break; + } + } + mutex_unlock(&ei_mutex); + return ret; +} + +/* + * Lookup and populate the error_injection_list. + * + * For safety reasons we only allow certain functions to be overridden with + * bpf_error_injection, so we need to populate the list of the symbols that have + * been marked as safe for overriding. + */ +static void populate_error_injection_list(unsigned long *start, + unsigned long *end, void *priv) +{ + unsigned long *iter; + struct ei_entry *ent; + unsigned long entry, offset = 0, size = 0; + + mutex_lock(&ei_mutex); + for (iter = start; iter < end; iter++) { + entry = arch_deref_entry_point((void *)*iter); + + if (!kernel_text_address(entry) || + !kallsyms_lookup_size_offset(entry, &size, &offset)) { + pr_err("Failed to find error inject entry at %p\n", + (void *)entry); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + break; + ent->start_addr = entry; + ent->end_addr = entry + size; + ent->priv = priv; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &error_injection_list); + } + mutex_unlock(&ei_mutex); +} + +/* Markers of the _error_inject_whitelist section */ +extern unsigned long __start_error_injection_whitelist[]; +extern unsigned long __stop_error_injection_whitelist[]; + +static void __init populate_kernel_ei_list(void) +{ + populate_error_injection_list(__start_error_injection_whitelist, + __stop_error_injection_whitelist, + NULL); +} + +#ifdef CONFIG_MODULES +static void module_load_ei_list(struct module *mod) +{ + if (!mod->num_ei_funcs) + return; + + populate_error_injection_list(mod->ei_funcs, + mod->ei_funcs + mod->num_ei_funcs, mod); +} + +static void module_unload_ei_list(struct module *mod) +{ + struct ei_entry *ent, *n; + + if (!mod->num_ei_funcs) + return; + + mutex_lock(&ei_mutex); + list_for_each_entry_safe(ent, n, &error_injection_list, list) { + if (ent->priv == mod) { + list_del_init(&ent->list); + kfree(ent); + } + } + mutex_unlock(&ei_mutex); +} + +/* Module notifier call back, checking error injection table on the module */ +static int ei_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + + if (val == MODULE_STATE_COMING) + module_load_ei_list(mod); + else if (val == MODULE_STATE_GOING) + module_unload_ei_list(mod); + + return NOTIFY_DONE; +} + +static struct notifier_block ei_module_nb = { + .notifier_call = ei_module_callback, + .priority = 0 +}; + +static __init int module_ei_init(void) +{ + return register_module_notifier(&ei_module_nb); +} +#else /* !CONFIG_MODULES */ +#define module_ei_init() (0) +#endif + +/* + * error_injection/whitelist -- shows which functions can be overridden for + * error injection. + */ +static void *ei_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&ei_mutex); + return seq_list_start(&error_injection_list, *pos); +} + +static void ei_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&ei_mutex); +} + +static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &error_injection_list, pos); +} + +static int ei_seq_show(struct seq_file *m, void *v) +{ + struct ei_entry *ent = list_entry(v, struct ei_entry, list); + + seq_printf(m, "%pf\n", (void *)ent->start_addr); + return 0; +} + +static const struct seq_operations ei_seq_ops = { + .start = ei_seq_start, + .next = ei_seq_next, + .stop = ei_seq_stop, + .show = ei_seq_show, +}; + +static int ei_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &ei_seq_ops); +} + +static const struct file_operations debugfs_ei_ops = { + .open = ei_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init ei_debugfs_init(void) +{ + struct dentry *dir, *file; + + dir = debugfs_create_dir("error_injection", NULL); + if (!dir) + return -ENOMEM; + + file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + + return 0; +} + +static int __init init_error_injection(void) +{ + populate_kernel_ei_list(); + + if (!module_ei_init()) + ei_debugfs_init(); + + return 0; +} +late_initcall(init_error_injection); -- cgit From 663faf9f7beeaca4ad0176bb96c776eed9dad0c5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:55:33 +0900 Subject: error-injection: Add injectable error types Add injectable error types for each error-injectable function. One motivation of error injection test is to find software flaws, mistakes or mis-handlings of expectable errors. If we find such flaws by the test, that is a program bug, so we need to fix it. But if the tester miss input the error (e.g. just return success code without processing anything), it causes unexpected behavior even if the caller is correctly programmed to handle any errors. That is not what we want to test by error injection. To clarify what type of errors the caller must expect for each injectable function, this introduces injectable error types: - EI_ETYPE_NULL : means the function will return NULL if it fails. No ERR_PTR, just a NULL. - EI_ETYPE_ERRNO : means the function will return -ERRNO if it fails. - EI_ETYPE_ERRNO_NULL : means the function will return -ERRNO (ERR_PTR) or NULL. ALLOW_ERROR_INJECTION() macro is expanded to get one of NULL, ERRNO, ERRNO_NULL to record the error type for each function. e.g. ALLOW_ERROR_INJECTION(open_ctree, ERRNO) This error types are shown in debugfs as below. ==== / # cat /sys/kernel/debug/error_injection/list open_ctree [btrfs] ERRNO io_ctl_init [btrfs] ERRNO ==== Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- lib/error-inject.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/error-inject.c b/lib/error-inject.c index bccadcf3c981..c0d4600f4896 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -16,6 +16,7 @@ struct ei_entry { struct list_head list; unsigned long start_addr; unsigned long end_addr; + int etype; void *priv; }; @@ -35,6 +36,17 @@ bool within_error_injection_list(unsigned long addr) return ret; } +int get_injectable_error_type(unsigned long addr) +{ + struct ei_entry *ent; + + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) + return ent->etype; + } + return EI_ETYPE_NONE; +} + /* * Lookup and populate the error_injection_list. * @@ -42,16 +54,17 @@ bool within_error_injection_list(unsigned long addr) * bpf_error_injection, so we need to populate the list of the symbols that have * been marked as safe for overriding. */ -static void populate_error_injection_list(unsigned long *start, - unsigned long *end, void *priv) +static void populate_error_injection_list(struct error_injection_entry *start, + struct error_injection_entry *end, + void *priv) { - unsigned long *iter; + struct error_injection_entry *iter; struct ei_entry *ent; unsigned long entry, offset = 0, size = 0; mutex_lock(&ei_mutex); for (iter = start; iter < end; iter++) { - entry = arch_deref_entry_point((void *)*iter); + entry = arch_deref_entry_point((void *)iter->addr); if (!kernel_text_address(entry) || !kallsyms_lookup_size_offset(entry, &size, &offset)) { @@ -65,6 +78,7 @@ static void populate_error_injection_list(unsigned long *start, break; ent->start_addr = entry; ent->end_addr = entry + size; + ent->etype = iter->etype; ent->priv = priv; INIT_LIST_HEAD(&ent->list); list_add_tail(&ent->list, &error_injection_list); @@ -73,8 +87,8 @@ static void populate_error_injection_list(unsigned long *start, } /* Markers of the _error_inject_whitelist section */ -extern unsigned long __start_error_injection_whitelist[]; -extern unsigned long __stop_error_injection_whitelist[]; +extern struct error_injection_entry __start_error_injection_whitelist[]; +extern struct error_injection_entry __stop_error_injection_whitelist[]; static void __init populate_kernel_ei_list(void) { @@ -157,11 +171,26 @@ static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos) return seq_list_next(v, &error_injection_list, pos); } +static const char *error_type_string(int etype) +{ + switch (etype) { + case EI_ETYPE_NULL: + return "NULL"; + case EI_ETYPE_ERRNO: + return "ERRNO"; + case EI_ETYPE_ERRNO_NULL: + return "ERRNO_NULL"; + default: + return "(unknown)"; + } +} + static int ei_seq_show(struct seq_file *m, void *v) { struct ei_entry *ent = list_entry(v, struct ei_entry, list); - seq_printf(m, "%pf\n", (void *)ent->start_addr); + seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr, + error_type_string(ent->etype)); return 0; } -- cgit From 4b1a29a7f5425d32640b34b8a755f34e02f64d0f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 13 Jan 2018 02:56:03 +0900 Subject: error-injection: Support fault injection framework Support in-kernel fault-injection framework via debugfs. This allows you to inject a conditional error to specified function using debugfs interfaces. Here is the result of test script described in Documentation/fault-injection/fault-injection.txt =========== # ./test_fail_function.sh 1+0 records in 1+0 records out 1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0227404 s, 46.1 MB/s btrfs-progs v4.4 See http://btrfs.wiki.kernel.org for more information. Label: (null) UUID: bfa96010-12e9-4360-aed0-42eec7af5798 Node size: 16384 Sector size: 4096 Filesystem size: 1001.00MiB Block group profiles: Data: single 8.00MiB Metadata: DUP 58.00MiB System: DUP 12.00MiB SSD detected: no Incompat features: extref, skinny-metadata Number of devices: 1 Devices: ID SIZE PATH 1 1001.00MiB /dev/loop2 mount: mount /dev/loop2 on /opt/tmpmnt failed: Cannot allocate memory SUCCESS! =========== Signed-off-by: Masami Hiramatsu Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- lib/Kconfig.debug | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2a33efdd1fea..890d4766cef3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1551,6 +1551,16 @@ config FAIL_FUTEX help Provide fault-injection capability for futexes. +config FAIL_FUNCTION + bool "Fault-injection capability for functions" + depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION + help + Provide function-based fault-injection capability. + This will allow you to override a specific function with a return + with given return value. As a result, function caller will see + an error value and have to handle it. This is useful to test the + error handling in various subsystems. + config FAULT_INJECTION_DEBUG_FS bool "Debugfs entries for fault-injection capabilities" depends on FAULT_INJECTION && SYSFS && DEBUG_FS -- cgit From fcd1c9177195489c40198d2769649439dd88505b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 20 Jan 2018 01:24:31 +0100 Subject: bpf: add couple of test cases for signed extended imms Add a couple of test cases for interpreter and JIT that are related to an issue we faced some time ago in Cilium [1], which is fixed in LLVM with commit e53750e1e086 ("bpf: fix bug on silently truncating 64-bit immediate"). Test cases were run-time checking kernel to behave as intended which should also provide some guidance for current or new JITs in case they should trip over this. Added for cBPF and eBPF. [1] https://github.com/cilium/cilium/pull/2162 Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- lib/test_bpf.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f369889e521d..e3938e395cba 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6109,6 +6109,110 @@ static struct bpf_test tests[] = { { { ETH_HLEN, 42 } }, .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, }, + /* Checking interpreter vs JIT wrt signed extended imms. */ + { + "JNE signed compare, test 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_REG(BPF_JNE, R2, R4, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 4", + .u.insns_int = { + BPF_LD_IMM64(R1, -17104896), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 5", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xfefb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 6", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x7efb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 7", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12), + BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, 2), + }, + CLASSIC | FLAG_NO_DATA, + {}, + { { 0, 2 } }, + }, }; static struct net_device dev; -- cgit From 21ccaf21497b72f42133182716a42dbf573d314b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 26 Jan 2018 23:33:48 +0100 Subject: bpf: add further test cases around div/mod and others Update selftests to relfect recent changes and add various new test cases. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- lib/test_bpf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_bpf.c b/lib/test_bpf.c index e3938e395cba..4cd9ea9b3449 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -2003,10 +2003,14 @@ static struct bpf_test tests[] = { { { 4, 0 }, { 5, 10 } } }, { - "INT: DIV by zero", + /* This one doesn't go through verifier, but is just raw insn + * as opposed to cBPF tests from here. Thus div by 0 tests are + * done in test_verifier in BPF kselftests. + */ + "INT: DIV by -1", .u.insns_int = { BPF_ALU64_REG(BPF_MOV, R6, R1), - BPF_ALU64_IMM(BPF_MOV, R7, 0), + BPF_ALU64_IMM(BPF_MOV, R7, -1), BPF_LD_ABS(BPF_B, 3), BPF_ALU32_REG(BPF_DIV, R0, R7), BPF_EXIT_INSN(), -- cgit