From 97a6ec4ac021f7fbec05c15a3aa0c4aaf0461af5 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Mon, 4 Dec 2017 10:31:41 -0800
Subject: rhashtable: Change rhashtable_walk_start to return void

Most callers of rhashtable_walk_start don't care about a resize event
which is indicated by a return value of -EAGAIN. So calls to
rhashtable_walk_start are wrapped with code to ignore -EAGAIN. Something
like this is common:

       ret = rhashtable_walk_start(rhiter);
       if (ret && ret != -EAGAIN)
               goto out;

Since zero and -EAGAIN are the only possible return values from the
function this check is pointless. The condition never evaluates to true.

This patch changes rhashtable_walk_start to return void. This simplifies
code for the callers that ignore -EAGAIN. For the few cases where the
caller cares about the resize event, particularly where the table can be
walked in multiple parts for netlink or seq file dump, the function
rhashtable_walk_start_check has been added that returns -EAGAIN on a
resize event.

Signed-off-by: Tom Herbert <tom@quantonium.net>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c      | 10 +++++++---
 lib/test_rhashtable.c |  6 +-----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index ddd7dde87c3c..1935e86ed477 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -732,7 +732,7 @@ void rhashtable_walk_exit(struct rhashtable_iter *iter)
 EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
 
 /**
- * rhashtable_walk_start - Start a hash table walk
+ * rhashtable_walk_start_check - Start a hash table walk
  * @iter:	Hash table iterator
  *
  * Start a hash table walk at the current iterator position.  Note that we take
@@ -744,8 +744,12 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
  * Returns -EAGAIN if resize event occured.  Note that the iterator
  * will rewind back to the beginning and you may use it immediately
  * by calling rhashtable_walk_next.
+ *
+ * rhashtable_walk_start is defined as an inline variant that returns
+ * void. This is preferred in cases where the caller would ignore
+ * resize events and always continue.
  */
-int rhashtable_walk_start(struct rhashtable_iter *iter)
+int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 	__acquires(RCU)
 {
 	struct rhashtable *ht = iter->ht;
@@ -764,7 +768,7 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rhashtable_walk_start);
+EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
 
 /**
  * rhashtable_walk_next - Return the next object and advance the iterator
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 8e83cbdc049c..76d3667fdea2 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -162,11 +162,7 @@ static void test_bucket_stats(struct rhashtable *ht, unsigned int entries)
 		return;
 	}
 
-	err = rhashtable_walk_start(&hti);
-	if (err && err != -EAGAIN) {
-		pr_warn("Test failed: iterator failed: %d\n", err);
-		return;
-	}
+	rhashtable_walk_start(&hti);
 
 	while ((pos = rhashtable_walk_next(&hti))) {
 		if (PTR_ERR(pos) == -EAGAIN) {
-- 
cgit 


From 2db54b475ae918d274bfc276416c384ba95e9f94 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Mon, 4 Dec 2017 10:31:42 -0800
Subject: rhashtable: Add rhashtable_walk_peek

This function is like rhashtable_walk_next except that it only returns
the current element in the iter and does not advance the iter.

This patch also creates __rhashtable_walk_find_next. It finds the next
element in the table when the entry cached in iter is NULL or at the end
of a slot. __rhashtable_walk_find_next is called from
rhashtable_walk_next and rhashtable_walk_peek.

end_of_table is an added field to the iter structure. This indicates
that the end of table was reached (walker.tbl being NULL is not a
sufficient condition for end of table).

Signed-off-by: Tom Herbert <tom@quantonium.net>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 87 insertions(+), 16 deletions(-)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 1935e86ed477..6fc52d82efe6 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -707,6 +707,7 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
 	iter->p = NULL;
 	iter->slot = 0;
 	iter->skip = 0;
+	iter->end_of_table = 0;
 
 	spin_lock(&ht->lock);
 	iter->walker.tbl =
@@ -761,7 +762,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 		list_del(&iter->walker.list);
 	spin_unlock(&ht->lock);
 
-	if (!iter->walker.tbl) {
+	if (!iter->walker.tbl && !iter->end_of_table) {
 		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}
@@ -771,18 +772,16 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
 
 /**
- * rhashtable_walk_next - Return the next object and advance the iterator
+ * __rhashtable_walk_find_next - Find the next element in a table (or the first
+ * one in case of a new walk).
+ *
  * @iter:	Hash table iterator
  *
- * Note that you must call rhashtable_walk_stop when you are finished
- * with the walk.
+ * Returns the found object or NULL when the end of the table is reached.
  *
- * Returns the next object or NULL when the end of the table is reached.
- *
- * Returns -EAGAIN if resize event occured.  Note that the iterator
- * will rewind back to the beginning and you may continue to use it.
+ * Returns -EAGAIN if resize event occurred.
  */
-void *rhashtable_walk_next(struct rhashtable_iter *iter)
+static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
 {
 	struct bucket_table *tbl = iter->walker.tbl;
 	struct rhlist_head *list = iter->list;
@@ -790,13 +789,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
 	struct rhash_head *p = iter->p;
 	bool rhlist = ht->rhlist;
 
-	if (p) {
-		if (!rhlist || !(list = rcu_dereference(list->next))) {
-			p = rcu_dereference(p->next);
-			list = container_of(p, struct rhlist_head, rhead);
-		}
-		goto next;
-	}
+	if (!tbl)
+		return NULL;
 
 	for (; iter->slot < tbl->size; iter->slot++) {
 		int skip = iter->skip;
@@ -840,12 +834,89 @@ next:
 		iter->slot = 0;
 		iter->skip = 0;
 		return ERR_PTR(-EAGAIN);
+	} else {
+		iter->end_of_table = true;
 	}
 
 	return NULL;
 }
+
+/**
+ * rhashtable_walk_next - Return the next object and advance the iterator
+ * @iter:	Hash table iterator
+ *
+ * Note that you must call rhashtable_walk_stop when you are finished
+ * with the walk.
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred.  Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_next(struct rhashtable_iter *iter)
+{
+	struct rhlist_head *list = iter->list;
+	struct rhashtable *ht = iter->ht;
+	struct rhash_head *p = iter->p;
+	bool rhlist = ht->rhlist;
+
+	if (p) {
+		if (!rhlist || !(list = rcu_dereference(list->next))) {
+			p = rcu_dereference(p->next);
+			list = container_of(p, struct rhlist_head, rhead);
+		}
+		if (!rht_is_a_nulls(p)) {
+			iter->skip++;
+			iter->p = p;
+			iter->list = list;
+			return rht_obj(ht, rhlist ? &list->rhead : p);
+		}
+
+		/* At the end of this slot, switch to next one and then find
+		 * next entry from that point.
+		 */
+		iter->skip = 0;
+		iter->slot++;
+	}
+
+	return __rhashtable_walk_find_next(iter);
+}
 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
 
+/**
+ * rhashtable_walk_peek - Return the next object but don't advance the iterator
+ * @iter:	Hash table iterator
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred.  Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_peek(struct rhashtable_iter *iter)
+{
+	struct rhlist_head *list = iter->list;
+	struct rhashtable *ht = iter->ht;
+	struct rhash_head *p = iter->p;
+
+	if (p)
+		return rht_obj(ht, ht->rhlist ? &list->rhead : p);
+
+	/* No object found in current iter, find next one in the table. */
+
+	if (iter->skip) {
+		/* A nonzero skip value points to the next entry in the table
+		 * beyond that last one that was found. Decrement skip so
+		 * we find the current value. __rhashtable_walk_find_next
+		 * will restore the original value of skip assuming that
+		 * the table hasn't changed.
+		 */
+		iter->skip--;
+	}
+
+	return __rhashtable_walk_find_next(iter);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
+
 /**
  * rhashtable_walk_stop - Finish a hash table walk
  * @iter:	Hash table iterator
-- 
cgit 


From 92f36cca5773cbaa78c46ccf49503964a52da294 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Mon, 4 Dec 2017 10:31:44 -0800
Subject: spinlock: Add library function to allocate spinlock buckets array

Add two new library functions: alloc_bucket_spinlocks and
free_bucket_spinlocks. These are used to allocate and free an array
of spinlocks that are useful as locks for hash buckets. The interface
specifies the maximum number of spinlocks in the array as well
as a CPU multiplier to derive the number of spinlocks to allocate.
The number allocated is rounded up to a power of two to make the
array amenable to hash lookup.

Signed-off-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/Makefile       |  2 +-
 lib/bucket_locks.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 lib/bucket_locks.c

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..a6c8529dd9b2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -39,7 +39,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
-	 once.o refcount.o usercopy.o errseq.o
+	 once.o refcount.o usercopy.o errseq.o bucket_locks.o
 obj-$(CONFIG_STRING_SELFTEST) += test_string.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
new file mode 100644
index 000000000000..266a97c5708b
--- /dev/null
+++ b/lib/bucket_locks.c
@@ -0,0 +1,54 @@
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/* Allocate an array of spinlocks to be accessed by a hash. Two arguments
+ * indicate the number of elements to allocate in the array. max_size
+ * gives the maximum number of elements to allocate. cpu_mult gives
+ * the number of locks per CPU to allocate. The size is rounded up
+ * to a power of 2 to be suitable as a hash table.
+ */
+
+int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
+			   size_t max_size, unsigned int cpu_mult, gfp_t gfp)
+{
+	spinlock_t *tlocks = NULL;
+	unsigned int i, size;
+#if defined(CONFIG_PROVE_LOCKING)
+	unsigned int nr_pcpus = 2;
+#else
+	unsigned int nr_pcpus = num_possible_cpus();
+#endif
+
+	if (cpu_mult) {
+		nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
+		size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size);
+	} else {
+		size = max_size;
+	}
+
+	if (sizeof(spinlock_t) != 0) {
+		if (gfpflags_allow_blocking(gfp))
+			tlocks = kvmalloc(size * sizeof(spinlock_t), gfp);
+		else
+			tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp);
+		if (!tlocks)
+			return -ENOMEM;
+		for (i = 0; i < size; i++)
+			spin_lock_init(&tlocks[i]);
+	}
+
+	*locks = tlocks;
+	*locks_mask = size - 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(alloc_bucket_spinlocks);
+
+void free_bucket_spinlocks(spinlock_t *locks)
+{
+	kvfree(locks);
+}
+EXPORT_SYMBOL(free_bucket_spinlocks);
-- 
cgit 


From 64e0cd0d3540dbbdf6661943025409e6b31d5178 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Mon, 4 Dec 2017 10:31:45 -0800
Subject: rhashtable: Call library function alloc_bucket_spinlocks

To allocate the array of bucket locks for the hash table we now
call library function alloc_bucket_spinlocks. This function is
based on the old alloc_bucket_locks in rhashtable and should
produce the same effect.

Signed-off-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 47 ++++++++---------------------------------------
 1 file changed, 8 insertions(+), 39 deletions(-)

(limited to 'lib')

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 6fc52d82efe6..3825c30aaa36 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -65,42 +65,6 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
 #define ASSERT_RHT_MUTEX(HT)
 #endif
 
-
-static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
-			      gfp_t gfp)
-{
-	unsigned int i, size;
-#if defined(CONFIG_PROVE_LOCKING)
-	unsigned int nr_pcpus = 2;
-#else
-	unsigned int nr_pcpus = num_possible_cpus();
-#endif
-
-	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
-	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
-
-	/* Never allocate more than 0.5 locks per bucket */
-	size = min_t(unsigned int, size, tbl->size >> 1);
-
-	if (tbl->nest)
-		size = min(size, 1U << tbl->nest);
-
-	if (sizeof(spinlock_t) != 0) {
-		if (gfpflags_allow_blocking(gfp))
-			tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp);
-		else
-			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-						   gfp);
-		if (!tbl->locks)
-			return -ENOMEM;
-		for (i = 0; i < size; i++)
-			spin_lock_init(&tbl->locks[i]);
-	}
-	tbl->locks_mask = size - 1;
-
-	return 0;
-}
-
 static void nested_table_free(union nested_table *ntbl, unsigned int size)
 {
 	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
@@ -140,7 +104,7 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	if (tbl->nest)
 		nested_bucket_table_free(tbl);
 
-	kvfree(tbl->locks);
+	free_bucket_spinlocks(tbl->locks);
 	kvfree(tbl);
 }
 
@@ -207,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       gfp_t gfp)
 {
 	struct bucket_table *tbl = NULL;
-	size_t size;
+	size_t size, max_locks;
 	int i;
 
 	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
@@ -227,7 +191,12 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 
 	tbl->size = size;
 
-	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
+	max_locks = size >> 1;
+	if (tbl->nest)
+		max_locks = min_t(size_t, max_locks, 1U << tbl->nest);
+
+	if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks,
+				   ht->p.locks_mul, gfp) < 0) {
 		bucket_table_free(tbl);
 		return NULL;
 	}
-- 
cgit 


From 540adea3809f61115d2a1ea4ed6e627613452ba1 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 13 Jan 2018 02:55:03 +0900
Subject: error-injection: Separate error-injection from kprobe

The error-injection framework is not limited to use by kprobes
or bpf. Other kernel subsystems (e.g. livepatch, ftrace) can use
it freely to check whether injecting an error into a function is
safe.
So this separates the error-injection framework from kprobes.

Some changes have been made:

- The word "kprobe" is removed from all APIs/structures.
- BPF_ALLOW_ERROR_INJECTION() is renamed to
  ALLOW_ERROR_INJECTION() since it is not limited to BPF either.
- CONFIG_FUNCTION_ERROR_INJECTION is the config item of this
  feature. It is automatically enabled if the arch supports
  error injection feature for kprobe or ftrace etc.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 lib/Kconfig.debug  |   4 +
 lib/Makefile       |   1 +
 lib/error-inject.c | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 218 insertions(+)
 create mode 100644 lib/error-inject.c

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9d5b78aad4c5..2a33efdd1fea 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1500,6 +1500,10 @@ config FAULT_INJECTION
 	  Provide fault-injection framework.
 	  For more details, see Documentation/fault-injection/.
 
+config FUNCTION_ERROR_INJECTION
+	def_bool y
+	depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
+
 config FAILSLAB
 	bool "Fault-injection capability for kmalloc"
 	depends on FAULT_INJECTION
diff --git a/lib/Makefile b/lib/Makefile
index a6c8529dd9b2..75ec13778cd8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -149,6 +149,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
 obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
 	of-reconfig-notifier-error-inject.o
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
diff --git a/lib/error-inject.c b/lib/error-inject.c
new file mode 100644
index 000000000000..bccadcf3c981
--- /dev/null
+++ b/lib/error-inject.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+// error-inject.c: Function-level error injection table
+#include <linux/error-injection.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+/* Whitelist of symbols that can be overridden for error injection. */
+static LIST_HEAD(error_injection_list);
+static DEFINE_MUTEX(ei_mutex);
+struct ei_entry {
+	struct list_head list;
+	unsigned long start_addr;
+	unsigned long end_addr;
+	void *priv;
+};
+
+bool within_error_injection_list(unsigned long addr)
+{
+	struct ei_entry *ent;
+	bool ret = false;
+
+	mutex_lock(&ei_mutex);
+	list_for_each_entry(ent, &error_injection_list, list) {
+		if (addr >= ent->start_addr && addr < ent->end_addr) {
+			ret = true;
+			break;
+		}
+	}
+	mutex_unlock(&ei_mutex);
+	return ret;
+}
+
+/*
+ * Lookup and populate the error_injection_list.
+ *
+ * For safety reasons we only allow certain functions to be overridden with
+ * bpf_error_injection, so we need to populate the list of the symbols that have
+ * been marked as safe for overriding.
+ */
+static void populate_error_injection_list(unsigned long *start,
+					  unsigned long *end, void *priv)
+{
+	unsigned long *iter;
+	struct ei_entry *ent;
+	unsigned long entry, offset = 0, size = 0;
+
+	mutex_lock(&ei_mutex);
+	for (iter = start; iter < end; iter++) {
+		entry = arch_deref_entry_point((void *)*iter);
+
+		if (!kernel_text_address(entry) ||
+		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+			pr_err("Failed to find error inject entry at %p\n",
+				(void *)entry);
+			continue;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent)
+			break;
+		ent->start_addr = entry;
+		ent->end_addr = entry + size;
+		ent->priv = priv;
+		INIT_LIST_HEAD(&ent->list);
+		list_add_tail(&ent->list, &error_injection_list);
+	}
+	mutex_unlock(&ei_mutex);
+}
+
+/* Markers of the _error_inject_whitelist section */
+extern unsigned long __start_error_injection_whitelist[];
+extern unsigned long __stop_error_injection_whitelist[];
+
+static void __init populate_kernel_ei_list(void)
+{
+	populate_error_injection_list(__start_error_injection_whitelist,
+				      __stop_error_injection_whitelist,
+				      NULL);
+}
+
+#ifdef CONFIG_MODULES
+static void module_load_ei_list(struct module *mod)
+{
+	if (!mod->num_ei_funcs)
+		return;
+
+	populate_error_injection_list(mod->ei_funcs,
+				      mod->ei_funcs + mod->num_ei_funcs, mod);
+}
+
+static void module_unload_ei_list(struct module *mod)
+{
+	struct ei_entry *ent, *n;
+
+	if (!mod->num_ei_funcs)
+		return;
+
+	mutex_lock(&ei_mutex);
+	list_for_each_entry_safe(ent, n, &error_injection_list, list) {
+		if (ent->priv == mod) {
+			list_del_init(&ent->list);
+			kfree(ent);
+		}
+	}
+	mutex_unlock(&ei_mutex);
+}
+
+/* Module notifier call back, checking error injection table on the module */
+static int ei_module_callback(struct notifier_block *nb,
+			      unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	if (val == MODULE_STATE_COMING)
+		module_load_ei_list(mod);
+	else if (val == MODULE_STATE_GOING)
+		module_unload_ei_list(mod);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ei_module_nb = {
+	.notifier_call = ei_module_callback,
+	.priority = 0
+};
+
+static __init int module_ei_init(void)
+{
+	return register_module_notifier(&ei_module_nb);
+}
+#else /* !CONFIG_MODULES */
+#define module_ei_init()	(0)
+#endif
+
+/*
+ * error_injection/whitelist -- shows which functions can be overridden for
+ * error injection.
+ */
+static void *ei_seq_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&ei_mutex);
+	return seq_list_start(&error_injection_list, *pos);
+}
+
+static void ei_seq_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&ei_mutex);
+}
+
+static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &error_injection_list, pos);
+}
+
+static int ei_seq_show(struct seq_file *m, void *v)
+{
+	struct ei_entry *ent = list_entry(v, struct ei_entry, list);
+
+	seq_printf(m, "%pf\n", (void *)ent->start_addr);
+	return 0;
+}
+
+static const struct seq_operations ei_seq_ops = {
+	.start = ei_seq_start,
+	.next  = ei_seq_next,
+	.stop  = ei_seq_stop,
+	.show  = ei_seq_show,
+};
+
+static int ei_open(struct inode *inode, struct file *filp)
+{
+	return seq_open(filp, &ei_seq_ops);
+}
+
+static const struct file_operations debugfs_ei_ops = {
+	.open           = ei_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static int __init ei_debugfs_init(void)
+{
+	struct dentry *dir, *file;
+
+	dir = debugfs_create_dir("error_injection", NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops);
+	if (!file) {
+		debugfs_remove(dir);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int __init init_error_injection(void)
+{
+	populate_kernel_ei_list();
+
+	if (!module_ei_init())
+		ei_debugfs_init();
+
+	return 0;
+}
+late_initcall(init_error_injection);
-- 
cgit 


From 663faf9f7beeaca4ad0176bb96c776eed9dad0c5 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 13 Jan 2018 02:55:33 +0900
Subject: error-injection: Add injectable error types

Add injectable error types for each error-injectable function.

One motivation of error injection testing is to find software flaws,
mistakes or mishandling of expected errors. If we find such
flaws by the test, that is a program bug, so we need to fix it.

But if the tester injects the wrong kind of error (e.g. just returns a
success code without processing anything), it causes unexpected behavior
even if the caller is correctly programmed to handle any errors.
That is not what we want to test by error injection.

To clarify what type of errors the caller must expect for each
injectable function, this introduces injectable error types:

 - EI_ETYPE_NULL : means the function will return NULL if it
		    fails. No ERR_PTR, just a NULL.
 - EI_ETYPE_ERRNO : means the function will return -ERRNO
		    if it fails.
 - EI_ETYPE_ERRNO_NULL : means the function will return -ERRNO
		       (ERR_PTR) or NULL.

The ALLOW_ERROR_INJECTION() macro is extended to take one of
NULL, ERRNO, ERRNO_NULL to record the error type for
each function. e.g.

 ALLOW_ERROR_INJECTION(open_ctree, ERRNO)

These error types are shown in debugfs as below.

  ====
  / # cat /sys/kernel/debug/error_injection/list
  open_ctree [btrfs]	ERRNO
  io_ctl_init [btrfs]	ERRNO
  ====

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 lib/error-inject.c | 43 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/error-inject.c b/lib/error-inject.c
index bccadcf3c981..c0d4600f4896 100644
--- a/lib/error-inject.c
+++ b/lib/error-inject.c
@@ -16,6 +16,7 @@ struct ei_entry {
 	struct list_head list;
 	unsigned long start_addr;
 	unsigned long end_addr;
+	int etype;
 	void *priv;
 };
 
@@ -35,6 +36,17 @@ bool within_error_injection_list(unsigned long addr)
 	return ret;
 }
 
+int get_injectable_error_type(unsigned long addr)
+{
+	struct ei_entry *ent;
+
+	list_for_each_entry(ent, &error_injection_list, list) {
+		if (addr >= ent->start_addr && addr < ent->end_addr)
+			return ent->etype;
+	}
+	return EI_ETYPE_NONE;
+}
+
 /*
  * Lookup and populate the error_injection_list.
  *
@@ -42,16 +54,17 @@ bool within_error_injection_list(unsigned long addr)
  * bpf_error_injection, so we need to populate the list of the symbols that have
  * been marked as safe for overriding.
  */
-static void populate_error_injection_list(unsigned long *start,
-					  unsigned long *end, void *priv)
+static void populate_error_injection_list(struct error_injection_entry *start,
+					  struct error_injection_entry *end,
+					  void *priv)
 {
-	unsigned long *iter;
+	struct error_injection_entry *iter;
 	struct ei_entry *ent;
 	unsigned long entry, offset = 0, size = 0;
 
 	mutex_lock(&ei_mutex);
 	for (iter = start; iter < end; iter++) {
-		entry = arch_deref_entry_point((void *)*iter);
+		entry = arch_deref_entry_point((void *)iter->addr);
 
 		if (!kernel_text_address(entry) ||
 		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
@@ -65,6 +78,7 @@ static void populate_error_injection_list(unsigned long *start,
 			break;
 		ent->start_addr = entry;
 		ent->end_addr = entry + size;
+		ent->etype = iter->etype;
 		ent->priv = priv;
 		INIT_LIST_HEAD(&ent->list);
 		list_add_tail(&ent->list, &error_injection_list);
@@ -73,8 +87,8 @@ static void populate_error_injection_list(unsigned long *start,
 }
 
 /* Markers of the _error_inject_whitelist section */
-extern unsigned long __start_error_injection_whitelist[];
-extern unsigned long __stop_error_injection_whitelist[];
+extern struct error_injection_entry __start_error_injection_whitelist[];
+extern struct error_injection_entry __stop_error_injection_whitelist[];
 
 static void __init populate_kernel_ei_list(void)
 {
@@ -157,11 +171,26 @@ static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
 	return seq_list_next(v, &error_injection_list, pos);
 }
 
+static const char *error_type_string(int etype)
+{
+	switch (etype) {
+	case EI_ETYPE_NULL:
+		return "NULL";
+	case EI_ETYPE_ERRNO:
+		return "ERRNO";
+	case EI_ETYPE_ERRNO_NULL:
+		return "ERRNO_NULL";
+	default:
+		return "(unknown)";
+	}
+}
+
 static int ei_seq_show(struct seq_file *m, void *v)
 {
 	struct ei_entry *ent = list_entry(v, struct ei_entry, list);
 
-	seq_printf(m, "%pf\n", (void *)ent->start_addr);
+	seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr,
+		   error_type_string(ent->etype));
 	return 0;
 }
 
-- 
cgit 


From 4b1a29a7f5425d32640b34b8a755f34e02f64d0f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 13 Jan 2018 02:56:03 +0900
Subject: error-injection: Support fault injection framework

Support the in-kernel fault-injection framework via debugfs.
This allows you to inject a conditional error into a specified
function using debugfs interfaces.

Here is the result of the test script described in
Documentation/fault-injection/fault-injection.txt

  ===========
  # ./test_fail_function.sh
  1+0 records in
  1+0 records out
  1048576 bytes (1.0 MB, 1.0 MiB) copied, 0.0227404 s, 46.1 MB/s
  btrfs-progs v4.4
  See http://btrfs.wiki.kernel.org for more information.

  Label:              (null)
  UUID:               bfa96010-12e9-4360-aed0-42eec7af5798
  Node size:          16384
  Sector size:        4096
  Filesystem size:    1001.00MiB
  Block group profiles:
    Data:             single            8.00MiB
    Metadata:         DUP              58.00MiB
    System:           DUP              12.00MiB
  SSD detected:       no
  Incompat features:  extref, skinny-metadata
  Number of devices:  1
  Devices:
     ID        SIZE  PATH
      1  1001.00MiB  /dev/loop2

  mount: mount /dev/loop2 on /opt/tmpmnt failed: Cannot allocate memory
  SUCCESS!
  ===========

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 lib/Kconfig.debug | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2a33efdd1fea..890d4766cef3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1551,6 +1551,16 @@ config FAIL_FUTEX
 	help
 	  Provide fault-injection capability for futexes.
 
+config FAIL_FUNCTION
+	bool "Fault-injection capability for functions"
+	depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
+	help
+	  Provide function-based fault-injection capability.
+	  This will allow you to override a specific function with a return
+	  with given return value. As a result, function caller will see
+	  an error value and have to handle it. This is useful to test the
+	  error handling in various subsystems.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
-- 
cgit 


From fcd1c9177195489c40198d2769649439dd88505b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 20 Jan 2018 01:24:31 +0100
Subject: bpf: add couple of test cases for signed extended imms

Add a couple of test cases for interpreter and JIT that are
related to an issue we faced some time ago in Cilium [1],
which is fixed in LLVM with commit e53750e1e086 ("bpf: fix
bug on silently truncating 64-bit immediate").

The test cases check at run time that the kernel behaves as intended,
which should also provide some guidance for current or new
JITs in case they should trip over this. Added for cBPF and
eBPF.

  [1] https://github.com/cilium/cilium/pull/2162

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 lib/test_bpf.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

(limited to 'lib')

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index f369889e521d..e3938e395cba 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6109,6 +6109,110 @@ static struct bpf_test tests[] = {
 		{ { ETH_HLEN, 42 } },
 		.fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
 	},
+	/* Checking interpreter vs JIT wrt signed extended imms. */
+	{
+		"JNE signed compare, test 1",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+			BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+			BPF_MOV64_REG(R2, R1),
+			BPF_ALU64_REG(BPF_AND, R2, R3),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JNE signed compare, test 2",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+			BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+			BPF_MOV64_REG(R2, R1),
+			BPF_ALU64_REG(BPF_AND, R2, R3),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JNE signed compare, test 3",
+		.u.insns_int = {
+			BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+			BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+			BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000),
+			BPF_MOV64_REG(R2, R1),
+			BPF_ALU64_REG(BPF_AND, R2, R3),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_REG(BPF_JNE, R2, R4, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2 } },
+	},
+	{
+		"JNE signed compare, test 4",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, -17104896),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2 } },
+	},
+	{
+		"JNE signed compare, test 5",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0xfefb0000),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 1 } },
+	},
+	{
+		"JNE signed compare, test 6",
+		.u.insns_int = {
+			BPF_LD_IMM64(R1, 0x7efb0000),
+			BPF_ALU32_IMM(BPF_MOV, R0, 1),
+			BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1),
+			BPF_ALU32_IMM(BPF_MOV, R0, 2),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 2 } },
+	},
+	{
+		"JNE signed compare, test 7",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000),
+			BPF_STMT(BPF_MISC | BPF_TAX, 0),
+			BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12),
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0),
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0),
+			BPF_STMT(BPF_RET | BPF_K, 1),
+			BPF_STMT(BPF_RET | BPF_K, 2),
+		},
+		CLASSIC | FLAG_NO_DATA,
+		{},
+		{ { 0, 2 } },
+	},
 };
 
 static struct net_device dev;
-- 
cgit 


From 21ccaf21497b72f42133182716a42dbf573d314b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 26 Jan 2018 23:33:48 +0100
Subject: bpf: add further test cases around div/mod and others

Update selftests to reflect recent changes and add various new
test cases.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 lib/test_bpf.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index e3938e395cba..4cd9ea9b3449 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -2003,10 +2003,14 @@ static struct bpf_test tests[] = {
 		{ { 4, 0 }, { 5, 10 } }
 	},
 	{
-		"INT: DIV by zero",
+		/* This one doesn't go through verifier, but is just raw insn
+		 * as opposed to cBPF tests from here. Thus div by 0 tests are
+		 * done in test_verifier in BPF kselftests.
+		 */
+		"INT: DIV by -1",
 		.u.insns_int = {
 			BPF_ALU64_REG(BPF_MOV, R6, R1),
-			BPF_ALU64_IMM(BPF_MOV, R7, 0),
+			BPF_ALU64_IMM(BPF_MOV, R7, -1),
 			BPF_LD_ABS(BPF_B, 3),
 			BPF_ALU32_REG(BPF_DIV, R0, R7),
 			BPF_EXIT_INSN(),
-- 
cgit