Diffstat (limited to 'lib/maple_tree.c')
 -rw-r--r--  lib/maple_tree.c | 5299 lines changed
 1 file changed, 2704 insertions(+), 2595 deletions(-)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 26e2045d3cda..5aa4c9500018 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4,6 +4,8 @@
* Copyright (c) 2018-2022 Oracle Corporation
* Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
* Matthew Wilcox <willy@infradead.org>
+ * Copyright (c) 2023 ByteDance
+ * Author: Peng Zhang <zhangpeng.00@bytedance.com>
*/
/*
@@ -14,8 +16,8 @@
* and are simply the slot index + the minimum of the node.
*
* In regular B-Tree terms, pivots are called keys. The term pivot is used to
- * indicate that the tree is specifying ranges, Pivots may appear in the
- * subtree with an entry attached to the value where as keys are unique to a
+ * indicate that the tree is specifying ranges. Pivots may appear in the
+ * subtree with an entry attached to the value whereas keys are unique to a
* specific position of a B-tree. Pivot values are inclusive of the slot with
* the same index.
*
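For readers new to the structure, a minimal sketch of how the inclusive pivots described above resolve an index to a slot (illustrative only, not part of the patch; the helper name and the plain linear scan are assumptions, and the index-0 corner case is ignored):

	static unsigned char example_offset_for_index(const unsigned long *pivot,
						      unsigned char nr_pivots,
						      unsigned long index)
	{
		unsigned char i;

		for (i = 0; i < nr_pivots && pivot[i]; i++)
			if (index <= pivot[i])	/* pivots are inclusive upper bounds */
				return i;
		return i;	/* past the last set pivot: bounded by the node's max */
	}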
@@ -62,19 +64,33 @@
#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>
+#define TP_FCT tracepoint_string(__func__)
+
+/*
+ * Kernel pointer hashing renders much of the maple tree dump useless as tagged
+ * pointers get hashed to arbitrary values.
+ *
+ * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
+ * permissible to bypass this. Otherwise remain cautious and retain the hashing.
+ *
+ * Userland doesn't know about %px so also use %p there.
+ */
+#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE)
+#define PTR_FMT "%px"
+#else
+#define PTR_FMT "%p"
+#endif
+
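A hedged usage sketch for PTR_FMT (the pr_info() call and variable names are illustrative, not taken from this hunk): dump-style helpers can interpolate the macro into their format strings so the %px-versus-%p decision stays in one place.

	/* Hypothetical dump line; PTR_FMT expands to "%px" or "%p" as selected above. */
	pr_info("maple node " PTR_FMT " parent " PTR_FMT "\n", node, node->parent);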
#define MA_ROOT_PARENT 1
/*
* Maple state flags
- * * MA_STATE_BULK - Bulk insert mode
- * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert
* * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation
*/
-#define MA_STATE_BULK 1
-#define MA_STATE_REBALANCE 2
-#define MA_STATE_PREALLOC 4
+#define MA_STATE_PREALLOC 1
#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
+#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
#define ma_mnode_ptr(x) ((struct maple_node *)(x))
#define ma_enode_ptr(x) ((struct maple_enode *)(x))
static struct kmem_cache *maple_node_cache;
@@ -117,7 +133,6 @@ static const unsigned char mt_min_slots[] = {
#define MAPLE_BIG_NODE_GAPS (MAPLE_ARANGE64_SLOTS * 2 + 1)
struct maple_big_node {
- struct maple_pnode *parent;
unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
union {
struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
@@ -146,28 +161,38 @@ struct maple_subtree_state {
struct maple_big_node *bn;
};
+#ifdef CONFIG_KASAN_STACK
+/* Prevent mas_wr_bnode() from exceeding the stack frame limit */
+#define noinline_for_kasan noinline_for_stack
+#else
+#define noinline_for_kasan inline
+#endif
+
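The annotation is applied later in this patch to keep the on-stack maple_big_node out of an inlined caller's frame under KASAN_STACK; as a pointer, this is the shape of its use on the big-node store path (signature copied from the mas_store_b_node() hunk further down):

	static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
			struct maple_big_node *b_node, unsigned char offset_end);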
/* Functions */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
{
- return kmem_cache_alloc(maple_node_cache, gfp | __GFP_ZERO);
+ return kmem_cache_alloc(maple_node_cache, gfp);
}
-static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
+static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
- return kmem_cache_alloc_bulk(maple_node_cache, gfp | __GFP_ZERO, size,
- nodes);
+ kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
}
-static inline void mt_free_bulk(size_t size, void __rcu **nodes)
+static void mt_return_sheaf(struct slab_sheaf *sheaf)
{
- kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
+ kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf);
}
-static void mt_free_rcu(struct rcu_head *head)
+static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count)
{
- struct maple_node *node = container_of(head, struct maple_node, rcu);
+ return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count);
+}
- kmem_cache_free(maple_node_cache, node);
+static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf,
+ unsigned int size)
+{
+ return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size);
}
/*
@@ -179,19 +204,18 @@ static void mt_free_rcu(struct rcu_head *head)
*/
static void ma_free_rcu(struct maple_node *node)
{
- node->parent = ma_parent_ptr(node);
- call_rcu(&node->rcu, mt_free_rcu);
+ WARN_ON(node->parent != ma_parent_ptr(node));
+ kfree_rcu(node, rcu);
}
-
-static void mas_set_height(struct ma_state *mas)
+static void mt_set_height(struct maple_tree *mt, unsigned char height)
{
- unsigned int new_flags = mas->tree->ma_flags;
+ unsigned int new_flags = mt->ma_flags;
new_flags &= ~MT_FLAGS_HEIGHT_MASK;
- BUG_ON(mas->depth > MAPLE_HEIGHT_MAX);
- new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
- mas->tree->ma_flags = new_flags;
+ MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX);
+ new_flags |= height << MT_FLAGS_HEIGHT_OFFSET;
+ mt->ma_flags = new_flags;
}
static unsigned int mas_mt_height(struct ma_state *mas)
@@ -199,23 +223,29 @@ static unsigned int mas_mt_height(struct ma_state *mas)
return mt_height(mas->tree);
}
-static inline enum maple_type mte_node_type(const struct maple_enode *entry)
+static inline unsigned int mt_attr(struct maple_tree *mt)
+{
+ return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
+}
+
+static __always_inline enum maple_type mte_node_type(
+ const struct maple_enode *entry)
{
return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
MAPLE_NODE_TYPE_MASK;
}
-static inline bool ma_is_dense(const enum maple_type type)
+static __always_inline bool ma_is_dense(const enum maple_type type)
{
return type < maple_leaf_64;
}
-static inline bool ma_is_leaf(const enum maple_type type)
+static __always_inline bool ma_is_leaf(const enum maple_type type)
{
return type < maple_range_64;
}
-static inline bool mte_is_leaf(const struct maple_enode *entry)
+static __always_inline bool mte_is_leaf(const struct maple_enode *entry)
{
return ma_is_leaf(mte_node_type(entry));
}
@@ -224,44 +254,50 @@ static inline bool mte_is_leaf(const struct maple_enode *entry)
* We also reserve values with the bottom two bits set to '10' which are
* below 4096
*/
-static inline bool mt_is_reserved(const void *entry)
+static __always_inline bool mt_is_reserved(const void *entry)
{
return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
xa_is_internal(entry);
}
-static inline void mas_set_err(struct ma_state *mas, long err)
+static __always_inline void mas_set_err(struct ma_state *mas, long err)
{
mas->node = MA_ERROR(err);
+ mas->status = ma_error;
}
-static inline bool mas_is_ptr(struct ma_state *mas)
+static __always_inline bool mas_is_ptr(const struct ma_state *mas)
{
- return mas->node == MAS_ROOT;
+ return mas->status == ma_root;
}
-static inline bool mas_is_start(struct ma_state *mas)
+static __always_inline bool mas_is_start(const struct ma_state *mas)
{
- return mas->node == MAS_START;
+ return mas->status == ma_start;
}
-bool mas_is_err(struct ma_state *mas)
+static __always_inline bool mas_is_none(const struct ma_state *mas)
{
- return xa_is_err(mas->node);
+ return mas->status == ma_none;
}
-static inline bool mas_searchable(struct ma_state *mas)
+static __always_inline bool mas_is_paused(const struct ma_state *mas)
{
- if (mas_is_none(mas))
- return false;
+ return mas->status == ma_pause;
+}
- if (mas_is_ptr(mas))
- return false;
+static __always_inline bool mas_is_overflow(struct ma_state *mas)
+{
+ return mas->status == ma_overflow;
+}
- return true;
+static inline bool mas_is_underflow(struct ma_state *mas)
+{
+ return mas->status == ma_underflow;
}
-static inline struct maple_node *mte_to_node(const struct maple_enode *entry)
+static __always_inline struct maple_node *mte_to_node(
+ const struct maple_enode *entry)
{
return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
}
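A small, hypothetical caller to show what the switch from MAS_* sentinel node values to the mas->status enum buys (the helper name is an assumption; its logic mirrors the old mas_searchable() removed in this hunk):

	static bool example_can_walk(const struct ma_state *mas)
	{
		/* Previously spelled as mas->node == MAS_NONE / MAS_ROOT checks. */
		return !mas_is_none(mas) && !mas_is_ptr(mas);
	}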
@@ -323,27 +359,27 @@ static inline void *mte_safe_root(const struct maple_enode *node)
return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
}
-static inline void *mte_set_full(const struct maple_enode *node)
+static inline void __maybe_unused *mte_set_full(const struct maple_enode *node)
{
return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
}
-static inline void *mte_clear_full(const struct maple_enode *node)
+static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node)
{
return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
}
-static inline bool mte_has_null(const struct maple_enode *node)
+static inline bool __maybe_unused mte_has_null(const struct maple_enode *node)
{
return (unsigned long)node & MAPLE_ENODE_NULL;
}
-static inline bool ma_is_root(struct maple_node *node)
+static __always_inline bool ma_is_root(struct maple_node *node)
{
return ((unsigned long)node->parent & MA_ROOT_PARENT);
}
-static inline bool mte_is_root(const struct maple_enode *node)
+static __always_inline bool mte_is_root(const struct maple_enode *node)
{
return ma_is_root(mte_to_node(node));
}
@@ -353,7 +389,7 @@ static inline bool mas_is_root_limits(const struct ma_state *mas)
return !mas->min && mas->max == ULONG_MAX;
}
-static inline bool mt_is_alloc(struct maple_tree *mt)
+static __always_inline bool mt_is_alloc(struct maple_tree *mt)
{
return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
}
@@ -366,11 +402,11 @@ static inline bool mt_is_alloc(struct maple_tree *mt)
* a reuse of the last bit in the node type. This is possible by using bit 1 to
* indicate if bit 2 is part of the type or the slot.
*
- * Note types:
- * 0x??1 = Root
- * 0x?00 = 16 bit nodes
- * 0x010 = 32 bit nodes
- * 0x110 = 64 bit nodes
+ * Node types:
+ * 0b??1 = Root
+ * 0b?00 = 16 bit nodes
+ * 0b010 = 32 bit nodes
+ * 0b110 = 64 bit nodes
*
* Slot size and alignment
* 0b??1 : Root
@@ -388,7 +424,7 @@ static inline bool mt_is_alloc(struct maple_tree *mt)
#define MAPLE_PARENT_16B_SLOT_MASK 0xFC
#define MAPLE_PARENT_RANGE64 0x06
-#define MAPLE_PARENT_RANGE32 0x04
+#define MAPLE_PARENT_RANGE32 0x02
#define MAPLE_PARENT_NOT_RANGE16 0x02
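As background for the constants above (illustrative, not part of the patch): maple nodes are allocated aligned to their 256-byte size, so the low bits of the stored parent pointer are free to carry the root flag, parent type, and slot, and masking them off recovers the parent node, as mte_parent() below does.

	static struct maple_node *example_unpack_parent(unsigned long packed)
	{
		/* Low bits: 0b??1 root flag, otherwise type + slot (see table above). */
		if (packed & MAPLE_PARENT_ROOT)
			return NULL;		/* root: no parent node to return */

		return (struct maple_node *)(packed & ~MAPLE_NODE_MASK);
	}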
/*
@@ -420,28 +456,26 @@ static inline unsigned long mte_parent_slot_mask(unsigned long parent)
}
/*
- * mas_parent_enum() - Return the maple_type of the parent from the stored
+ * mas_parent_type() - Return the maple_type of the parent from the stored
* parent type.
* @mas: The maple state
- * @node: The maple_enode to extract the parent's enum
+ * @enode: The maple_enode to extract the parent's enum
* Return: The node->parent maple_type
*/
static inline
-enum maple_type mte_parent_enum(struct maple_enode *p_enode,
- struct maple_tree *mt)
+enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
{
unsigned long p_type;
- p_type = (unsigned long)p_enode;
- if (p_type & MAPLE_PARENT_ROOT)
- return 0; /* Validated in the caller. */
+ p_type = (unsigned long)mte_to_node(enode)->parent;
+ if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
+ return 0;
p_type &= MAPLE_NODE_MASK;
- p_type = p_type & ~(MAPLE_PARENT_ROOT | mte_parent_slot_mask(p_type));
-
+ p_type &= ~mte_parent_slot_mask(p_type);
switch (p_type) {
case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
- if (mt_is_alloc(mt))
+ if (mt_is_alloc(mas->tree))
return maple_arange_64;
return maple_range_64;
}
@@ -449,14 +483,9 @@ enum maple_type mte_parent_enum(struct maple_enode *p_enode,
return 0;
}
-static inline
-enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_enode *enode)
-{
- return mte_parent_enum(ma_enode_ptr(mte_to_node(enode)->parent), mas->tree);
-}
-
/*
- * mte_set_parent() - Set the parent node and encode the slot
+ * mas_set_parent() - Set the parent node and encode the slot
+ * @mas: The maple state
* @enode: The encoded maple node.
* @parent: The encoded maple node that is the parent of @enode.
* @slot: The slot that @enode resides in @parent.
@@ -465,16 +494,16 @@ enum maple_type mas_parent_enum(struct ma_state *mas, struct maple_enode *enode)
* parent type.
*/
static inline
-void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent,
- unsigned char slot)
+void mas_set_parent(struct ma_state *mas, struct maple_enode *enode,
+ const struct maple_enode *parent, unsigned char slot)
{
- unsigned long val = (unsigned long) parent;
+ unsigned long val = (unsigned long)parent;
unsigned long shift;
unsigned long type;
enum maple_type p_type = mte_node_type(parent);
- BUG_ON(p_type == maple_dense);
- BUG_ON(p_type == maple_leaf_64);
+ MAS_BUG_ON(mas, p_type == maple_dense);
+ MAS_BUG_ON(mas, p_type == maple_leaf_64);
switch (p_type) {
case maple_range_64:
@@ -500,12 +529,12 @@ void mte_set_parent(struct maple_enode *enode, const struct maple_enode *parent,
*
* Return: The slot in the parent node where @enode resides.
*/
-static inline unsigned int mte_parent_slot(const struct maple_enode *enode)
+static __always_inline
+unsigned int mte_parent_slot(const struct maple_enode *enode)
{
- unsigned long val = (unsigned long) mte_to_node(enode)->parent;
+ unsigned long val = (unsigned long)mte_to_node(enode)->parent;
- /* Root. */
- if (val & 1)
+ if (unlikely(val & MA_ROOT_PARENT))
return 0;
/*
@@ -517,11 +546,12 @@ static inline unsigned int mte_parent_slot(const struct maple_enode *enode)
/*
* mte_parent() - Get the parent of @node.
- * @node: The encoded maple node.
+ * @enode: The encoded maple node.
*
* Return: The parent maple node.
*/
-static inline struct maple_node *mte_parent(const struct maple_enode *enode)
+static __always_inline
+struct maple_node *mte_parent(const struct maple_enode *enode)
{
return (void *)((unsigned long)
(mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
@@ -533,93 +563,36 @@ static inline struct maple_node *mte_parent(const struct maple_enode *enode)
*
* Return: true if dead, false otherwise.
*/
-static inline bool ma_dead_node(const struct maple_node *node)
+static __always_inline bool ma_dead_node(const struct maple_node *node)
{
- struct maple_node *parent = (void *)((unsigned long)
- node->parent & ~MAPLE_NODE_MASK);
+ struct maple_node *parent;
+ /* Do not reorder reads from the node prior to the parent check */
+ smp_rmb();
+ parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
return (parent == node);
}
+
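For context on the check above (a sketch, not part of this hunk): a node is retired by making it its own parent, which is exactly what ma_dead_node() detects, and the write side pairs an smp_wmb() with the smp_rmb() added here. Modelled on mte_set_node_dead(), which later hunks call, e.g. from mas_put_in_tree():

	static void example_mark_node_dead(struct maple_node *node)
	{
		node->parent = ma_parent_ptr(node);	/* parent == self: dead */
		smp_wmb();				/* order against readers' smp_rmb() */
	}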
/*
* mte_dead_node() - check if the @enode is dead.
* @enode: The encoded maple node
*
* Return: true if dead, false otherwise.
*/
-static inline bool mte_dead_node(const struct maple_enode *enode)
+static __always_inline bool mte_dead_node(const struct maple_enode *enode)
{
- struct maple_node *parent, *node;
+ struct maple_node *node;
node = mte_to_node(enode);
- parent = mte_parent(enode);
- return (parent == node);
-}
-
-/*
- * mas_allocated() - Get the number of nodes allocated in a maple state.
- * @mas: The maple state
- *
- * The ma_state alloc member is overloaded to hold a pointer to the first
- * allocated node or to the number of requested nodes to allocate. If bit 0 is
- * set, then the alloc contains the number of requested nodes. If there is an
- * allocated node, then the total allocated nodes is in that node.
- *
- * Return: The total number of nodes allocated
- */
-static inline unsigned long mas_allocated(const struct ma_state *mas)
-{
- if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
- return 0;
-
- return mas->alloc->total;
-}
-
-/*
- * mas_set_alloc_req() - Set the requested number of allocations.
- * @mas: the maple state
- * @count: the number of allocations.
- *
- * The requested number of allocations is either in the first allocated node,
- * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
- * no allocated node. Set the request either in the node or do the necessary
- * encoding to store in @mas->alloc directly.
- */
-static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
-{
- if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
- if (!count)
- mas->alloc = NULL;
- else
- mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
- return;
- }
-
- mas->alloc->request_count = count;
-}
-
-/*
- * mas_alloc_req() - get the requested number of allocations.
- * @mas: The maple state
- *
- * The alloc count is either stored directly in @mas, or in
- * @mas->alloc->request_count if there is at least one node allocated. Decode
- * the request count if it's stored directly in @mas->alloc.
- *
- * Return: The allocation request count.
- */
-static inline unsigned int mas_alloc_req(const struct ma_state *mas)
-{
- if ((unsigned long)mas->alloc & 0x1)
- return (unsigned long)(mas->alloc) >> 1;
- else if (mas->alloc)
- return mas->alloc->request_count;
- return 0;
+ return ma_dead_node(node);
}
/*
* ma_pivots() - Get a pointer to the maple node pivots.
- * @node - the maple node
- * @type - the node type
+ * @node: the maple node
+ * @type: the node type
+ *
+ * In the event of a dead node, this array may be %NULL
*
* Return: A pointer to the maple node pivots
*/
@@ -640,8 +613,8 @@ static inline unsigned long *ma_pivots(struct maple_node *node,
/*
* ma_gaps() - Get a pointer to the maple node gaps.
- * @node - the maple node
- * @type - the node type
+ * @node: the maple node
+ * @type: the node type
*
* Return: A pointer to the maple node gaps
*/
@@ -660,34 +633,6 @@ static inline unsigned long *ma_gaps(struct maple_node *node,
}
/*
- * mte_pivot() - Get the pivot at @piv of the maple encoded node.
- * @mn: The maple encoded node.
- * @piv: The pivot.
- *
- * Return: the pivot at @piv of @mn.
- */
-static inline unsigned long mte_pivot(const struct maple_enode *mn,
- unsigned char piv)
-{
- struct maple_node *node = mte_to_node(mn);
-
- if (piv >= mt_pivots[piv]) {
- WARN_ON(1);
- return 0;
- }
- switch (mte_node_type(mn)) {
- case maple_arange_64:
- return node->ma64.pivot[piv];
- case maple_range_64:
- case maple_leaf_64:
- return node->mr64.pivot[piv];
- case maple_dense:
- return 0;
- }
- return 0;
-}
-
-/*
* mas_safe_pivot() - get the pivot at @piv or mas->max.
* @mas: The maple state
* @pivots: The pointer to the maple node pivots
@@ -697,7 +642,7 @@ static inline unsigned long mte_pivot(const struct maple_enode *mn,
* Return: The pivot at @piv within the limit of the @pivots array, @mas->max
* otherwise.
*/
-static inline unsigned long
+static __always_inline unsigned long
mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
unsigned char piv, enum maple_type type)
{
@@ -725,33 +670,6 @@ mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
}
/*
- * mas_logical_pivot() - Get the logical pivot of a given offset.
- * @mas: The maple state
- * @pivots: The pointer to the maple node pivots
- * @offset: The offset into the pivot array
- * @type: The maple node type
- *
- * When there is no value at a pivot (beyond the end of the data), then the
- * pivot is actually @mas->max.
- *
- * Return: the logical pivot of a given @offset.
- */
-static inline unsigned long
-mas_logical_pivot(struct ma_state *mas, unsigned long *pivots,
- unsigned char offset, enum maple_type type)
-{
- unsigned long lpiv = mas_safe_pivot(mas, pivots, offset, type);
-
- if (likely(lpiv))
- return lpiv;
-
- if (likely(offset))
- return mas->max;
-
- return lpiv;
-}
-
-/*
* mte_set_pivot() - Set a pivot to a value in an encoded maple node.
* @mn: The encoded maple node
* @piv: The pivot offset
@@ -765,7 +683,6 @@ static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
BUG_ON(piv >= mt_pivots[type]);
switch (type) {
- default:
case maple_range_64:
case maple_leaf_64:
node->mr64.pivot[piv] = val;
@@ -789,7 +706,6 @@ static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
{
switch (mt) {
- default:
case maple_arange_64:
return mn->ma64.slot;
case maple_range_64:
@@ -798,20 +714,33 @@ static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
case maple_dense:
return mn->slot;
}
+
+ return NULL;
+}
+
+static inline bool mt_write_locked(const struct maple_tree *mt)
+{
+ return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
+ lockdep_is_held(&mt->ma_lock);
}
-static inline bool mt_locked(const struct maple_tree *mt)
+static __always_inline bool mt_locked(const struct maple_tree *mt)
{
return mt_external_lock(mt) ? mt_lock_is_held(mt) :
lockdep_is_held(&mt->ma_lock);
}
-static inline void *mt_slot(const struct maple_tree *mt,
+static __always_inline void *mt_slot(const struct maple_tree *mt,
void __rcu **slots, unsigned char offset)
{
return rcu_dereference_check(slots[offset], mt_locked(mt));
}
+static __always_inline void *mt_slot_locked(struct maple_tree *mt,
+ void __rcu **slots, unsigned char offset)
+{
+ return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
+}
/*
* mas_slot_locked() - Get the slot value when holding the maple tree lock.
* @mas: The maple state
@@ -820,10 +749,10 @@ static inline void *mt_slot(const struct maple_tree *mt,
*
* Return: The entry stored in @slots at the @offset.
*/
-static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
- unsigned char offset)
+static __always_inline void *mas_slot_locked(struct ma_state *mas,
+ void __rcu **slots, unsigned char offset)
{
- return rcu_dereference_protected(slots[offset], mt_locked(mas->tree));
+ return mt_slot_locked(mas->tree, slots, offset);
}
/*
@@ -834,8 +763,8 @@ static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
*
* Return: The entry stored in @slots at the @offset
*/
-static inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
- unsigned char offset)
+static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
+ unsigned char offset)
{
return mt_slot(mas->tree, slots, offset);
}
@@ -846,14 +775,14 @@ static inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
*
* Return: The pointer to the root of the tree
*/
-static inline void *mas_root(struct ma_state *mas)
+static __always_inline void *mas_root(struct ma_state *mas)
{
return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
}
static inline void *mt_root_locked(struct maple_tree *mt)
{
- return rcu_dereference_protected(mt->ma_root, mt_locked(mt));
+ return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
}
/*
@@ -895,6 +824,43 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
}
/*
+ * mt_clear_meta() - clear the metadata information of a node, if it exists
+ * @mt: The maple tree
+ * @mn: The maple node
+ * @type: The maple node type
+ */
+static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
+ enum maple_type type)
+{
+ struct maple_metadata *meta;
+ unsigned long *pivots;
+ void __rcu **slots;
+ void *next;
+
+ switch (type) {
+ case maple_range_64:
+ pivots = mn->mr64.pivot;
+ if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
+ slots = mn->mr64.slot;
+ next = mt_slot_locked(mt, slots,
+ MAPLE_RANGE64_SLOTS - 1);
+ if (unlikely((mte_to_node(next) &&
+ mte_node_type(next))))
+ return; /* no metadata, could be node */
+ }
+ fallthrough;
+ case maple_arange_64:
+ meta = ma_meta(mn, type);
+ break;
+ default:
+ return;
+ }
+
+ meta->gap = 0;
+ meta->end = 0;
+}
+
+/*
* ma_meta_end() - Get the data end of a node from the metadata
* @mn: The maple node
* @mt: The maple node type
@@ -910,20 +876,16 @@ static inline unsigned char ma_meta_end(struct maple_node *mn,
/*
* ma_meta_gap() - Get the largest gap location of a node from the metadata
* @mn: The maple node
- * @mt: The maple node type
*/
-static inline unsigned char ma_meta_gap(struct maple_node *mn,
- enum maple_type mt)
+static inline unsigned char ma_meta_gap(struct maple_node *mn)
{
- BUG_ON(mt != maple_arange_64);
-
return mn->ma64.meta.gap;
}
/*
* ma_set_meta_gap() - Set the largest gap location in a nodes metadata
* @mn: The maple node
- * @mn: The maple node type
+ * @mt: The maple node type
* @offset: The location of the largest gap.
*/
static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
@@ -937,8 +899,8 @@ static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
/*
* mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes.
- * @mat - the ma_topiary, a linked list of dead nodes.
- * @dead_enode - the node to be marked as dead and added to the tail of the list
+ * @mat: the ma_topiary, a linked list of dead nodes.
+ * @dead_enode: the node to be marked as dead and added to the tail of the list
*
* Add the @dead_enode to the linked list in @mat.
*/
@@ -956,47 +918,34 @@ static inline void mat_add(struct ma_topiary *mat,
mat->tail = dead_enode;
}
-static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
-static inline void mas_free(struct ma_state *mas, struct maple_enode *used);
-
-/*
- * mas_mat_free() - Free all nodes in a dead list.
- * @mas - the maple state
- * @mat - the ma_topiary linked list of dead nodes to free.
- *
- * Free walk a dead list.
- */
-static void mas_mat_free(struct ma_state *mas, struct ma_topiary *mat)
-{
- struct maple_enode *next;
-
- while (mat->head) {
- next = mte_to_mat(mat->head)->next;
- mas_free(mas, mat->head);
- mat->head = next;
- }
-}
-
+static void mt_free_walk(struct rcu_head *head);
+static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
+ bool free);
/*
* mas_mat_destroy() - Free all nodes and subtrees in a dead list.
- * @mas - the maple state
- * @mat - the ma_topiary linked list of dead nodes to free.
+ * @mas: the maple state
+ * @mat: the ma_topiary linked list of dead nodes to free.
*
* Destroy walk a dead list.
*/
static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
{
struct maple_enode *next;
+ struct maple_node *node;
+ bool in_rcu = mt_in_rcu(mas->tree);
while (mat->head) {
next = mte_to_mat(mat->head)->next;
- mte_destroy_walk(mat->head, mat->mtree);
+ node = mte_to_node(mat->head);
+ mt_destroy_walk(mat->head, mas->tree, !in_rcu);
+ if (in_rcu)
+ call_rcu(&node->rcu, mt_free_walk);
mat->head = next;
}
}
/*
* mas_descend() - Descend into the slot stored in the ma_state.
- * @mas - the maple state.
+ * @mas: the maple state.
*
* Note: Not RCU safe, only use in write side or debug code.
*/
@@ -1019,28 +968,10 @@ static inline void mas_descend(struct ma_state *mas)
}
/*
- * mte_set_gap() - Set a maple node gap.
- * @mn: The encoded maple node
- * @gap: The offset of the gap to set
- * @val: The gap value
- */
-static inline void mte_set_gap(const struct maple_enode *mn,
- unsigned char gap, unsigned long val)
-{
- switch (mte_node_type(mn)) {
- default:
- break;
- case maple_arange_64:
- mte_to_node(mn)->ma64.gap[gap] = val;
- break;
- }
-}
-
-/*
* mas_ascend() - Walk up a level of the tree.
* @mas: The maple state
*
- * Sets the @mas->max and @mas->min to the correct values when walking up. This
+ * Sets the @mas->max and @mas->min for the parent node of mas->node. This
* may cause several levels of walking up to find the correct min and max.
* May find a dead node which will cause a premature return.
* Return: 1 on dead node, 0 otherwise
@@ -1055,7 +986,6 @@ static int mas_ascend(struct ma_state *mas)
enum maple_type a_type;
unsigned long min, max;
unsigned long *pivots;
- unsigned char offset;
bool set_max = false, set_min = false;
a_node = mas_mn(mas);
@@ -1067,8 +997,9 @@ static int mas_ascend(struct ma_state *mas)
p_node = mte_parent(mas->node);
if (unlikely(a_node == p_node))
return 1;
- a_type = mas_parent_enum(mas, mas->node);
- offset = mte_parent_slot(mas->node);
+
+ a_type = mas_parent_type(mas, mas->node);
+ mas->offset = mte_parent_slot(mas->node);
a_enode = mt_mk_node(p_node, a_type);
/* Check to make sure all parent information is still accurate */
@@ -1076,7 +1007,6 @@ static int mas_ascend(struct ma_state *mas)
return 1;
mas->node = a_enode;
- mas->offset = offset;
if (mte_is_root(a_enode)) {
mas->max = ULONG_MAX;
@@ -1086,13 +1016,30 @@ static int mas_ascend(struct ma_state *mas)
min = 0;
max = ULONG_MAX;
+
+ /*
+ * !mas->offset implies that parent node min == mas->min.
+ * mas->offset > 0 implies that we need to walk up to find the
+ * implied pivot min.
+ */
+ if (!mas->offset) {
+ min = mas->min;
+ set_min = true;
+ }
+
+ if (mas->max == ULONG_MAX)
+ set_max = true;
+
do {
p_enode = a_enode;
- a_type = mas_parent_enum(mas, p_enode);
+ a_type = mas_parent_type(mas, p_enode);
a_node = mte_parent(p_enode);
a_slot = mte_parent_slot(p_enode);
- pivots = ma_pivots(a_node, a_type);
a_enode = mt_mk_node(a_node, a_type);
+ pivots = ma_pivots(a_node, a_type);
+
+ if (unlikely(ma_dead_node(a_node)))
+ return 1;
if (!set_min && a_slot) {
set_min = true;
@@ -1123,83 +1070,24 @@ static int mas_ascend(struct ma_state *mas)
*
* Return: A pointer to a maple node.
*/
-static inline struct maple_node *mas_pop_node(struct ma_state *mas)
+static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas)
{
- struct maple_alloc *ret, *node = mas->alloc;
- unsigned long total = mas_allocated(mas);
-
- /* nothing or a request pending. */
- if (unlikely(!total))
- return NULL;
+ struct maple_node *ret;
- if (total == 1) {
- /* single allocation in this ma_state */
+ if (mas->alloc) {
+ ret = mas->alloc;
mas->alloc = NULL;
- ret = node;
- goto single_node;
- }
-
- if (!node->node_count) {
- /* Single allocation in this node. */
- mas->alloc = node->slot[0];
- node->slot[0] = NULL;
- mas->alloc->total = node->total - 1;
- ret = node;
- goto new_head;
- }
-
- node->total--;
- ret = node->slot[node->node_count];
- node->slot[node->node_count--] = NULL;
-
-single_node:
-new_head:
- ret->total = 0;
- ret->node_count = 0;
- if (ret->request_count) {
- mas_set_alloc_req(mas, ret->request_count + 1);
- ret->request_count = 0;
+ goto out;
}
- return (struct maple_node *)ret;
-}
-
-/*
- * mas_push_node() - Push a node back on the maple state allocation.
- * @mas: The maple state
- * @used: The used maple node
- *
- * Stores the maple node back into @mas->alloc for reuse. Updates allocated and
- * requested node count as necessary.
- */
-static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
-{
- struct maple_alloc *reuse = (struct maple_alloc *)used;
- struct maple_alloc *head = mas->alloc;
- unsigned long count;
- unsigned int requested = mas_alloc_req(mas);
- memset(reuse, 0, sizeof(*reuse));
- count = mas_allocated(mas);
-
- if (count && (head->node_count < MAPLE_ALLOC_SLOTS - 1)) {
- if (head->slot[0])
- head->node_count++;
- head->slot[head->node_count] = reuse;
- head->total++;
- goto done;
- }
+ if (WARN_ON_ONCE(!mas->sheaf))
+ return NULL;
- reuse->total = 1;
- if ((head) && !((unsigned long)head & 0x1)) {
- head->request_count = 0;
- reuse->slot[0] = head;
- reuse->total += head->total;
- }
+ ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf);
- mas->alloc = reuse;
-done:
- if (requested > 1)
- mas_set_alloc_req(mas, requested - 1);
+out:
+ memset(ret, 0, sizeof(*ret));
+ return ret;
}
/*
@@ -1209,76 +1097,68 @@ done:
*/
static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
{
- struct maple_alloc *node;
- unsigned long allocated = mas_allocated(mas);
- unsigned long success = allocated;
- unsigned int requested = mas_alloc_req(mas);
- unsigned int count;
- void **slots = NULL;
- unsigned int max_req = 0;
-
- if (!requested)
+ if (!mas->node_request)
return;
- mas_set_alloc_req(mas, 0);
- if (mas->mas_flags & MA_STATE_PREALLOC) {
- if (allocated)
+ if (mas->node_request == 1) {
+ if (mas->sheaf)
+ goto use_sheaf;
+
+ if (mas->alloc)
return;
- WARN_ON(!allocated);
- }
- if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS - 1) {
- node = (struct maple_alloc *)mt_alloc_one(gfp);
- if (!node)
- goto nomem_one;
+ mas->alloc = mt_alloc_one(gfp);
+ if (!mas->alloc)
+ goto error;
- if (allocated)
- node->slot[0] = mas->alloc;
+ mas->node_request = 0;
+ return;
+ }
- success++;
- mas->alloc = node;
- requested--;
+use_sheaf:
+ if (unlikely(mas->alloc)) {
+ kfree(mas->alloc);
+ mas->alloc = NULL;
}
- node = mas->alloc;
- while (requested) {
- max_req = MAPLE_ALLOC_SLOTS;
- if (node->slot[0]) {
- unsigned int offset = node->node_count + 1;
+ if (mas->sheaf) {
+ unsigned long refill;
- slots = (void **)&node->slot[offset];
- max_req -= offset;
- } else {
- slots = (void **)&node->slot;
+ refill = mas->node_request;
+ if (kmem_cache_sheaf_size(mas->sheaf) >= refill) {
+ mas->node_request = 0;
+ return;
}
- max_req = min(requested, max_req);
- count = mt_alloc_bulk(gfp, max_req, slots);
- if (!count)
- goto nomem_bulk;
+ if (mt_refill_sheaf(gfp, &mas->sheaf, refill))
+ goto error;
- node->node_count += count;
- /* zero indexed. */
- if (slots == (void **)&node->slot)
- node->node_count--;
+ mas->node_request = 0;
+ return;
+ }
- success += count;
- node = node->slot[0];
- requested -= count;
+ mas->sheaf = mt_get_sheaf(gfp, mas->node_request);
+ if (likely(mas->sheaf)) {
+ mas->node_request = 0;
+ return;
}
- mas->alloc->total = success;
- return;
-nomem_bulk:
- /* Clean up potential freed allocations on bulk failure */
- memset(slots, 0, max_req * sizeof(unsigned long));
-nomem_one:
- mas_set_alloc_req(mas, requested);
- if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
- mas->alloc->total = success;
+error:
mas_set_err(mas, -ENOMEM);
- return;
+}
+
+static inline void mas_empty_nodes(struct ma_state *mas)
+{
+ mas->node_request = 0;
+ if (mas->sheaf) {
+ mt_return_sheaf(mas->sheaf);
+ mas->sheaf = NULL;
+ }
+ if (mas->alloc) {
+ kfree(mas->alloc);
+ mas->alloc = NULL;
+ }
}
/*
@@ -1291,84 +1171,55 @@ nomem_one:
*/
static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
{
- struct maple_node *tmp = mte_to_node(used);
-
- if (mt_in_rcu(mas->tree))
- ma_free_rcu(tmp);
- else
- mas_push_node(mas, tmp);
-}
-
-/*
- * mas_node_count() - Check if enough nodes are allocated and request more if
- * there is not enough nodes.
- * @mas: The maple state
- * @count: The number of nodes needed
- * @gfp: the gfp flags
- */
-static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
-{
- unsigned long allocated = mas_allocated(mas);
-
- if (allocated < count) {
- mas_set_alloc_req(mas, count - allocated);
- mas_alloc_nodes(mas, gfp);
- }
-}
-
-/*
- * mas_node_count() - Check if enough nodes are allocated and request more if
- * there is not enough nodes.
- * @mas: The maple state
- * @count: The number of nodes needed
- *
- * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
- */
-static void mas_node_count(struct ma_state *mas, int count)
-{
- return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
+ ma_free_rcu(mte_to_node(used));
}
/*
* mas_start() - Sets up maple state for operations.
* @mas: The maple state.
*
- * If mas->node == MAS_START, then set the min, max, depth, and offset to
+ * If mas->status == ma_start, then set the min, max and depth to
* defaults.
*
* Return:
- * - If mas->node is an error or not MAS_START, return NULL.
- * - If it's an empty tree: NULL & mas->node == MAS_NONE
- * - If it's a single entry: The entry & mas->node == MAS_ROOT
- * - If it's a tree: NULL & mas->node == safe root node.
+ * - If mas->node is an error or not mas_start, return NULL.
+ * - If it's an empty tree: NULL & mas->status == ma_none
+ * - If it's a single entry: The entry & mas->status == ma_root
+ * - If it's a tree: NULL & mas->status == ma_active
*/
static inline struct maple_enode *mas_start(struct ma_state *mas)
{
if (likely(mas_is_start(mas))) {
struct maple_enode *root;
- mas->node = MAS_NONE;
mas->min = 0;
mas->max = ULONG_MAX;
- mas->depth = 0;
- mas->offset = 0;
+retry:
+ mas->depth = 0;
root = mas_root(mas);
/* Tree with nodes */
if (likely(xa_is_node(root))) {
- mas->depth = 1;
+ mas->depth = 0;
+ mas->status = ma_active;
mas->node = mte_safe_root(root);
+ mas->offset = 0;
+ if (mte_dead_node(mas->node))
+ goto retry;
+
return NULL;
}
+ mas->node = NULL;
/* empty tree */
if (unlikely(!root)) {
+ mas->status = ma_none;
mas->offset = MAPLE_NODE_SLOTS;
return NULL;
}
/* Single entry tree */
- mas->node = MAS_ROOT;
+ mas->status = ma_root;
mas->offset = MAPLE_NODE_SLOTS;
/* Single entry tree. */
@@ -1391,13 +1242,14 @@ static inline struct maple_enode *mas_start(struct ma_state *mas)
* Uses metadata to find the end of the data when possible.
* Return: The zero indexed last slot with data (may be null).
*/
-static inline unsigned char ma_data_end(struct maple_node *node,
- enum maple_type type,
- unsigned long *pivots,
- unsigned long max)
+static __always_inline unsigned char ma_data_end(struct maple_node *node,
+ enum maple_type type, unsigned long *pivots, unsigned long max)
{
unsigned char offset;
+ if (!pivots)
+ return 0;
+
if (type == maple_arange_64)
return ma_meta_end(node, type);
@@ -1433,6 +1285,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas)
return ma_meta_end(node, type);
pivots = ma_pivots(node, type);
+ if (unlikely(ma_dead_node(node)))
+ return 0;
+
offset = mt_pivots[type] - 1;
if (likely(!pivots[offset]))
return ma_meta_end(node, type);
@@ -1445,7 +1300,7 @@ static inline unsigned char mas_data_end(struct ma_state *mas)
/*
* mas_leaf_max_gap() - Returns the largest gap in a leaf node
- * @mas - the maple state
+ * @mas: the maple state
*
* Return: The maximum gap in the leaf.
*/
@@ -1501,6 +1356,9 @@ static unsigned long mas_leaf_max_gap(struct ma_state *mas)
gap = ULONG_MAX - pivots[max_piv];
if (gap > max_gap)
max_gap = gap;
+
+ if (max_gap > pivots[max_piv] - mas->min)
+ return max_gap;
}
for (; i <= max_piv; i++) {
@@ -1524,7 +1382,7 @@ static unsigned long mas_leaf_max_gap(struct ma_state *mas)
* @node: The maple node
* @gaps: The pointer to the gaps
* @mt: The maple node type
- * @*off: Pointer to store the offset location of the gap.
+ * @off: Pointer to store the offset location of the gap.
*
* Uses the metadata data end to scan backwards across set gaps.
*
@@ -1553,8 +1411,6 @@ ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
* mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
* @mas: The maple state.
*
- * If the metadata gap is set to MAPLE_ARANGE64_META_MAX, there is no gap.
- *
* Return: The gap value.
*/
static inline unsigned long mas_max_gap(struct ma_state *mas)
@@ -1569,10 +1425,8 @@ static inline unsigned long mas_max_gap(struct ma_state *mas)
return mas_leaf_max_gap(mas);
node = mas_mn(mas);
- offset = ma_meta_gap(node, mt);
- if (offset == MAPLE_ARANGE64_META_MAX)
- return 0;
-
+ MAS_BUG_ON(mas, mt != maple_arange_64);
+ offset = ma_meta_gap(node);
gaps = ma_gaps(node, mt);
return gaps[offset];
}
@@ -1597,16 +1451,14 @@ static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
enum maple_type pmt;
pnode = mte_parent(mas->node);
- pmt = mas_parent_enum(mas, mas->node);
+ pmt = mas_parent_type(mas, mas->node);
penode = mt_mk_node(pnode, pmt);
pgaps = ma_gaps(pnode, pmt);
ascend:
- meta_offset = ma_meta_gap(pnode, pmt);
- if (meta_offset == MAPLE_ARANGE64_META_MAX)
- meta_gap = 0;
- else
- meta_gap = pgaps[meta_offset];
+ MAS_BUG_ON(mas, pmt != maple_arange_64);
+ meta_offset = ma_meta_gap(pnode);
+ meta_gap = pgaps[meta_offset];
pgaps[offset] = new;
@@ -1619,7 +1471,6 @@ ascend:
ma_set_meta_gap(pnode, pmt, offset);
} else if (new < meta_gap) {
- meta_offset = 15;
new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
ma_set_meta_gap(pnode, pmt, meta_offset);
}
@@ -1629,7 +1480,7 @@ ascend:
/* Go to the parent node. */
pnode = mte_parent(penode);
- pmt = mas_parent_enum(mas, penode);
+ pmt = mas_parent_type(mas, penode);
pgaps = ma_gaps(pnode, pmt);
offset = mte_parent_slot(penode);
penode = mt_mk_node(pnode, pmt);
@@ -1638,7 +1489,7 @@ ascend:
/*
* mas_update_gap() - Update a nodes gaps and propagate up if necessary.
- * @mas - the maple state.
+ * @mas: the maple state.
*/
static inline void mas_update_gap(struct ma_state *mas)
{
@@ -1656,7 +1507,7 @@ static inline void mas_update_gap(struct ma_state *mas)
pslot = mte_parent_slot(mas->node);
p_gap = ma_gaps(mte_parent(mas->node),
- mas_parent_enum(mas, mas->node))[pslot];
+ mas_parent_type(mas, mas->node))[pslot];
if (p_gap != max_gap)
mas_parent_gap(mas, pslot, max_gap);
@@ -1665,14 +1516,14 @@ static inline void mas_update_gap(struct ma_state *mas)
/*
* mas_adopt_children() - Set the parent pointer of all nodes in @parent to
* @parent with the slot encoded.
- * @mas - the maple state (for the tree)
- * @parent - the maple encoded node containing the children.
+ * @mas: the maple state (for the tree)
+ * @parent: the maple encoded node containing the children.
*/
static inline void mas_adopt_children(struct ma_state *mas,
struct maple_enode *parent)
{
enum maple_type type = mte_node_type(parent);
- struct maple_node *node = mas_mn(mas);
+ struct maple_node *node = mte_to_node(parent);
void __rcu **slots = ma_slots(node, type);
unsigned long *pivots = ma_pivots(node, type);
struct maple_enode *child;
@@ -1681,57 +1532,62 @@ static inline void mas_adopt_children(struct ma_state *mas,
offset = ma_data_end(node, type, pivots, mas->max);
do {
child = mas_slot_locked(mas, slots, offset);
- mte_set_parent(child, parent, offset);
+ mas_set_parent(mas, child, parent, offset);
} while (offset--);
}
/*
- * mas_replace() - Replace a maple node in the tree with mas->node. Uses the
- * parent encoding to locate the maple node in the tree.
- * @mas - the ma_state to use for operations.
- * @advanced - boolean to adopt the child nodes and free the old node (false) or
- * leave the node (true) and handle the adoption and free elsewhere.
+ * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old
+ * node as dead.
+ * @mas: the maple state with the new node
+ * @old_enode: The old maple encoded node to replace.
+ * @new_height: if we are inserting a root node, update the height of the tree
*/
-static inline void mas_replace(struct ma_state *mas, bool advanced)
- __must_hold(mas->tree->lock)
+static inline void mas_put_in_tree(struct ma_state *mas,
+ struct maple_enode *old_enode, char new_height)
+ __must_hold(mas->tree->ma_lock)
{
- struct maple_node *mn = mas_mn(mas);
- struct maple_enode *old_enode;
- unsigned char offset = 0;
- void __rcu **slots = NULL;
-
- if (ma_is_root(mn)) {
- old_enode = mas_root_locked(mas);
- } else {
- offset = mte_parent_slot(mas->node);
- slots = ma_slots(mte_parent(mas->node),
- mas_parent_enum(mas, mas->node));
- old_enode = mas_slot_locked(mas, slots, offset);
- }
-
- if (!advanced && !mte_is_leaf(mas->node))
- mas_adopt_children(mas, mas->node);
+ unsigned char offset;
+ void __rcu **slots;
if (mte_is_root(mas->node)) {
- mn->parent = ma_parent_ptr(
- ((unsigned long)mas->tree | MA_ROOT_PARENT));
+ mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
- mas_set_height(mas);
+ mt_set_height(mas->tree, new_height);
} else {
+
+ offset = mte_parent_slot(mas->node);
+ slots = ma_slots(mte_parent(mas->node),
+ mas_parent_type(mas, mas->node));
rcu_assign_pointer(slots[offset], mas->node);
}
- if (!advanced)
- mas_free(mas, old_enode);
+ mte_set_node_dead(old_enode);
}
/*
- * mas_new_child() - Find the new child of a node.
- * @mas: the maple state
+ * mas_replace_node() - Replace a node by putting it in the tree, marking it
+ * dead, and freeing it.
+ * the parent encoding to locate the maple node in the tree.
+ * @mas: the ma_state with @mas->node pointing to the new node.
+ * @old_enode: The old maple encoded node.
+ * @new_height: The new height of the tree as a result of the operation
+ */
+static inline void mas_replace_node(struct ma_state *mas,
+ struct maple_enode *old_enode, unsigned char new_height)
+ __must_hold(mas->tree->ma_lock)
+{
+ mas_put_in_tree(mas, old_enode, new_height);
+ mas_free(mas, old_enode);
+}
+
+/*
+ * mas_find_child() - Find a child who has the parent @mas->node.
+ * @mas: the maple state with the parent.
* @child: the maple state to store the child.
*/
-static inline bool mas_new_child(struct ma_state *mas, struct ma_state *child)
- __must_hold(mas->tree->lock)
+static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
+ __must_hold(mas->tree->ma_lock)
{
enum maple_type mt;
unsigned char offset;
@@ -1780,7 +1636,6 @@ static inline void mab_shift_right(struct maple_big_node *b_node,
/*
* mab_middle_node() - Check if a middle node is needed (unlikely)
* @b_node: the maple_big_node that contains the data.
- * @size: the amount of data in the b_node
* @split: the potential split location
* @slot_count: the size that can be stored in a single node being considered.
*
@@ -1828,38 +1683,25 @@ static inline int mab_no_null_split(struct maple_big_node *b_node,
/*
* mab_calc_split() - Calculate the split location and if there needs to be two
* splits.
+ * @mas: The maple state
* @bn: The maple_big_node with the data
* @mid_split: The second split, if required. 0 otherwise.
*
* Return: The first split location. The middle split is set in @mid_split.
*/
static inline int mab_calc_split(struct ma_state *mas,
- struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
+ struct maple_big_node *bn, unsigned char *mid_split)
{
unsigned char b_end = bn->b_end;
int split = b_end / 2; /* Assume equal split. */
- unsigned char slot_min, slot_count = mt_slots[bn->type];
+ unsigned char slot_count = mt_slots[bn->type];
/*
* To support gap tracking, all NULL entries are kept together and a node cannot
* end on a NULL entry, with the exception of the left-most leaf. The
* limitation means that the split of a node must be checked for this condition
* and be able to put more data in one direction or the other.
- */
- if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
- *mid_split = 0;
- split = b_end - mt_min_slots[bn->type];
-
- if (!ma_is_leaf(bn->type))
- return split;
-
- mas->mas_flags |= MA_STATE_REBALANCE;
- if (!bn->slot[split])
- split--;
- return split;
- }
-
- /*
+ *
* Although extremely rare, it is possible to enter what is known as the 3-way
* split scenario. The 3-way split comes about by means of a store of a range
* that overwrites the end and beginning of two full nodes. The result is a set
@@ -1871,25 +1713,14 @@ static inline int mab_calc_split(struct ma_state *mas,
split = b_end / 3;
*mid_split = split * 2;
} else {
- slot_min = mt_min_slots[bn->type];
-
*mid_split = 0;
- /*
- * Avoid having a range less than the slot count unless it
- * causes one node to be deficient.
- * NOTE: mt_min_slots is 1 based, b_end and split are zero.
- */
- while (((bn->pivot[split] - min) < slot_count - 1) &&
- (split < slot_count - 1) && (b_end - split > slot_min))
- split++;
}
/* Avoid ending a node on a NULL entry */
split = mab_no_null_split(bn, split, slot_count);
- if (!(*mid_split))
- return split;
- *mid_split = mab_no_null_split(bn, *mid_split, slot_count);
+ if (unlikely(*mid_split))
+ *mid_split = mab_no_null_split(bn, *mid_split, slot_count);
return split;
}
@@ -1928,14 +1759,13 @@ static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
for (; i < piv_end; i++, j++) {
b_node->pivot[j] = pivots[i];
if (unlikely(!b_node->pivot[j]))
- break;
+ goto complete;
if (unlikely(mas->max == b_node->pivot[j]))
goto complete;
}
- if (likely(i <= mas_end))
- b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
+ b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
complete:
b_node->b_end = ++j;
@@ -1951,27 +1781,13 @@ complete:
/*
* mas_leaf_set_meta() - Set the metadata of a leaf if possible.
- * @mas: The maple state
* @node: The maple node
- * @pivots: pointer to the maple node pivots
* @mt: The maple type
- * @end: The assumed end
- *
- * Note, end may be incremented within this function but not modified at the
- * source. This is fine since the metadata is the last thing to be stored in a
- * node during a write.
+ * @end: The node end
*/
-static inline void mas_leaf_set_meta(struct ma_state *mas,
- struct maple_node *node, unsigned long *pivots,
+static inline void mas_leaf_set_meta(struct maple_node *node,
enum maple_type mt, unsigned char end)
{
- /* There is no room for metadata already */
- if (mt_pivots[mt] <= end)
- return;
-
- if (pivots[end] && pivots[end] < mas->max)
- end++;
-
if (end < mt_slots[mt] - 1)
ma_set_meta(node, mt, 0, end);
}
@@ -2015,7 +1831,7 @@ static inline void mab_mas_cp(struct maple_big_node *b_node,
end = j - 1;
if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
unsigned long max_gap = 0;
- unsigned char offset = 15;
+ unsigned char offset = 0;
gaps = ma_gaps(node, mt);
do {
@@ -2028,78 +1844,7 @@ static inline void mab_mas_cp(struct maple_big_node *b_node,
ma_set_meta(node, mt, offset, end);
} else {
- mas_leaf_set_meta(mas, node, pivots, mt, end);
- }
-}
-
-/*
- * mas_descend_adopt() - Descend through a sub-tree and adopt children.
- * @mas: the maple state with the maple encoded node of the sub-tree.
- *
- * Descend through a sub-tree and adopt children who do not have the correct
- * parents set. Follow the parents which have the correct parents as they are
- * the new entries which need to be followed to find other incorrectly set
- * parents.
- */
-static inline void mas_descend_adopt(struct ma_state *mas)
-{
- struct ma_state list[3], next[3];
- int i, n;
-
- /*
- * At each level there may be up to 3 correct parent pointers which indicates
- * the new nodes which need to be walked to find any new nodes at a lower level.
- */
-
- for (i = 0; i < 3; i++) {
- list[i] = *mas;
- list[i].offset = 0;
- next[i].offset = 0;
- }
- next[0] = *mas;
-
- while (!mte_is_leaf(list[0].node)) {
- n = 0;
- for (i = 0; i < 3; i++) {
- if (mas_is_none(&list[i]))
- continue;
-
- if (i && list[i-1].node == list[i].node)
- continue;
-
- while ((n < 3) && (mas_new_child(&list[i], &next[n])))
- n++;
-
- mas_adopt_children(&list[i], list[i].node);
- }
-
- while (n < 3)
- next[n++].node = MAS_NONE;
-
- /* descend by setting the list to the children */
- for (i = 0; i < 3; i++)
- list[i] = next[i];
- }
-}
-
-/*
- * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
- * @mas: The maple state
- * @end: The maple node end
- * @mt: The maple node type
- */
-static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
- enum maple_type mt)
-{
- if (!(mas->mas_flags & MA_STATE_BULK))
- return;
-
- if (mte_is_root(mas->node))
- return;
-
- if (end > mt_min_slots[mt]) {
- mas->mas_flags &= ~MA_STATE_REBALANCE;
- return;
+ mas_leaf_set_meta(node, mt, end);
}
}
@@ -2112,7 +1857,7 @@ static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
*
* Return: The actual end of the data stored in @b_node
*/
-static inline void mas_store_b_node(struct ma_wr_state *wr_mas,
+static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
struct maple_big_node *b_node, unsigned char offset_end)
{
unsigned char slot;
@@ -2150,11 +1895,8 @@ static inline void mas_store_b_node(struct ma_wr_state *wr_mas,
goto b_end;
/* Handle new range ending before old range ends */
- piv = mas_logical_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
+ piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
if (piv > mas->last) {
- if (piv == ULONG_MAX)
- mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
-
if (offset_end != slot)
wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
offset_end);
@@ -2166,11 +1908,11 @@ static inline void mas_store_b_node(struct ma_wr_state *wr_mas,
}
slot = offset_end + 1;
- if (slot > wr_mas->node_end)
+ if (slot > mas->end)
goto b_end;
/* Copy end data to the end of the node. */
- mas_mab_cp(mas, slot, wr_mas->node_end + 1, b_node, ++b_end);
+ mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end);
b_node->b_end--;
return;
@@ -2188,9 +1930,7 @@ static inline bool mas_prev_sibling(struct ma_state *mas)
{
unsigned int p_slot = mte_parent_slot(mas->node);
- if (mte_is_root(mas->node))
- return false;
-
+ /* For root node, p_slot is set to 0 by mte_parent_slot(). */
if (!p_slot)
return false;
@@ -2225,23 +1965,28 @@ static inline bool mas_next_sibling(struct ma_state *mas)
}
/*
- * mte_node_or_node() - Return the encoded node or MAS_NONE.
+ * mas_node_or_none() - Set the enode and state.
+ * @mas: the maple state
* @enode: The encoded maple node.
*
- * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state.
- *
- * Return: @enode or MAS_NONE
+ * Set the node to the enode and the status.
*/
-static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode)
+static inline void mas_node_or_none(struct ma_state *mas,
+ struct maple_enode *enode)
{
- if (enode)
- return enode;
-
- return ma_enode_ptr(MAS_NONE);
+ if (enode) {
+ mas->node = enode;
+ mas->status = ma_active;
+ } else {
+ mas->node = NULL;
+ mas->status = ma_none;
+ }
}
/*
* mas_wr_node_walk() - Find the correct offset for the index in the @mas.
+ * If @mas->index cannot be found within the containing
+ * node, we traverse to the last entry in the node.
* @wr_mas: The maple write state
*
* Uses mas_slot_locked() and does not need to worry about dead nodes.
@@ -2249,9 +1994,7 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode)
static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
{
struct ma_state *mas = wr_mas->mas;
- unsigned char count;
- unsigned char offset;
- unsigned long index, min, max;
+ unsigned char count, offset;
if (unlikely(ma_is_dense(wr_mas->type))) {
wr_mas->r_max = wr_mas->r_min = mas->index;
@@ -2261,135 +2004,21 @@ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
wr_mas->node = mas_mn(wr_mas->mas);
wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
- count = wr_mas->node_end = ma_data_end(wr_mas->node, wr_mas->type,
- wr_mas->pivots, mas->max);
+ count = mas->end = ma_data_end(wr_mas->node, wr_mas->type,
+ wr_mas->pivots, mas->max);
offset = mas->offset;
- min = mas_safe_min(mas, wr_mas->pivots, offset);
- if (unlikely(offset == count))
- goto max;
-
- max = wr_mas->pivots[offset];
- index = mas->index;
- if (unlikely(index <= max))
- goto done;
- if (unlikely(!max && offset))
- goto max;
+ while (offset < count && mas->index > wr_mas->pivots[offset])
+ offset++;
- min = max + 1;
- while (++offset < count) {
- max = wr_mas->pivots[offset];
- if (index <= max)
- goto done;
- else if (unlikely(!max))
- break;
-
- min = max + 1;
- }
-
-max:
- max = mas->max;
-done:
- wr_mas->r_max = max;
- wr_mas->r_min = min;
+ wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max;
+ wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset);
wr_mas->offset_end = mas->offset = offset;
}
/*
- * mas_topiary_range() - Add a range of slots to the topiary.
- * @mas: The maple state
- * @destroy: The topiary to add the slots (usually destroy)
- * @start: The starting slot inclusively
- * @end: The end slot inclusively
- */
-static inline void mas_topiary_range(struct ma_state *mas,
- struct ma_topiary *destroy, unsigned char start, unsigned char end)
-{
- void __rcu **slots;
- unsigned char offset;
-
- MT_BUG_ON(mas->tree, mte_is_leaf(mas->node));
- slots = ma_slots(mas_mn(mas), mte_node_type(mas->node));
- for (offset = start; offset <= end; offset++) {
- struct maple_enode *enode = mas_slot_locked(mas, slots, offset);
-
- if (mte_dead_node(enode))
- continue;
-
- mat_add(destroy, enode);
- }
-}
-
-/*
- * mast_topiary() - Add the portions of the tree to the removal list; either to
- * be freed or discarded (destroy walk).
- * @mast: The maple_subtree_state.
- */
-static inline void mast_topiary(struct maple_subtree_state *mast)
-{
- MA_WR_STATE(wr_mas, mast->orig_l, NULL);
- unsigned char r_start, r_end;
- unsigned char l_start, l_end;
- void __rcu **l_slots, **r_slots;
-
- wr_mas.type = mte_node_type(mast->orig_l->node);
- mast->orig_l->index = mast->orig_l->last;
- mas_wr_node_walk(&wr_mas);
- l_start = mast->orig_l->offset + 1;
- l_end = mas_data_end(mast->orig_l);
- r_start = 0;
- r_end = mast->orig_r->offset;
-
- if (r_end)
- r_end--;
-
- l_slots = ma_slots(mas_mn(mast->orig_l),
- mte_node_type(mast->orig_l->node));
-
- r_slots = ma_slots(mas_mn(mast->orig_r),
- mte_node_type(mast->orig_r->node));
-
- if ((l_start < l_end) &&
- mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_start))) {
- l_start++;
- }
-
- if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_end))) {
- if (r_end)
- r_end--;
- }
-
- if ((l_start > r_end) && (mast->orig_l->node == mast->orig_r->node))
- return;
-
- /* At the node where left and right sides meet, add the parts between */
- if (mast->orig_l->node == mast->orig_r->node) {
- return mas_topiary_range(mast->orig_l, mast->destroy,
- l_start, r_end);
- }
-
- /* mast->orig_r is different and consumed. */
- if (mte_is_leaf(mast->orig_r->node))
- return;
-
- if (mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_end)))
- l_end--;
-
-
- if (l_start <= l_end)
- mas_topiary_range(mast->orig_l, mast->destroy, l_start, l_end);
-
- if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_start)))
- r_start++;
-
- if (r_start <= r_end)
- mas_topiary_range(mast->orig_r, mast->destroy, 0, r_end);
-}
-
-/*
* mast_rebalance_next() - Rebalance against the next node
* @mast: The maple subtree state
- * @old_r: The encoded maple node to the right (next node).
*/
static inline void mast_rebalance_next(struct maple_subtree_state *mast)
{
@@ -2403,7 +2032,6 @@ static inline void mast_rebalance_next(struct maple_subtree_state *mast)
/*
* mast_rebalance_prev() - Rebalance against the previous node
* @mast: The maple subtree state
- * @old_l: The encoded maple node to the left (previous node)
*/
static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
{
@@ -2421,7 +2049,7 @@ static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
/*
* mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring
* the node to the right. Checking the nodes to the right then the left at each
- * level upwards until root is reached. Free and destroy as needed.
+ * level upwards until root is reached.
* Data is copied into the @mast->bn.
* @mast: The maple_subtree_state.
*/
@@ -2430,97 +2058,31 @@ bool mast_spanning_rebalance(struct maple_subtree_state *mast)
{
struct ma_state r_tmp = *mast->orig_r;
struct ma_state l_tmp = *mast->orig_l;
- struct maple_enode *ancestor = NULL;
- unsigned char start, end;
unsigned char depth = 0;
- r_tmp = *mast->orig_r;
- l_tmp = *mast->orig_l;
do {
mas_ascend(mast->orig_r);
mas_ascend(mast->orig_l);
depth++;
- if (!ancestor &&
- (mast->orig_r->node == mast->orig_l->node)) {
- ancestor = mast->orig_r->node;
- end = mast->orig_r->offset - 1;
- start = mast->orig_l->offset + 1;
- }
-
if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
- if (!ancestor) {
- ancestor = mast->orig_r->node;
- start = 0;
- }
-
mast->orig_r->offset++;
do {
mas_descend(mast->orig_r);
mast->orig_r->offset = 0;
- depth--;
- } while (depth);
+ } while (--depth);
mast_rebalance_next(mast);
- do {
- unsigned char l_off = 0;
- struct maple_enode *child = r_tmp.node;
-
- mas_ascend(&r_tmp);
- if (ancestor == r_tmp.node)
- l_off = start;
-
- if (r_tmp.offset)
- r_tmp.offset--;
-
- if (l_off < r_tmp.offset)
- mas_topiary_range(&r_tmp, mast->destroy,
- l_off, r_tmp.offset);
-
- if (l_tmp.node != child)
- mat_add(mast->free, child);
-
- } while (r_tmp.node != ancestor);
-
*mast->orig_l = l_tmp;
return true;
-
} else if (mast->orig_l->offset != 0) {
- if (!ancestor) {
- ancestor = mast->orig_l->node;
- end = mas_data_end(mast->orig_l);
- }
-
mast->orig_l->offset--;
do {
mas_descend(mast->orig_l);
mast->orig_l->offset =
mas_data_end(mast->orig_l);
- depth--;
- } while (depth);
+ } while (--depth);
mast_rebalance_prev(mast);
- do {
- unsigned char r_off;
- struct maple_enode *child = l_tmp.node;
-
- mas_ascend(&l_tmp);
- if (ancestor == l_tmp.node)
- r_off = end;
- else
- r_off = mas_data_end(&l_tmp);
-
- if (l_tmp.offset < r_off)
- l_tmp.offset++;
-
- if (l_tmp.offset < r_off)
- mas_topiary_range(&l_tmp, mast->destroy,
- l_tmp.offset, r_off);
-
- if (r_tmp.node != child)
- mat_add(mast->free, child);
-
- } while (l_tmp.node != ancestor);
-
*mast->orig_r = r_tmp;
return true;
}
@@ -2532,36 +2094,24 @@ bool mast_spanning_rebalance(struct maple_subtree_state *mast)
}
/*
- * mast_ascend_free() - Add current original maple state nodes to the free list
- * and ascend.
+ * mast_ascend() - Ascend the original left and right maple states.
* @mast: the maple subtree state.
*
- * Ascend the original left and right sides and add the previous nodes to the
- * free list. Set the slots to point to the correct location in the new nodes.
+ * Ascend the original left and right sides. Set the offsets to point to the
+ * data already in the new tree (@mast->l and @mast->r).
*/
-static inline void
-mast_ascend_free(struct maple_subtree_state *mast)
+static inline void mast_ascend(struct maple_subtree_state *mast)
{
MA_WR_STATE(wr_mas, mast->orig_r, NULL);
- struct maple_enode *left = mast->orig_l->node;
- struct maple_enode *right = mast->orig_r->node;
-
mas_ascend(mast->orig_l);
mas_ascend(mast->orig_r);
- mat_add(mast->free, left);
-
- if (left != right)
- mat_add(mast->free, right);
mast->orig_r->offset = 0;
mast->orig_r->index = mast->r->max;
/* last should be larger than or equal to index */
if (mast->orig_r->last < mast->orig_r->index)
mast->orig_r->last = mast->orig_r->index;
- /*
- * The node may not contain the value so set slot to ensure all
- * of the nodes contents are freed or destroyed.
- */
+
wr_mas.type = mte_node_type(mast->orig_r->node);
mas_wr_node_walk(&wr_mas);
/* Set up the left side of things */
@@ -2605,7 +2155,7 @@ static inline struct maple_enode
static inline unsigned char mas_mab_to_node(struct ma_state *mas,
struct maple_big_node *b_node, struct maple_enode **left,
struct maple_enode **right, struct maple_enode **middle,
- unsigned char *mid_split, unsigned long min)
+ unsigned char *mid_split)
{
unsigned char split = 0;
unsigned char slot_count = mt_slots[b_node->type];
@@ -2618,7 +2168,7 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas,
if (b_node->b_end < slot_count) {
split = b_node->b_end;
} else {
- split = mab_calc_split(mas, b_node, mid_split, min);
+ split = mab_calc_split(mas, b_node, mid_split);
*right = mas_new_ma_node(mas, b_node);
}
@@ -2632,9 +2182,9 @@ static inline unsigned char mas_mab_to_node(struct ma_state *mas,
/*
* mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end
* pointer.
- * @b_node - the big node to add the entry
- * @mas - the maple state to get the pivot (mas->max)
- * @entry - the entry to add, if NULL nothing happens.
+ * @b_node: the big node to add the entry
+ * @mas: the maple state to get the pivot (mas->max)
+ * @entry: the entry to add, if NULL nothing happens.
*/
static inline void mab_set_b_end(struct maple_big_node *b_node,
struct ma_state *mas,
@@ -2653,11 +2203,11 @@ static inline void mab_set_b_end(struct maple_big_node *b_node,
* mas_set_split_parent() - combine_then_separate helper function. Sets the parent
* of @mas->node to either @left or @right, depending on @slot and @split
*
- * @mas - the maple state with the node that needs a parent
- * @left - possible parent 1
- * @right - possible parent 2
- * @slot - the slot the mas->node was placed
- * @split - the split location between @left and @right
+ * @mas: the maple state with the node that needs a parent
+ * @left: possible parent 1
+ * @right: possible parent 2
+ * @slot: the slot the mas->node was placed
+ * @split: the split location between @left and @right
*/
static inline void mas_set_split_parent(struct ma_state *mas,
struct maple_enode *left,
@@ -2668,20 +2218,20 @@ static inline void mas_set_split_parent(struct ma_state *mas,
return;
if ((*slot) <= split)
- mte_set_parent(mas->node, left, *slot);
+ mas_set_parent(mas, mas->node, left, *slot);
else if (right)
- mte_set_parent(mas->node, right, (*slot) - split - 1);
+ mas_set_parent(mas, mas->node, right, (*slot) - split - 1);
(*slot)++;
}
/*
* mte_mid_split_check() - Check if the next node passes the mid-split
- * @**l: Pointer to left encoded maple node.
- * @**m: Pointer to middle encoded maple node.
- * @**r: Pointer to right encoded maple node.
+ * @l: Pointer to left encoded maple node.
+ * @m: Pointer to middle encoded maple node.
+ * @r: Pointer to right encoded maple node.
* @slot: The offset
- * @*split: The split location.
+ * @split: The split location.
* @mid_split: The middle split.
*/
static inline void mte_mid_split_check(struct maple_enode **l,
@@ -2705,10 +2255,10 @@ static inline void mte_mid_split_check(struct maple_enode **l,
/*
* mast_set_split_parents() - Helper function to set three nodes parents. Slot
* is taken from @mast->l.
- * @mast - the maple subtree state
- * @left - the left node
- * @right - the right node
- * @split - the split location.
+ * @mast: the maple subtree state
+ * @left: the left node
+ * @right: the right node
+ * @split: the split location.
*/
static inline void mast_set_split_parents(struct maple_subtree_state *mast,
struct maple_enode *left,
@@ -2740,58 +2290,152 @@ static inline void mast_set_split_parents(struct maple_subtree_state *mast,
}
/*
- * mas_wmb_replace() - Write memory barrier and replace
- * @mas: The maple state
- * @free: the maple topiary list of nodes to free
- * @destroy: The maple topiary list of nodes to destroy (walk and free)
+ * mas_topiary_node() - Dispose of a single node
+ * @mas: The maple state for pushing nodes
+ * @in_rcu: If the tree is in rcu mode
*
- * Updates gap as necessary.
+ * The node will either be RCU freed or pushed back on the maple state.
*/
-static inline void mas_wmb_replace(struct ma_state *mas,
- struct ma_topiary *free,
- struct ma_topiary *destroy)
+static inline void mas_topiary_node(struct ma_state *mas,
+ struct ma_state *tmp_mas, bool in_rcu)
{
- /* All nodes must see old data as dead prior to replacing that data */
- smp_wmb(); /* Needed for RCU */
+ struct maple_node *tmp;
+ struct maple_enode *enode;
- /* Insert the new data in the tree */
- mas_replace(mas, true);
+ if (mas_is_none(tmp_mas))
+ return;
- if (!mte_is_leaf(mas->node))
- mas_descend_adopt(mas);
+ enode = tmp_mas->node;
+ tmp = mte_to_node(enode);
+ mte_set_node_dead(enode);
+ ma_free_rcu(tmp);
+}
- mas_mat_free(mas, free);
+/*
+ * mas_topiary_replace() - Replace the data with new data, then repair the
+ * parent links within the new tree. Iterate over the dead sub-tree, collect
+ * the dead subtrees, and topiary the nodes that are no longer of use.
+ *
+ * The new tree will have up to three children with the correct parent. Keep
+ * track of the new entries as they need to be followed to find the next level
+ * of new entries.
+ *
+ * The old tree will have up to three children with the old parent. Keep track
+ * of the old entries as they may have more nodes below replaced. Nodes within
+ * [index, last] are dead subtrees, others need to be freed and followed.
+ *
+ * @mas: The maple state pointing at the new data
+ * @old_enode: The maple encoded node being replaced
+ * @new_height: The new height of the tree as a result of the operation
+ *
+ */
+static inline void mas_topiary_replace(struct ma_state *mas,
+ struct maple_enode *old_enode, unsigned char new_height)
+{
+ struct ma_state tmp[3], tmp_next[3];
+ MA_TOPIARY(subtrees, mas->tree);
+ bool in_rcu;
+ int i, n;
- if (destroy)
- mas_mat_destroy(mas, destroy);
+ /* Place data in tree & then mark node as old */
+ mas_put_in_tree(mas, old_enode, new_height);
- if (mte_is_leaf(mas->node))
- return;
+ /* Update the parent pointers in the tree */
+ tmp[0] = *mas;
+ tmp[0].offset = 0;
+ tmp[1].status = ma_none;
+ tmp[2].status = ma_none;
+ while (!mte_is_leaf(tmp[0].node)) {
+ n = 0;
+ for (i = 0; i < 3; i++) {
+ if (mas_is_none(&tmp[i]))
+ continue;
- mas_update_gap(mas);
+ while (n < 3) {
+ if (!mas_find_child(&tmp[i], &tmp_next[n]))
+ break;
+ n++;
+ }
+
+ mas_adopt_children(&tmp[i], tmp[i].node);
+ }
+
+ if (MAS_WARN_ON(mas, n == 0))
+ break;
+
+ while (n < 3)
+ tmp_next[n++].status = ma_none;
+
+ for (i = 0; i < 3; i++)
+ tmp[i] = tmp_next[i];
+ }
+
+ /* Collect the old nodes that need to be discarded */
+ if (mte_is_leaf(old_enode))
+ return mas_free(mas, old_enode);
+
+ tmp[0] = *mas;
+ tmp[0].offset = 0;
+ tmp[0].node = old_enode;
+ tmp[1].status = ma_none;
+ tmp[2].status = ma_none;
+ in_rcu = mt_in_rcu(mas->tree);
+ do {
+ n = 0;
+ for (i = 0; i < 3; i++) {
+ if (mas_is_none(&tmp[i]))
+ continue;
+
+ while (n < 3) {
+ if (!mas_find_child(&tmp[i], &tmp_next[n]))
+ break;
+
+ if ((tmp_next[n].min >= tmp_next->index) &&
+ (tmp_next[n].max <= tmp_next->last)) {
+ mat_add(&subtrees, tmp_next[n].node);
+ tmp_next[n].status = ma_none;
+ } else {
+ n++;
+ }
+ }
+ }
+
+ if (MAS_WARN_ON(mas, n == 0))
+ break;
+
+ while (n < 3)
+ tmp_next[n++].status = ma_none;
+
+ for (i = 0; i < 3; i++) {
+ mas_topiary_node(mas, &tmp[i], in_rcu);
+ tmp[i] = tmp_next[i];
+ }
+ } while (!mte_is_leaf(tmp[0].node));
+
+ for (i = 0; i < 3; i++)
+ mas_topiary_node(mas, &tmp[i], in_rcu);
+
+ mas_mat_destroy(mas, &subtrees);
}
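Editorial note: the per-level bookkeeping above (tmp[]/tmp_next[], with unused entries marked none) may be easier to see in isolation. A toy sketch with an invented node type and none of the maple-tree semantics, showing only how at most three live positions are carried from one level to the next:

#include <stdbool.h>
#include <stdio.h>

#define FANOUT 4

struct toy_node {
	struct toy_node *slot[FANOUT];
	bool leaf;
};

/* Walk level by level, tracking at most three live nodes per level. */
static void walk_new_nodes(struct toy_node *root)
{
	struct toy_node *tmp[3] = { root, NULL, NULL };
	struct toy_node *tmp_next[3];

	while (!tmp[0]->leaf) {
		int n = 0;

		for (int i = 0; i < 3; i++) {
			if (!tmp[i])
				continue;
			for (int s = 0; s < FANOUT && n < 3; s++)
				if (tmp[i]->slot[s])
					tmp_next[n++] = tmp[i]->slot[s];
			/* the real code re-parents tmp[i]'s children here */
			printf("adopt children of node %p\n", (void *)tmp[i]);
		}
		if (!n)
			break;
		while (n < 3)
			tmp_next[n++] = NULL;
		for (int i = 0; i < 3; i++)
			tmp[i] = tmp_next[i];
	}
}

int main(void)
{
	struct toy_node leaf = { .leaf = true };
	struct toy_node mid = { .slot = { &leaf } };
	struct toy_node root = { .slot = { &mid } };

	walk_new_nodes(&root);	/* adopts children of root, then of mid */
	return 0;
}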
/*
- * mast_new_root() - Set a new tree root during subtree creation
- * @mast: The maple subtree state
+ * mas_wmb_replace() - Write memory barrier and replace
* @mas: The maple state
+ * @old_enode: The old maple encoded node that is being replaced.
+ * @new_height: The new height of the tree as a result of the operation
+ *
+ * Updates gap as necessary.
*/
-static inline void mast_new_root(struct maple_subtree_state *mast,
- struct ma_state *mas)
+static inline void mas_wmb_replace(struct ma_state *mas,
+ struct maple_enode *old_enode, unsigned char new_height)
{
- mas_mn(mast->l)->parent =
- ma_parent_ptr(((unsigned long)mas->tree | MA_ROOT_PARENT));
- if (!mte_dead_node(mast->orig_l->node) &&
- !mte_is_root(mast->orig_l->node)) {
- do {
- mast_ascend_free(mast);
- mast_topiary(mast);
- } while (!mte_is_root(mast->orig_l->node));
- }
- if ((mast->orig_l->node != mas->node) &&
- (mast->l->depth > mas_mt_height(mas))) {
- mat_add(mast->free, mas->node);
- }
+ /* Insert the new data in the tree */
+ mas_topiary_replace(mas, old_enode, new_height);
+
+ if (mte_is_leaf(mas->node))
+ return;
+
+ mas_update_gap(mas);
}
/*
@@ -2809,9 +2453,9 @@ static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
{
bool new_lmax = true;
- mast->l->node = mte_node_or_none(left);
- mast->m->node = mte_node_or_none(middle);
- mast->r->node = mte_node_or_none(right);
+ mas_node_or_none(mast->l, left);
+ mas_node_or_none(mast->m, middle);
+ mas_node_or_none(mast->r, right);
mast->l->min = mast->orig_l->min;
if (split == mast->bn->b_end) {
@@ -2885,7 +2529,7 @@ static inline bool mast_sufficient(struct maple_subtree_state *mast)
*/
static inline bool mast_overflow(struct maple_subtree_state *mast)
{
- if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
+ if (mast->bn->b_end > mt_slot_count(mast->orig_l->node))
return true;
return false;
@@ -2907,32 +2551,29 @@ static inline void *mtree_range_walk(struct ma_state *mas)
min = mas->min;
max = mas->max;
do {
- offset = 0;
last = next;
node = mte_to_node(next);
type = mte_node_type(next);
pivots = ma_pivots(node, type);
end = ma_data_end(node, type, pivots, max);
- if (unlikely(ma_dead_node(node)))
- goto dead_node;
-
- if (pivots[offset] >= mas->index) {
- prev_max = max;
- prev_min = min;
- max = pivots[offset];
+ prev_min = min;
+ prev_max = max;
+ if (pivots[0] >= mas->index) {
+ offset = 0;
+ max = pivots[0];
goto next;
}
- do {
+ offset = 1;
+ while (offset < end) {
+ if (pivots[offset] >= mas->index) {
+ max = pivots[offset];
+ break;
+ }
offset++;
- } while ((offset < end) && (pivots[offset] < mas->index));
+ }
- prev_min = min;
min = pivots[offset - 1] + 1;
- prev_max = max;
- if (likely(offset < end && pivots[offset]))
- max = pivots[offset];
-
next:
slots = ma_slots(node, type);
next = mt_slot(mas->tree, slots, offset);
@@ -2940,13 +2581,14 @@ next:
goto dead_node;
} while (!ma_is_leaf(type));
+ mas->end = end;
mas->offset = offset;
mas->index = min;
mas->last = max;
mas->min = prev_min;
mas->max = prev_max;
mas->node = last;
- return (void *) next;
+ return (void *)next;
dead_node:
mas_reset(mas);
@@ -2968,21 +2610,19 @@ dead_node:
* orig_l_mas->last is used in mas_consume to find the slots that will need to
* be either freed or destroyed. orig_l_mas->depth keeps track of the height of
* the new sub-tree in case the sub-tree becomes the full tree.
- *
- * Return: the number of elements in b_node during the last loop.
*/
-static int mas_spanning_rebalance(struct ma_state *mas,
+static void mas_spanning_rebalance(struct ma_state *mas,
struct maple_subtree_state *mast, unsigned char count)
{
unsigned char split, mid_split;
unsigned char slot = 0;
+ unsigned char new_height = 0; /* used if node is a new root */
struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
+ struct maple_enode *old_enode;
MA_STATE(l_mas, mas->tree, mas->index, mas->index);
MA_STATE(r_mas, mas->tree, mas->index, mas->last);
MA_STATE(m_mas, mas->tree, mas->index, mas->index);
- MA_TOPIARY(free, mas->tree);
- MA_TOPIARY(destroy, mas->tree);
/*
* The tree needs to be rebalanced and leaves need to be kept at the same level.
@@ -2991,17 +2631,13 @@ static int mas_spanning_rebalance(struct ma_state *mas,
mast->l = &l_mas;
mast->m = &m_mas;
mast->r = &r_mas;
- mast->free = &free;
- mast->destroy = &destroy;
- l_mas.node = r_mas.node = m_mas.node = MAS_NONE;
+ l_mas.status = r_mas.status = m_mas.status = ma_none;
/* Check if this is not root and has sufficient data. */
if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
mast_spanning_rebalance(mast);
- mast->orig_l->depth = 0;
-
/*
* Each level of the tree is examined and balanced, pushing data to the left or
* right, or rebalancing against left or right nodes is employed to avoid
@@ -3011,16 +2647,17 @@ static int mas_spanning_rebalance(struct ma_state *mas,
* original tree and the partially new tree. To remedy the parent pointers in
* the old tree, the new data is swapped into the active tree and a walk down
* the tree is performed and the parent pointers are updated.
- * See mas_descend_adopt() for more information..
+ * See mas_topiary_replace() for more information.
*/
while (count--) {
mast->bn->b_end--;
mast->bn->type = mte_node_type(mast->orig_l->node);
split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
- &mid_split, mast->orig_l->min);
+ &mid_split);
mast_set_split_parents(mast, left, middle, right, split,
mid_split);
mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
+ new_height++;
/*
* Copy data from next level in the tree to mast->bn from next
@@ -3028,13 +2665,12 @@ static int mas_spanning_rebalance(struct ma_state *mas,
*/
memset(mast->bn, 0, sizeof(struct maple_big_node));
mast->bn->type = mte_node_type(left);
- mast->orig_l->depth++;
/* Root already stored in l->node. */
if (mas_is_root_limits(mast->l))
goto new_root;
- mast_ascend_free(mast);
+ mast_ascend(mast);
mast_combine_cp_left(mast);
l_mas.offset = mast->bn->b_end;
mab_set_b_end(mast->bn, &l_mas, left);
@@ -3043,14 +2679,23 @@ static int mas_spanning_rebalance(struct ma_state *mas,
/* Copy anything necessary out of the right node. */
mast_combine_cp_right(mast);
- mast_topiary(mast);
mast->orig_l->last = mast->orig_l->max;
- if (mast_sufficient(mast))
- continue;
+ if (mast_sufficient(mast)) {
+ if (mast_overflow(mast))
+ continue;
+
+ if (mast->orig_l->node == mast->orig_r->node) {
+ /*
+ * The data in b_node should be stored in one
+ * node and in the tree
+ */
+ slot = mast->l->offset;
+ break;
+ }
- if (mast_overflow(mast))
continue;
+ }
/* May be a new root stored in mast->bn */
if (mas_is_root_limits(mast->orig_l))
@@ -3065,36 +2710,34 @@ static int mas_spanning_rebalance(struct ma_state *mas,
l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
mte_node_type(mast->orig_l->node));
- mast->orig_l->depth++;
+
mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
- mte_set_parent(left, l_mas.node, slot);
+ new_height++;
+ mas_set_parent(mas, left, l_mas.node, slot);
if (middle)
- mte_set_parent(middle, l_mas.node, ++slot);
+ mas_set_parent(mas, middle, l_mas.node, ++slot);
if (right)
- mte_set_parent(right, l_mas.node, ++slot);
+ mas_set_parent(mas, right, l_mas.node, ++slot);
if (mas_is_root_limits(mast->l)) {
new_root:
- mast_new_root(mast, mas);
+ mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas));
+ while (!mte_is_root(mast->orig_l->node))
+ mast_ascend(mast);
} else {
mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
}
- if (!mte_dead_node(mast->orig_l->node))
- mat_add(&free, mast->orig_l->node);
-
- mas->depth = mast->orig_l->depth;
- *mast->orig_l = l_mas;
- mte_set_node_dead(mas->node);
-
- /* Set up mas for insertion. */
- mast->orig_l->depth = mas->depth;
- mast->orig_l->alloc = mas->alloc;
- *mas = *mast->orig_l;
- mas_wmb_replace(mas, &free, &destroy);
+ old_enode = mast->orig_l->node;
+ mas->depth = l_mas.depth;
+ mas->node = l_mas.node;
+ mas->min = l_mas.min;
+ mas->max = l_mas.max;
+ mas->offset = l_mas.offset;
+ mas_wmb_replace(mas, old_enode, new_height);
mtree_range_walk(mas);
- return mast->bn->b_end;
+ return;
}
/*
@@ -3104,10 +2747,8 @@ new_root:
*
* Rebalance two nodes into a single node or two new nodes that are sufficient.
* Continue upwards until tree is sufficient.
- *
- * Return: the number of elements in b_node during the last loop.
*/
-static inline int mas_rebalance(struct ma_state *mas,
+static inline void mas_rebalance(struct ma_state *mas,
struct maple_big_node *b_node)
{
char empty_count = mas_mt_height(mas);
@@ -3117,7 +2758,7 @@ static inline int mas_rebalance(struct ma_state *mas,
MA_STATE(l_mas, mas->tree, mas->index, mas->last);
MA_STATE(r_mas, mas->tree, mas->index, mas->last);
- trace_ma_op(__func__, mas);
+ trace_ma_op(TP_FCT, mas);
/*
* Rebalancing occurs if a node is insufficient. Data is rebalanced
@@ -3128,9 +2769,6 @@ static inline int mas_rebalance(struct ma_state *mas,
* tries to combine the data in the same way. If one node contains the
* entire range of the tree, then that node is used as a new root node.
*/
- mas_node_count(mas, 1 + empty_count * 3);
- if (mas_is_err(mas))
- return 0;
mast.orig_l = &l_mas;
mast.orig_r = &r_mas;
@@ -3156,134 +2794,12 @@ static inline int mas_rebalance(struct ma_state *mas,
}
/*
- * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
- * state.
- * @mas: The maple state
- * @end: The end of the left-most node.
- *
- * During a mass-insert event (such as forking), it may be necessary to
- * rebalance the left-most node when it is not sufficient.
- */
-static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
-{
- enum maple_type mt = mte_node_type(mas->node);
- struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
- struct maple_enode *eparent;
- unsigned char offset, tmp, split = mt_slots[mt] / 2;
- void __rcu **l_slots, **slots;
- unsigned long *l_pivs, *pivs, gap;
- bool in_rcu = mt_in_rcu(mas->tree);
-
- MA_STATE(l_mas, mas->tree, mas->index, mas->last);
-
- l_mas = *mas;
- mas_prev_sibling(&l_mas);
-
- /* set up node. */
- if (in_rcu) {
- /* Allocate for both left and right as well as parent. */
- mas_node_count(mas, 3);
- if (mas_is_err(mas))
- return;
-
- newnode = mas_pop_node(mas);
- } else {
- newnode = &reuse;
- }
-
- node = mas_mn(mas);
- newnode->parent = node->parent;
- slots = ma_slots(newnode, mt);
- pivs = ma_pivots(newnode, mt);
- left = mas_mn(&l_mas);
- l_slots = ma_slots(left, mt);
- l_pivs = ma_pivots(left, mt);
- if (!l_slots[split])
- split++;
- tmp = mas_data_end(&l_mas) - split;
-
- memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
- memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
- pivs[tmp] = l_mas.max;
- memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
- memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
-
- l_mas.max = l_pivs[split];
- mas->min = l_mas.max + 1;
- eparent = mt_mk_node(mte_parent(l_mas.node),
- mas_parent_enum(&l_mas, l_mas.node));
- tmp += end;
- if (!in_rcu) {
- unsigned char max_p = mt_pivots[mt];
- unsigned char max_s = mt_slots[mt];
-
- if (tmp < max_p)
- memset(pivs + tmp, 0,
- sizeof(unsigned long *) * (max_p - tmp));
-
- if (tmp < mt_slots[mt])
- memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
-
- memcpy(node, newnode, sizeof(struct maple_node));
- ma_set_meta(node, mt, 0, tmp - 1);
- mte_set_pivot(eparent, mte_parent_slot(l_mas.node),
- l_pivs[split]);
-
- /* Remove data from l_pivs. */
- tmp = split + 1;
- memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
- memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
- ma_set_meta(left, mt, 0, split);
-
- goto done;
- }
-
- /* RCU requires replacing both l_mas, mas, and parent. */
- mas->node = mt_mk_node(newnode, mt);
- ma_set_meta(newnode, mt, 0, tmp);
-
- new_left = mas_pop_node(mas);
- new_left->parent = left->parent;
- mt = mte_node_type(l_mas.node);
- slots = ma_slots(new_left, mt);
- pivs = ma_pivots(new_left, mt);
- memcpy(slots, l_slots, sizeof(void *) * split);
- memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
- ma_set_meta(new_left, mt, 0, split);
- l_mas.node = mt_mk_node(new_left, mt);
-
- /* replace parent. */
- offset = mte_parent_slot(mas->node);
- mt = mas_parent_enum(&l_mas, l_mas.node);
- parent = mas_pop_node(mas);
- slots = ma_slots(parent, mt);
- pivs = ma_pivots(parent, mt);
- memcpy(parent, mte_to_node(eparent), sizeof(struct maple_node));
- rcu_assign_pointer(slots[offset], mas->node);
- rcu_assign_pointer(slots[offset - 1], l_mas.node);
- pivs[offset - 1] = l_mas.max;
- eparent = mt_mk_node(parent, mt);
-done:
- gap = mas_leaf_max_gap(mas);
- mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
- gap = mas_leaf_max_gap(&l_mas);
- mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
- mas_ascend(mas);
-
- if (in_rcu)
- mas_replace(mas, false);
-
- mas_update_gap(mas);
-}
-
-/*
* mas_split_final_node() - Split the final node in a subtree operation.
* @mast: the maple subtree state
* @mas: The maple state
- * @height: The height of the tree in case it's a new root.
*/
-static inline bool mas_split_final_node(struct maple_subtree_state *mast,
- struct ma_state *mas, int height)
+static inline void mas_split_final_node(struct maple_subtree_state *mast,
+ struct ma_state *mas)
{
struct maple_enode *ancestor;
@@ -3292,21 +2808,19 @@ static inline bool mas_split_final_node(struct maple_subtree_state *mast,
mast->bn->type = maple_arange_64;
else
mast->bn->type = maple_range_64;
- mas->depth = height;
}
/*
* Only a single node is used here, could be root.
* The Big_node data should just fit in a single node.
*/
ancestor = mas_new_ma_node(mas, mast->bn);
- mte_set_parent(mast->l->node, ancestor, mast->l->offset);
- mte_set_parent(mast->r->node, ancestor, mast->r->offset);
+ mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset);
+ mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset);
mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
mast->l->node = ancestor;
mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
mas->offset = mast->bn->b_end - 1;
- return true;
}
/*
@@ -3320,19 +2834,14 @@ static inline void mast_fill_bnode(struct maple_subtree_state *mast,
unsigned char skip)
{
bool cp = true;
- struct maple_enode *old = mas->node;
unsigned char split;
- memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap));
- memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot));
- memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot));
- mast->bn->b_end = 0;
+ memset(mast->bn, 0, sizeof(struct maple_big_node));
if (mte_is_root(mas->node)) {
cp = false;
} else {
mas_ascend(mas);
- mat_add(mast->free, old);
mas->offset = mte_parent_slot(mas->node);
}
@@ -3386,7 +2895,6 @@ static inline void mast_split_data(struct maple_subtree_state *mast,
* mas_push_data() - Instead of splitting a node, it is beneficial to push the
* data to the right or left node if there is room.
* @mas: The maple state
- * @height: The current height of the maple state
* @mast: The maple subtree state
* @left: Push left or not.
*
@@ -3394,8 +2902,8 @@ static inline void mast_split_data(struct maple_subtree_state *mast,
*
* Return: True if pushed, false otherwise.
*/
-static inline bool mas_push_data(struct ma_state *mas, int height,
- struct maple_subtree_state *mast, bool left)
+static inline bool mas_push_data(struct ma_state *mas,
+ struct maple_subtree_state *mast, bool left)
{
unsigned char slot_total = mast->bn->b_end;
unsigned char end, space, split;
@@ -3436,13 +2944,11 @@ static inline bool mas_push_data(struct ma_state *mas, int height,
split = mt_slots[mast->bn->type] - 2;
if (left) {
/* Switch mas to prev node */
- mat_add(mast->free, mas->node);
*mas = tmp_mas;
/* Start using mast->l for the left side. */
tmp_mas.node = mast->l->node;
*mast->l = tmp_mas;
} else {
- mat_add(mast->free, tmp_mas.node);
tmp_mas.node = mast->r->node;
*mast->r = tmp_mas;
split = slot_total - split;
@@ -3454,7 +2960,7 @@ static inline bool mas_push_data(struct ma_state *mas, int height,
mast_split_data(mast, mas, split);
mast_fill_bnode(mast, mas, 2);
- mas_split_final_node(mast, mas, height + 1);
+ mas_split_final_node(mast, mas);
return true;
}
@@ -3462,14 +2968,14 @@ static inline bool mas_push_data(struct ma_state *mas, int height,
* mas_split() - Split data that is too big for one node into two.
* @mas: The maple state
* @b_node: The maple big node
- * Return: 1 on success, 0 on failure.
*/
-static int mas_split(struct ma_state *mas, struct maple_big_node *b_node)
+static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
{
-
struct maple_subtree_state mast;
int height = 0;
+ unsigned int orig_height = mas_mt_height(mas);
unsigned char mid_split, split = 0;
+ struct maple_enode *old;
/*
* Splitting is handled differently from any other B-tree; the Maple
@@ -3492,25 +2998,18 @@ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node)
MA_STATE(r_mas, mas->tree, mas->index, mas->last);
MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
- MA_TOPIARY(mat, mas->tree);
- trace_ma_op(__func__, mas);
- mas->depth = mas_mt_height(mas);
- /* Allocation failures will happen early. */
- mas_node_count(mas, 1 + mas->depth * 2);
- if (mas_is_err(mas))
- return 0;
+ trace_ma_op(TP_FCT, mas);
mast.l = &l_mas;
mast.r = &r_mas;
mast.orig_l = &prev_l_mas;
mast.orig_r = &prev_r_mas;
- mast.free = &mat;
mast.bn = b_node;
- while (height++ <= mas->depth) {
+ while (height++ <= orig_height) {
if (mt_slots[b_node->type] > b_node->b_end) {
- mas_split_final_node(&mast, mas, height);
+ mas_split_final_node(&mast, mas);
break;
}
@@ -3525,14 +3024,17 @@ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node)
* is a significant savings.
*/
/* Try to push left. */
- if (mas_push_data(mas, height, &mast, true))
+ if (mas_push_data(mas, &mast, true)) {
+ height++;
break;
-
+ }
/* Try to push right. */
- if (mas_push_data(mas, height, &mast, false))
+ if (mas_push_data(mas, &mast, false)) {
+ height++;
break;
+ }
- split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
+ split = mab_calc_split(mas, b_node, &mid_split);
mast_split_data(&mast, mas, split);
/*
* Usually correct, mab_mas_cp in the above call overwrites
@@ -3545,76 +3047,29 @@ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node)
}
/* Set the original node as dead */
- mat_add(mast.free, mas->node);
+ old = mas->node;
mas->node = l_mas.node;
- mas_wmb_replace(mas, mast.free, NULL);
+ mas_wmb_replace(mas, old, height);
mtree_range_walk(mas);
- return 1;
-}
-
-/*
- * mas_reuse_node() - Reuse the node to store the data.
- * @wr_mas: The maple write state
- * @bn: The maple big node
- * @end: The end of the data.
- *
- * Will always return false in RCU mode.
- *
- * Return: True if node was reused, false otherwise.
- */
-static inline bool mas_reuse_node(struct ma_wr_state *wr_mas,
- struct maple_big_node *bn, unsigned char end)
-{
- /* Need to be rcu safe. */
- if (mt_in_rcu(wr_mas->mas->tree))
- return false;
-
- if (end > bn->b_end) {
- int clear = mt_slots[wr_mas->type] - bn->b_end;
-
- memset(wr_mas->slots + bn->b_end, 0, sizeof(void *) * clear--);
- memset(wr_mas->pivots + bn->b_end, 0, sizeof(void *) * clear);
- }
- mab_mas_cp(bn, 0, bn->b_end, wr_mas->mas, false);
- return true;
+ return;
}
/*
* mas_commit_b_node() - Commit the big node into the tree.
* @wr_mas: The maple write state
* @b_node: The maple big node
- * @end: The end of the data.
*/
-static inline int mas_commit_b_node(struct ma_wr_state *wr_mas,
- struct maple_big_node *b_node, unsigned char end)
+static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas,
+ struct maple_big_node *b_node)
{
- struct maple_node *node;
- unsigned char b_end = b_node->b_end;
- enum maple_type b_type = b_node->type;
+ enum store_type type = wr_mas->mas->store_type;
- if ((b_end < mt_min_slots[b_type]) &&
- (!mte_is_root(wr_mas->mas->node)) &&
- (mas_mt_height(wr_mas->mas) > 1))
- return mas_rebalance(wr_mas->mas, b_node);
-
- if (b_end >= mt_slots[b_type])
- return mas_split(wr_mas->mas, b_node);
+ WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store);
- if (mas_reuse_node(wr_mas, b_node, end))
- goto reuse_node;
-
- mas_node_count(wr_mas->mas, 1);
- if (mas_is_err(wr_mas->mas))
- return 0;
+ if (type == wr_rebalance)
+ return mas_rebalance(wr_mas->mas, b_node);
- node = mas_pop_node(wr_mas->mas);
- node->parent = mas_mn(wr_mas->mas)->parent;
- wr_mas->mas->node = mt_mk_node(node, b_type);
- mab_mas_cp(b_node, 0, b_end, wr_mas->mas, false);
- mas_replace(wr_mas->mas, false);
-reuse_node:
- mas_update_gap(wr_mas->mas);
- return 1;
+ return mas_split(wr_mas->mas, b_node);
}
/*
@@ -3622,7 +3077,7 @@ reuse_node:
* @mas: The maple state
* @entry: The entry to store into the tree
*/
-static inline int mas_root_expand(struct ma_state *mas, void *entry)
+static inline void mas_root_expand(struct ma_state *mas, void *entry)
{
void *contents = mas_root_locked(mas);
enum maple_type type = maple_leaf_64;
@@ -3631,16 +3086,12 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry)
unsigned long *pivots;
int slot = 0;
- mas_node_count(mas, 1);
- if (unlikely(mas_is_err(mas)))
- return 0;
-
node = mas_pop_node(mas);
pivots = ma_pivots(node, type);
slots = ma_slots(node, type);
- node->parent = ma_parent_ptr(
- ((unsigned long)mas->tree | MA_ROOT_PARENT));
+ node->parent = ma_parent_ptr(mas_tree_parent(mas));
mas->node = mt_mk_node(node, type);
+ mas->status = ma_active;
if (mas->index) {
if (contents) {
@@ -3655,35 +3106,42 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry)
mas->offset = slot;
pivots[slot] = mas->last;
if (mas->last != ULONG_MAX)
- slot++;
- mas->depth = 1;
- mas_set_height(mas);
+ pivots[++slot] = ULONG_MAX;
+ mt_set_height(mas->tree, 1);
+ ma_set_meta(node, maple_leaf_64, 0, slot);
/* swap the new root into the tree */
rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
- ma_set_meta(node, maple_leaf_64, 0, slot);
- return slot;
+ return;
}
+/*
+ * mas_store_root() - Storing value into root.
+ * @mas: The maple state
+ * @entry: The entry to store.
+ *
+ * There is no root node now and we are storing a value into the root - this
+ * function either assigns the pointer or expands into a node.
+ */
static inline void mas_store_root(struct ma_state *mas, void *entry)
{
- if (likely((mas->last != 0) || (mas->index != 0)))
+ if (!entry) {
+ if (!mas->index)
+ rcu_assign_pointer(mas->tree->ma_root, NULL);
+ } else if (likely((mas->last != 0) || (mas->index != 0)))
mas_root_expand(mas, entry);
else if (((unsigned long) (entry) & 3) == 2)
mas_root_expand(mas, entry);
else {
rcu_assign_pointer(mas->tree->ma_root, entry);
- mas->node = MAS_START;
+ mas->status = ma_start;
}
}
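Editorial note: the decision above reduces to a few checks on the range and on the low bits of the value. A toy restatement (illustrative only; the helper and its arguments are invented, and "low two bits == 2" stands for a value that looks like an internal entry):

#include <stdbool.h>
#include <stdio.h>

static const char *root_action(unsigned long index, unsigned long last,
			       unsigned long entry_bits, bool null_entry)
{
	if (null_entry && !index)
		return "clear root pointer";
	if (index || last)
		return "expand into a node";
	if ((entry_bits & 3) == 2)
		return "expand into a node";
	return "store pointer directly in root";
}

int main(void)
{
	printf("%s\n", root_action(0, 0, 0, false));	/* direct store */
	printf("%s\n", root_action(0, 5, 0, false));	/* range, expand */
	printf("%s\n", root_action(0, 0, 2, false));	/* collides, expand */
	printf("%s\n", root_action(0, 0, 0, true));	/* clear */
	return 0;
}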
/*
* mas_is_span_wr() - Check if the write needs to be treated as a write that
* spans the node.
- * @mas: The maple state
- * @piv: The pivot value being written
- * @type: The maple node type
- * @entry: The data to write
+ * @wr_mas: The maple write state
*
* Spanning writes are writes that start in one node and end in another OR if
* the write of a %NULL will cause the node to end with a %NULL.
@@ -3692,43 +3150,31 @@ static inline void mas_store_root(struct ma_state *mas, void *entry)
*/
static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
{
- unsigned long max;
+ unsigned long max = wr_mas->r_max;
unsigned long last = wr_mas->mas->last;
- unsigned long piv = wr_mas->r_max;
enum maple_type type = wr_mas->type;
void *entry = wr_mas->entry;
- /* Contained in this pivot */
- if (piv > last)
+ /* Contained in this pivot, fast path */
+ if (last < max)
return false;
- max = wr_mas->mas->max;
- if (unlikely(ma_is_leaf(type))) {
- /* Fits in the node, but may span slots. */
+ if (ma_is_leaf(type)) {
+ max = wr_mas->mas->max;
if (last < max)
return false;
+ }
- /* Writes to the end of the node but not null. */
- if ((last == max) && entry)
- return false;
-
+ if (last == max) {
/*
- * Writing ULONG_MAX is not a spanning write regardless of the
- * value being written as long as the range fits in the node.
+ * The last entry of a leaf node cannot be NULL unless it is the
+ * rightmost node (writing ULONG_MAX); otherwise it spans slots.
*/
- if ((last == ULONG_MAX) && (last == max))
- return false;
- } else if (piv == last) {
- if (entry)
- return false;
-
- /* Detect spanning store wr walk */
- if (last == ULONG_MAX)
+ if (entry || last == ULONG_MAX)
return false;
}
- trace_ma_write(__func__, wr_mas->mas, piv, entry);
-
+ trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry);
return true;
}
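Editorial note: for the leaf-node case the reworked test above boils down to a handful of comparisons. A standalone restatement (illustrative only; parameter names and sample values are invented, with ~0UL standing in for ULONG_MAX):

#include <stdbool.h>
#include <stdio.h>

static bool is_span_write(unsigned long last, unsigned long r_max,
			  unsigned long node_max, bool entry_is_null)
{
	if (last < r_max)		/* contained in the current range */
		return false;
	if (last < node_max)		/* ends inside this node */
		return false;
	if (last == node_max) {
		/* NULL at the node end spans unless this is the rightmost node */
		if (!entry_is_null || last == ~0UL)
			return false;
	}
	return true;
}

int main(void)
{
	/* node covers up to 99, current range ends at 49 */
	printf("%d\n", is_span_write(30, 49, 99, false));	/* 0: fits in the range */
	printf("%d\n", is_span_write(99, 49, 99, false));	/* 0: ends on node max, non-NULL */
	printf("%d\n", is_span_write(99, 49, 99, true));	/* 1: NULL up to node max spans */
	printf("%d\n", is_span_write(150, 49, 99, false));	/* 1: runs past the node */
	return 0;
}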
@@ -3769,13 +3215,23 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas)
if (ma_is_leaf(wr_mas->type))
return true;
+ if (mas->end < mt_slots[wr_mas->type] - 1)
+ wr_mas->vacant_height = mas->depth + 1;
+
+ if (ma_is_root(mas_mn(mas))) {
+ /* root needs more than 2 entries to be sufficient + 1 */
+ if (mas->end > 2)
+ wr_mas->sufficient_height = 1;
+ } else if (mas->end > mt_min_slots[wr_mas->type] + 1)
+ wr_mas->sufficient_height = mas->depth + 1;
+
mas_wr_walk_traverse(wr_mas);
}
return true;
}
-static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
+static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
{
struct ma_state *mas = wr_mas->mas;
@@ -3784,11 +3240,9 @@ static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
mas->offset);
if (ma_is_leaf(wr_mas->type))
- return true;
+ return;
mas_wr_walk_traverse(wr_mas);
-
}
- return true;
}
/*
* mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
@@ -3862,43 +3316,33 @@ static inline void *mtree_lookup_walk(struct ma_state *mas)
enum maple_type type;
void __rcu **slots;
unsigned char end;
- unsigned long max;
next = mas->node;
- max = ULONG_MAX;
do {
- offset = 0;
node = mte_to_node(next);
type = mte_node_type(next);
pivots = ma_pivots(node, type);
- end = ma_data_end(node, type, pivots, max);
- if (unlikely(ma_dead_node(node)))
- goto dead_node;
-
- if (pivots[offset] >= mas->index)
- goto next;
-
+ end = mt_pivots[type];
+ offset = 0;
do {
- offset++;
- } while ((offset < end) && (pivots[offset] < mas->index));
-
- if (likely(offset > end))
- max = pivots[offset];
+ if (pivots[offset] >= mas->index)
+ break;
+ } while (++offset < end);
-next:
slots = ma_slots(node, type);
next = mt_slot(mas->tree, slots, offset);
if (unlikely(ma_dead_node(node)))
goto dead_node;
} while (!ma_is_leaf(type));
- return (void *) next;
+ return (void *)next;
dead_node:
mas_reset(mas);
return NULL;
}
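Editorial note: the trimmed lookup loop above scans the pivots until one covers the index, descends through that slot, and repeats until a leaf is reached. A toy descent over invented types shows the shape of that loop (no dead-node or RCU handling):

#include <stdio.h>

#define SLOTS 4

struct toy {
	unsigned long pivot[SLOTS - 1];
	void *slot[SLOTS];
	int leaf;
};

static void *toy_lookup(struct toy *node, unsigned long index)
{
	while (1) {
		unsigned char offset = 0;

		/* stop at the first pivot that is >= index */
		while (offset < SLOTS - 1 && node->pivot[offset] < index)
			offset++;

		if (node->leaf)
			return node->slot[offset];
		node = node->slot[offset];
	}
}

int main(void)
{
	struct toy leaf = { .pivot = { 9, 19, 29 },
			    .slot = { "A", "B", "C", "D" }, .leaf = 1 };
	struct toy root = { .pivot = { 99, 0, 0 },
			    .slot = { &leaf }, .leaf = 0 };

	printf("%s\n", (char *)toy_lookup(&root, 15));	/* prints B */
	return 0;
}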
+static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
/*
* mas_new_root() - Create a new root node that only contains the entry passed
* in.
@@ -3906,10 +3350,8 @@ dead_node:
* @entry: The entry to store.
*
* Only valid when the index == 0 and the last == ULONG_MAX
- *
- * Return 0 on error, 1 on success.
*/
-static inline int mas_new_root(struct ma_state *mas, void *entry)
+static inline void mas_new_root(struct ma_state *mas, void *entry)
{
struct maple_enode *root = mas_root_locked(mas);
enum maple_type type = maple_leaf_64;
@@ -3917,35 +3359,31 @@ static inline int mas_new_root(struct ma_state *mas, void *entry)
void __rcu **slots;
unsigned long *pivots;
- if (!entry && !mas->index && mas->last == ULONG_MAX) {
- mas->depth = 0;
- mas_set_height(mas);
+ WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX);
+
+ if (!entry) {
+ mt_set_height(mas->tree, 0);
rcu_assign_pointer(mas->tree->ma_root, entry);
- mas->node = MAS_START;
+ mas->status = ma_start;
goto done;
}
- mas_node_count(mas, 1);
- if (mas_is_err(mas))
- return 0;
-
node = mas_pop_node(mas);
pivots = ma_pivots(node, type);
slots = ma_slots(node, type);
- node->parent = ma_parent_ptr(
- ((unsigned long)mas->tree | MA_ROOT_PARENT));
+ node->parent = ma_parent_ptr(mas_tree_parent(mas));
mas->node = mt_mk_node(node, type);
+ mas->status = ma_active;
rcu_assign_pointer(slots[0], entry);
pivots[0] = mas->last;
- mas->depth = 1;
- mas_set_height(mas);
+ mt_set_height(mas->tree, 1);
rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
done:
if (xa_is_node(root))
mte_destroy_walk(root, mas->tree);
- return 1;
+ return;
}
/*
* mas_wr_spanning_store() - Create a subtree with the store operation completed
@@ -3953,10 +3391,8 @@ done:
* Note that mas is expected to point to the node which caused the store to
* span.
* @wr_mas: The maple write state
- *
- * Return: 0 on error, positive on success.
*/
-static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
+static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
{
struct maple_subtree_state mast;
struct maple_big_node b_node;
@@ -3966,7 +3402,6 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
/* Left and Right side of spanning store */
MA_STATE(l_mas, NULL, 0, 0);
MA_STATE(r_mas, NULL, 0, 0);
-
MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
@@ -3983,7 +3418,7 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
* of data may happen.
*/
mas = wr_mas->mas;
- trace_ma_op(__func__, mas);
+ trace_ma_op(TP_FCT, mas);
if (unlikely(!mas->index && mas->last == ULONG_MAX))
return mas_new_root(mas, wr_mas->entry);
@@ -3992,9 +3427,6 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
* entries per level plus a new root.
*/
height = mas_mt_height(mas);
- mas_node_count(mas, 1 + height * 3);
- if (mas_is_err(mas))
- return 0;
/*
* Set up right side. Need to get to the next offset after the spanning
@@ -4029,10 +3461,10 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
memset(&b_node, 0, sizeof(struct maple_big_node));
/* Copy l_mas and store the value in b_node. */
- mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end);
- /* Copy r_mas into b_node. */
- if (r_mas.offset <= r_wr_mas.node_end)
- mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end,
+ mas_store_b_node(&l_wr_mas, &b_node, l_mas.end);
+ /* Copy r_mas into b_node if there is anything to copy. */
+ if (r_mas.max > r_mas.last)
+ mas_mab_cp(&r_mas, r_mas.offset, r_mas.end,
&b_node, b_node.b_end + 1);
else
b_node.b_end++;
@@ -4052,61 +3484,24 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
* @wr_mas: The maple write state
*
* Attempts to reuse the node, but may allocate.
- *
- * Return: True if stored, false otherwise
*/
-static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
+static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
+ unsigned char new_end)
{
struct ma_state *mas = wr_mas->mas;
void __rcu **dst_slots;
unsigned long *dst_pivots;
- unsigned char dst_offset;
- unsigned char new_end = wr_mas->node_end;
- unsigned char offset;
- unsigned char node_slots = mt_slots[wr_mas->type];
+ unsigned char dst_offset, offset_end = wr_mas->offset_end;
struct maple_node reuse, *newnode;
- unsigned char copy_size, max_piv = mt_pivots[wr_mas->type];
+ unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type];
bool in_rcu = mt_in_rcu(mas->tree);
+ unsigned char height = mas_mt_height(mas);
- offset = mas->offset;
- if (mas->last == wr_mas->r_max) {
- /* runs right to the end of the node */
- if (mas->last == mas->max)
- new_end = offset;
- /* don't copy this offset */
- wr_mas->offset_end++;
- } else if (mas->last < wr_mas->r_max) {
- /* new range ends in this range */
- if (unlikely(wr_mas->r_max == ULONG_MAX))
- mas_bulk_rebalance(mas, wr_mas->node_end, wr_mas->type);
-
- new_end++;
- } else {
- if (wr_mas->end_piv == mas->last)
- wr_mas->offset_end++;
-
- new_end -= wr_mas->offset_end - offset - 1;
- }
-
- /* new range starts within a range */
- if (wr_mas->r_min < mas->index)
- new_end++;
-
- /* Not enough room */
- if (new_end >= node_slots)
- return false;
-
- /* Not enough data. */
- if (!mte_is_root(mas->node) && (new_end <= mt_min_slots[wr_mas->type]) &&
- !(mas->mas_flags & MA_STATE_BULK))
- return false;
+ if (mas->last == wr_mas->end_piv)
+ offset_end++; /* don't copy this offset */
/* set up node. */
if (in_rcu) {
- mas_node_count(mas, 1);
- if (mas_is_err(mas))
- return false;
-
newnode = mas_pop_node(mas);
} else {
memset(&reuse, 0, sizeof(struct maple_node));
@@ -4117,144 +3512,122 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
dst_pivots = ma_pivots(newnode, wr_mas->type);
dst_slots = ma_slots(newnode, wr_mas->type);
/* Copy from start to insert point */
- memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * (offset + 1));
- memcpy(dst_slots, wr_mas->slots, sizeof(void *) * (offset + 1));
- dst_offset = offset;
+ memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
+ memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset);
/* Handle insert of new range starting after old range */
if (wr_mas->r_min < mas->index) {
- mas->offset++;
- rcu_assign_pointer(dst_slots[dst_offset], wr_mas->content);
- dst_pivots[dst_offset++] = mas->index - 1;
+ rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content);
+ dst_pivots[mas->offset++] = mas->index - 1;
}
/* Store the new entry and range end. */
- if (dst_offset < max_piv)
- dst_pivots[dst_offset] = mas->last;
- mas->offset = dst_offset;
- rcu_assign_pointer(dst_slots[dst_offset], wr_mas->entry);
+ if (mas->offset < node_pivots)
+ dst_pivots[mas->offset] = mas->last;
+ rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry);
/*
* this range wrote to the end of the node or it overwrote the rest of
* the data
*/
- if (wr_mas->offset_end > wr_mas->node_end || mas->last >= mas->max) {
- new_end = dst_offset;
+ if (offset_end > mas->end)
goto done;
- }
- dst_offset++;
+ dst_offset = mas->offset + 1;
/* Copy to the end of node if necessary. */
- copy_size = wr_mas->node_end - wr_mas->offset_end + 1;
- memcpy(dst_slots + dst_offset, wr_mas->slots + wr_mas->offset_end,
+ copy_size = mas->end - offset_end + 1;
+ memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end,
sizeof(void *) * copy_size);
- if (dst_offset < max_piv) {
- if (copy_size > max_piv - dst_offset)
- copy_size = max_piv - dst_offset;
+ memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end,
+ sizeof(unsigned long) * (copy_size - 1));
- memcpy(dst_pivots + dst_offset,
- wr_mas->pivots + wr_mas->offset_end,
- sizeof(unsigned long) * copy_size);
- }
-
- if ((wr_mas->node_end == node_slots - 1) && (new_end < node_slots - 1))
+ if (new_end < node_pivots)
dst_pivots[new_end] = mas->max;
done:
- mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);
+ mas_leaf_set_meta(newnode, maple_leaf_64, new_end);
if (in_rcu) {
+ struct maple_enode *old_enode = mas->node;
+
mas->node = mt_mk_node(newnode, wr_mas->type);
- mas_replace(mas, false);
+ mas_replace_node(mas, old_enode, height);
} else {
memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
}
- trace_ma_write(__func__, mas, 0, wr_mas->entry);
+ trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
mas_update_gap(mas);
- return true;
+ mas->end = new_end;
+ return;
}
/*
* mas_wr_slot_store: Attempt to store a value in a slot.
* @wr_mas: the maple write state
- *
- * Return: True if stored, false otherwise
*/
-static inline bool mas_wr_slot_store(struct ma_wr_state *wr_mas)
+static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
{
struct ma_state *mas = wr_mas->mas;
- unsigned long lmax; /* Logical max. */
unsigned char offset = mas->offset;
+ void __rcu **slots = wr_mas->slots;
+ bool gap = false;
- if ((wr_mas->r_max > mas->last) && ((wr_mas->r_min != mas->index) ||
- (offset != wr_mas->node_end)))
- return false;
+ gap |= !mt_slot_locked(mas->tree, slots, offset);
+ gap |= !mt_slot_locked(mas->tree, slots, offset + 1);
- if (offset == wr_mas->node_end - 1)
- lmax = mas->max;
- else
- lmax = wr_mas->pivots[offset + 1];
-
- /* going to overwrite too many slots. */
- if (lmax < mas->last)
- return false;
-
- if (wr_mas->r_min == mas->index) {
- /* overwriting two or more ranges with one. */
- if (lmax == mas->last)
- return false;
-
- /* Overwriting all of offset and a portion of offset + 1. */
- rcu_assign_pointer(wr_mas->slots[offset], wr_mas->entry);
- wr_mas->pivots[offset] = mas->last;
- goto done;
- }
-
- /* Doesn't end on the next range end. */
- if (lmax != mas->last)
- return false;
-
- /* Overwriting a portion of offset and all of offset + 1 */
- if ((offset + 1 < mt_pivots[wr_mas->type]) &&
- (wr_mas->entry || wr_mas->pivots[offset + 1]))
+ if (wr_mas->offset_end - offset == 1) {
+ if (mas->index == wr_mas->r_min) {
+ /* Overwriting the range and a part of the next one */
+ rcu_assign_pointer(slots[offset], wr_mas->entry);
+ wr_mas->pivots[offset] = mas->last;
+ } else {
+ /* Overwriting a part of the range and the next one */
+ rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
+ wr_mas->pivots[offset] = mas->index - 1;
+ mas->offset++; /* Keep mas accurate. */
+ }
+ } else {
+ WARN_ON_ONCE(mt_in_rcu(mas->tree));
+ /*
+ * Expand the range, only partially overwriting the previous and
+ * next ranges
+ */
+ gap |= !mt_slot_locked(mas->tree, slots, offset + 2);
+ rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
+ wr_mas->pivots[offset] = mas->index - 1;
wr_mas->pivots[offset + 1] = mas->last;
+ mas->offset++; /* Keep mas accurate. */
+ }
- rcu_assign_pointer(wr_mas->slots[offset + 1], wr_mas->entry);
- wr_mas->pivots[offset] = mas->index - 1;
- mas->offset++; /* Keep mas accurate. */
-
-done:
- trace_ma_write(__func__, mas, 0, wr_mas->entry);
- mas_update_gap(mas);
- return true;
-}
-
-static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
-{
- while ((wr_mas->mas->last > wr_mas->end_piv) &&
- (wr_mas->offset_end < wr_mas->node_end))
- wr_mas->end_piv = wr_mas->pivots[++wr_mas->offset_end];
+ trace_ma_write(TP_FCT, mas, 0, wr_mas->entry);
+ /*
+ * Only update gap when the new entry is empty or there is an empty
+ * entry in the original two ranges.
+ */
+ if (!wr_mas->entry || gap)
+ mas_update_gap(mas);
- if (wr_mas->mas->last > wr_mas->end_piv)
- wr_mas->end_piv = wr_mas->mas->max;
+ return;
}
static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
{
struct ma_state *mas = wr_mas->mas;
- if (mas->last < wr_mas->end_piv && !wr_mas->slots[wr_mas->offset_end])
+ if (!wr_mas->slots[wr_mas->offset_end]) {
+ /* If this one is null, the next and prev are not */
mas->last = wr_mas->end_piv;
-
- /* Check next slot(s) if we are overwriting the end */
- if ((mas->last == wr_mas->end_piv) &&
- (wr_mas->node_end != wr_mas->offset_end) &&
- !wr_mas->slots[wr_mas->offset_end + 1]) {
- wr_mas->offset_end++;
- if (wr_mas->offset_end == wr_mas->node_end)
- mas->last = mas->max;
- else
- mas->last = wr_mas->pivots[wr_mas->offset_end];
- wr_mas->end_piv = mas->last;
+ } else {
+ /* Check next slot(s) if we are overwriting the end */
+ if ((mas->last == wr_mas->end_piv) &&
+ (mas->end != wr_mas->offset_end) &&
+ !wr_mas->slots[wr_mas->offset_end + 1]) {
+ wr_mas->offset_end++;
+ if (wr_mas->offset_end == mas->end)
+ mas->last = mas->max;
+ else
+ mas->last = wr_mas->pivots[wr_mas->offset_end];
+ wr_mas->end_piv = mas->last;
+ }
}
if (!wr_mas->content) {
@@ -4272,41 +3645,82 @@ static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
}
}
-static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
+static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
{
- unsigned char end = wr_mas->node_end;
- unsigned char new_end = end + 1;
- struct ma_state *mas = wr_mas->mas;
- unsigned char node_pivots = mt_pivots[wr_mas->type];
+ while ((wr_mas->offset_end < wr_mas->mas->end) &&
+ (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end]))
+ wr_mas->offset_end++;
- if ((mas->index != wr_mas->r_min) && (mas->last == wr_mas->r_max)) {
- if (new_end < node_pivots)
- wr_mas->pivots[new_end] = wr_mas->pivots[end];
+ if (wr_mas->offset_end < wr_mas->mas->end)
+ wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end];
+ else
+ wr_mas->end_piv = wr_mas->mas->max;
+}
- if (new_end < node_pivots)
- ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
+static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
+ unsigned char new_end = mas->end + 2;
- rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->entry);
- mas->offset = new_end;
- wr_mas->pivots[end] = mas->index - 1;
+ new_end -= wr_mas->offset_end - mas->offset;
+ if (wr_mas->r_min == mas->index)
+ new_end--;
- return true;
- }
+ if (wr_mas->end_piv == mas->last)
+ new_end--;
- if ((mas->index == wr_mas->r_min) && (mas->last < wr_mas->r_max)) {
- if (new_end < node_pivots)
- wr_mas->pivots[new_end] = wr_mas->pivots[end];
+ return new_end;
+}
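Editorial note: mas_wr_new_end() above works out how many slots the node will use after the write. Treated as plain arithmetic (standalone sketch with invented sample values): start from "two extra slots", subtract the ranges completely overwritten, and drop one more for each boundary the new range shares with existing pivots.

#include <stdio.h>

static unsigned char new_end_calc(unsigned char end, unsigned char offset,
				  unsigned char offset_end,
				  unsigned long index, unsigned long last,
				  unsigned long r_min, unsigned long end_piv)
{
	unsigned char new_end = end + 2;

	new_end -= offset_end - offset;	/* ranges completely overwritten */
	if (r_min == index)		/* write starts on an existing boundary */
		new_end--;
	if (end_piv == last)		/* write ends on an existing boundary */
		new_end--;
	return new_end;
}

int main(void)
{
	/*
	 * Node end 4, write [15, 17] lands in the middle of the range
	 * [10, 19] at offset 2: one range becomes three, end grows by two.
	 */
	printf("%d\n", new_end_calc(4, 2, 2, 15, 17, 10, 19));	/* 6 */

	/* Exact fit of [10, 19]: the end does not change. */
	printf("%d\n", new_end_calc(4, 2, 2, 10, 19, 10, 19));	/* 4 */
	return 0;
}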
+
+/*
+ * mas_wr_append() - Attempt to append
+ * @wr_mas: the maple write state
+ * @new_end: The end of the node after the modification
+ *
+ * This is currently unsafe in rcu mode since the end of the node may be cached
+ * by readers while the node contents may be updated, which could result in
+ * inaccurate information.
+ */
+static inline void mas_wr_append(struct ma_wr_state *wr_mas,
+ unsigned char new_end)
+{
+ struct ma_state *mas = wr_mas->mas;
+ void __rcu **slots;
+ unsigned char end = mas->end;
- rcu_assign_pointer(wr_mas->slots[new_end], wr_mas->content);
- if (new_end < node_pivots)
- ma_set_meta(wr_mas->node, maple_leaf_64, 0, new_end);
+ if (new_end < mt_pivots[wr_mas->type]) {
+ wr_mas->pivots[new_end] = wr_mas->pivots[end];
+ ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end);
+ }
- wr_mas->pivots[end] = mas->last;
- rcu_assign_pointer(wr_mas->slots[end], wr_mas->entry);
- return true;
+ slots = wr_mas->slots;
+ if (new_end == end + 1) {
+ if (mas->last == wr_mas->r_max) {
+ /* Append to end of range */
+ rcu_assign_pointer(slots[new_end], wr_mas->entry);
+ wr_mas->pivots[end] = mas->index - 1;
+ mas->offset = new_end;
+ } else {
+ /* Append to start of range */
+ rcu_assign_pointer(slots[new_end], wr_mas->content);
+ wr_mas->pivots[end] = mas->last;
+ rcu_assign_pointer(slots[end], wr_mas->entry);
+ }
+ } else {
+ /* Append to the range without touching any boundaries. */
+ rcu_assign_pointer(slots[new_end], wr_mas->content);
+ wr_mas->pivots[end + 1] = mas->last;
+ rcu_assign_pointer(slots[end + 1], wr_mas->entry);
+ wr_mas->pivots[end] = mas->index - 1;
+ mas->offset = end + 1;
}
- return false;
+ if (!wr_mas->content || !wr_mas->entry)
+ mas_update_gap(mas);
+
+ mas->end = new_end;
+ trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry);
+ return;
}
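Editorial note: the two shapes handled above, both appends after the last used pivot, are perhaps easiest to see as the resulting slot/pivot layout. A toy before/after using plain arrays and invented values (not the kernel structures):

#include <stdio.h>

static void show(const char *tag, const char *slot[], const unsigned long piv[],
		 unsigned char end)
{
	printf("%s:", tag);
	for (unsigned char i = 0; i <= end; i++)
		printf(" [..%lu]=%s", piv[i], slot[i]);
	printf("\n");
}

int main(void)
{
	/* Original last range of the node: [0, 99] = "old", end = 0. */

	/* new_end == end + 1: writing [50, 99] = "new" splits it in two. */
	const char *s1[] = { "old", "new" };
	const unsigned long p1[] = { 49, 99 };
	show("end + 1", s1, p1, 1);

	/* new_end == end + 2: writing [20, 29] = "new" splits it in three. */
	const char *s2[] = { "old", "new", "old" };
	const unsigned long p2[] = { 19, 29, 99 };
	show("end + 2", s2, p2, 2);
	return 0;
}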
/*
@@ -4319,92 +3733,223 @@ static void mas_wr_bnode(struct ma_wr_state *wr_mas)
{
struct maple_big_node b_node;
- trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
+ trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry);
memset(&b_node, 0, sizeof(struct maple_big_node));
mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
- mas_commit_b_node(wr_mas, &b_node, wr_mas->node_end);
+ mas_commit_b_node(wr_mas, &b_node);
}
-static inline void mas_wr_modify(struct ma_wr_state *wr_mas)
+/*
+ * mas_wr_store_entry() - Internal call to store a value
+ * @wr_mas: The maple write state
+ */
+static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
{
- unsigned char node_slots;
- unsigned char node_size;
struct ma_state *mas = wr_mas->mas;
+ unsigned char new_end = mas_wr_new_end(wr_mas);
- /* Direct replacement */
- if (wr_mas->r_min == mas->index && wr_mas->r_max == mas->last) {
+ switch (mas->store_type) {
+ case wr_exact_fit:
rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
if (!!wr_mas->entry ^ !!wr_mas->content)
mas_update_gap(mas);
- return;
+ break;
+ case wr_append:
+ mas_wr_append(wr_mas, new_end);
+ break;
+ case wr_slot_store:
+ mas_wr_slot_store(wr_mas);
+ break;
+ case wr_node_store:
+ mas_wr_node_store(wr_mas, new_end);
+ break;
+ case wr_spanning_store:
+ mas_wr_spanning_store(wr_mas);
+ break;
+ case wr_split_store:
+ case wr_rebalance:
+ mas_wr_bnode(wr_mas);
+ break;
+ case wr_new_root:
+ mas_new_root(mas, wr_mas->entry);
+ break;
+ case wr_store_root:
+ mas_store_root(mas, wr_mas->entry);
+ break;
+ case wr_invalid:
+ MT_BUG_ON(mas->tree, 1);
}
- /* Attempt to append */
- node_slots = mt_slots[wr_mas->type];
- node_size = wr_mas->node_end - wr_mas->offset_end + mas->offset + 2;
- if (mas->max == ULONG_MAX)
- node_size++;
+ return;
+}
+
+static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
+{
+ struct ma_state *mas = wr_mas->mas;
- /* slot and node store will not fit, go to the slow path */
- if (unlikely(node_size >= node_slots))
- goto slow_path;
+ if (!mas_is_active(mas)) {
+ if (mas_is_start(mas))
+ goto set_content;
- if (wr_mas->entry && (wr_mas->node_end < node_slots - 1) &&
- (mas->offset == wr_mas->node_end) && mas_wr_append(wr_mas)) {
- if (!wr_mas->content || !wr_mas->entry)
- mas_update_gap(mas);
- return;
+ if (unlikely(mas_is_paused(mas)))
+ goto reset;
+
+ if (unlikely(mas_is_none(mas)))
+ goto reset;
+
+ if (unlikely(mas_is_overflow(mas)))
+ goto reset;
+
+ if (unlikely(mas_is_underflow(mas)))
+ goto reset;
}
- if ((wr_mas->offset_end - mas->offset <= 1) && mas_wr_slot_store(wr_mas))
- return;
- else if (mas_wr_node_store(wr_mas))
- return;
+ /*
+ * A less strict version of mas_is_span_wr() where we allow spanning
+ * writes within this node. This is to stop partial walks in
+ * mas_preallocate() from being reset.
+ */
+ if (mas->last > mas->max)
+ goto reset;
- if (mas_is_err(mas))
- return;
+ if (wr_mas->entry)
+ goto set_content;
+
+ if (mte_is_leaf(mas->node) && mas->last == mas->max)
+ goto reset;
+
+ goto set_content;
+
+reset:
+ mas_reset(mas);
+set_content:
+ wr_mas->content = mas_start(mas);
+}
+
+/**
+ * mas_prealloc_calc() - Calculate number of nodes needed for a
+ * given store operation
+ * @wr_mas: The maple write state
+ * @entry: The entry to store into the tree
+ *
+ * Stores the number of nodes required for preallocation in @wr_mas->mas->node_request.
+ */
+static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry)
+{
+ struct ma_state *mas = wr_mas->mas;
+ unsigned char height = mas_mt_height(mas);
+ int ret = height * 3 + 1;
+ unsigned char delta = height - wr_mas->vacant_height;
+
+ switch (mas->store_type) {
+ case wr_exact_fit:
+ case wr_append:
+ case wr_slot_store:
+ ret = 0;
+ break;
+ case wr_spanning_store:
+ if (wr_mas->sufficient_height < wr_mas->vacant_height)
+ ret = (height - wr_mas->sufficient_height) * 3 + 1;
+ else
+ ret = delta * 3 + 1;
+ break;
+ case wr_split_store:
+ ret = delta * 2 + 1;
+ break;
+ case wr_rebalance:
+ if (wr_mas->sufficient_height < wr_mas->vacant_height)
+ ret = (height - wr_mas->sufficient_height) * 2 + 1;
+ else
+ ret = delta * 2 + 1;
+ break;
+ case wr_node_store:
+ ret = mt_in_rcu(mas->tree) ? 1 : 0;
+ break;
+ case wr_new_root:
+ ret = 1;
+ break;
+ case wr_store_root:
+ if (likely((mas->last != 0) || (mas->index != 0)))
+ ret = 1;
+ else if (((unsigned long) (entry) & 3) == 2)
+ ret = 1;
+ else
+ ret = 0;
+ break;
+ case wr_invalid:
+ WARN_ON_ONCE(1);
+ }
-slow_path:
- mas_wr_bnode(wr_mas);
+ mas->node_request = ret;
}
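A short worked example of the calculation above (editorial note, not part of the patch): for a tree of height 3 whose deepest node with a free slot gives vacant_height = 1, a wr_split_store needs delta * 2 + 1 = (3 - 1) * 2 + 1 = 5 new nodes, while a wr_rebalance with sufficient_height = 0 needs (3 - 0) * 2 + 1 = 7 nodes, since sufficient_height < vacant_height.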
/*
- * mas_wr_store_entry() - Internal call to store a value
- * @mas: The maple state
- * @entry: The entry to store.
+ * mas_wr_store_type() - Determine the store type for a given
+ * store operation.
+ * @wr_mas: The maple write state
*
- * Return: The contents that was stored at the index.
+ * Return: the type of store needed for the operation
*/
-static inline void *mas_wr_store_entry(struct ma_wr_state *wr_mas)
+static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas)
{
struct ma_state *mas = wr_mas->mas;
+ unsigned char new_end;
- wr_mas->content = mas_start(mas);
- if (mas_is_none(mas) || mas_is_ptr(mas)) {
- mas_store_root(mas, wr_mas->entry);
- return wr_mas->content;
- }
+ if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
+ return wr_store_root;
- if (unlikely(!mas_wr_walk(wr_mas))) {
- mas_wr_spanning_store(wr_mas);
- return wr_mas->content;
- }
+ if (unlikely(!mas_wr_walk(wr_mas)))
+ return wr_spanning_store;
/* At this point, we are at the leaf node that needs to be altered. */
- wr_mas->end_piv = wr_mas->r_max;
mas_wr_end_piv(wr_mas);
-
if (!wr_mas->entry)
mas_wr_extend_null(wr_mas);
- /* New root for a single pointer */
- if (unlikely(!mas->index && mas->last == ULONG_MAX)) {
- mas_new_root(mas, wr_mas->entry);
- return wr_mas->content;
+ if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last))
+ return wr_exact_fit;
+
+ if (unlikely(!mas->index && mas->last == ULONG_MAX))
+ return wr_new_root;
+
+ new_end = mas_wr_new_end(wr_mas);
+ /* Potential spanning rebalance collapsing a node */
+ if (new_end < mt_min_slots[wr_mas->type]) {
+ if (!mte_is_root(mas->node))
+ return wr_rebalance;
+ return wr_node_store;
}
- mas_wr_modify(wr_mas);
- return wr_mas->content;
+ if (new_end >= mt_slots[wr_mas->type])
+ return wr_split_store;
+
+ if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end))
+ return wr_append;
+
+ if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) ||
+ (wr_mas->offset_end - mas->offset == 1)))
+ return wr_slot_store;
+
+ return wr_node_store;
+}
+
+/**
+ * mas_wr_preallocate() - Preallocate enough nodes for a store operation
+ * @wr_mas: The maple write state
+ * @entry: The entry that will be stored
+ *
+ */
+static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry)
+{
+ struct ma_state *mas = wr_mas->mas;
+
+ mas_wr_prealloc_setup(wr_mas);
+ mas->store_type = mas_wr_store_type(wr_mas);
+ mas_prealloc_calc(wr_mas, entry);
+ if (!mas->node_request)
+ return;
+
+ mas_alloc_nodes(mas, GFP_NOWAIT);
}
/**
@@ -4437,26 +3982,24 @@ static inline void *mas_insert(struct ma_state *mas, void *entry)
if (wr_mas.content)
goto exists;
- if (mas_is_none(mas) || mas_is_ptr(mas)) {
- mas_store_root(mas, entry);
+ mas_wr_preallocate(&wr_mas, entry);
+ if (mas_is_err(mas))
return NULL;
- }
/* spanning writes always overwrite something */
- if (!mas_wr_walk(&wr_mas))
+ if (mas->store_type == wr_spanning_store)
goto exists;
/* At this point, we are at the leaf node that needs to be altered. */
- wr_mas.offset_end = mas->offset;
- wr_mas.end_piv = wr_mas.r_max;
-
- if (wr_mas.content || (mas->last > wr_mas.r_max))
- goto exists;
+ if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) {
+ wr_mas.offset_end = mas->offset;
+ wr_mas.end_piv = wr_mas.r_max;
- if (!entry)
- return NULL;
+ if (wr_mas.content || (mas->last > wr_mas.r_max))
+ goto exists;
+ }
- mas_wr_modify(&wr_mas);
+ mas_wr_store_entry(&wr_mas);
return wr_mas.content;
exists:
@@ -4465,30 +4008,107 @@ exists:
}
+/**
+ * mas_alloc_cyclic() - Internal call to find somewhere to store an entry
+ * @mas: The maple state.
+ * @startp: Pointer to ID.
+ * @range_lo: Lower bound of range to search.
+ * @range_hi: Upper bound of range to search.
+ * @entry: The entry to store.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * Return: 0 if the allocation succeeded without wrapping, 1 if the
+ * allocation succeeded after wrapping, or -EBUSY if there are no
+ * free entries.
+ */
+int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp)
+{
+ unsigned long min = range_lo;
+ int ret = 0;
+
+ range_lo = max(min, *next);
+ ret = mas_empty_area(mas, range_lo, range_hi, 1);
+ if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
+ mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
+ ret = 1;
+ }
+ if (ret < 0 && range_lo > min) {
+ mas_reset(mas);
+ ret = mas_empty_area(mas, min, range_hi, 1);
+ if (ret == 0)
+ ret = 1;
+ }
+ if (ret < 0)
+ return ret;
+
+ do {
+ mas_insert(mas, entry);
+ } while (mas_nomem(mas, gfp));
+ if (mas_is_err(mas))
+ return xa_err(mas->node);
+
+ *startp = mas->index;
+ *next = *startp + 1;
+ if (*next == 0)
+ mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;
+
+ mas_destroy(mas);
+ return ret;
+}
+EXPORT_SYMBOL(mas_alloc_cyclic);
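A minimal usage sketch for the exported helper above (editorial, not part of the patch). It assumes a hypothetical allocation tree initialised with MT_FLAGS_ALLOC_RANGE and a caller-maintained *next cursor; GFP_KERNEL is safe under mtree_lock() here because mas_nomem() drops the tree lock to reclaim.

static int example_id_alloc(struct maple_tree *mt, void *ptr,
                            unsigned long *id, unsigned long *next)
{
        MA_STATE(mas, mt, 0, 0);
        int ret;

        mtree_lock(mt);
        /* Cycle IDs within 1..1023; *next remembers where to continue. */
        ret = mas_alloc_cyclic(&mas, id, ptr, 1, 1023, next, GFP_KERNEL);
        mtree_unlock(mt);
        return ret;     /* 0 or 1 on success (1 == wrapped), negative errno otherwise */
}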
+
+static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
+{
+retry:
+ mas_set(mas, index);
+ mas_state_walk(mas);
+ if (mas_is_start(mas))
+ goto retry;
+}
+
+static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas,
+ struct maple_node *node, const unsigned long index)
+{
+ if (unlikely(ma_dead_node(node))) {
+ mas_rewalk(mas, index);
+ return true;
+ }
+ return false;
+}
+
/*
* mas_prev_node() - Find the prev non-null entry at the same level in the
- * tree. The prev value will be mas->node[mas->offset] or MAS_NONE.
+ * tree. The prev value will be mas->node[mas->offset] or the status will be
+ * ma_underflow.
* @mas: The maple state
* @min: The lower limit to search
*
- * The prev node value will be mas->node[mas->offset] or MAS_NONE.
+ * The prev node value will be mas->node[mas->offset] or the status will be
+ * ma_underflow.
* Return: 1 if the node is dead, 0 otherwise.
*/
-static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
+static int mas_prev_node(struct ma_state *mas, unsigned long min)
{
enum maple_type mt;
int offset, level;
void __rcu **slots;
struct maple_node *node;
- struct maple_enode *enode;
unsigned long *pivots;
+ unsigned long max;
- if (mas_is_none(mas))
- return 0;
+ node = mas_mn(mas);
+ if (!mas->min)
+ goto no_entry;
+
+ max = mas->min - 1;
+ if (max < min)
+ goto no_entry;
level = 0;
do {
- node = mas_mn(mas);
if (ma_is_root(node))
goto no_entry;
@@ -4497,378 +4117,311 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
return 1;
offset = mas->offset;
level++;
+ node = mas_mn(mas);
} while (!offset);
offset--;
mt = mte_node_type(mas->node);
- node = mas_mn(mas);
- slots = ma_slots(node, mt);
- pivots = ma_pivots(node, mt);
- mas->max = pivots[offset];
- if (offset)
- mas->min = pivots[offset - 1] + 1;
- if (unlikely(ma_dead_node(node)))
- return 1;
-
- if (mas->max < min)
- goto no_entry_min;
-
while (level > 1) {
level--;
- enode = mas_slot(mas, slots, offset);
+ slots = ma_slots(node, mt);
+ mas->node = mas_slot(mas, slots, offset);
if (unlikely(ma_dead_node(node)))
return 1;
- mas->node = enode;
mt = mte_node_type(mas->node);
node = mas_mn(mas);
- slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
- offset = ma_data_end(node, mt, pivots, mas->max);
- if (offset)
- mas->min = pivots[offset - 1] + 1;
-
- if (offset < mt_pivots[mt])
- mas->max = pivots[offset];
-
- if (mas->max < min)
- goto no_entry;
+ offset = ma_data_end(node, mt, pivots, max);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
}
+ slots = ma_slots(node, mt);
mas->node = mas_slot(mas, slots, offset);
+ pivots = ma_pivots(node, mt);
if (unlikely(ma_dead_node(node)))
return 1;
+ if (likely(offset))
+ mas->min = pivots[offset - 1] + 1;
+ mas->max = max;
mas->offset = mas_data_end(mas);
if (unlikely(mte_dead_node(mas->node)))
return 1;
+ mas->end = mas->offset;
return 0;
-no_entry_min:
- mas->offset = offset;
- if (offset)
- mas->min = pivots[offset - 1] + 1;
no_entry:
if (unlikely(ma_dead_node(node)))
return 1;
- mas->node = MAS_NONE;
+ mas->status = ma_underflow;
return 0;
}
/*
+ * mas_prev_slot() - Get the entry in the previous slot
+ *
+ * @mas: The maple state
+ * @min: The minimum starting range
+ * @empty: Can be empty
+ *
+ * Return: The entry in the previous slot which is possibly NULL
+ */
+static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty)
+{
+ void *entry;
+ void __rcu **slots;
+ unsigned long pivot;
+ enum maple_type type;
+ unsigned long *pivots;
+ struct maple_node *node;
+ unsigned long save_point = mas->index;
+
+retry:
+ node = mas_mn(mas);
+ type = mte_node_type(mas->node);
+ pivots = ma_pivots(node, type);
+ if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
+ goto retry;
+
+ if (mas->min <= min) {
+ pivot = mas_safe_min(mas, pivots, mas->offset);
+
+ if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
+ goto retry;
+
+ if (pivot <= min)
+ goto underflow;
+ }
+
+again:
+ if (likely(mas->offset)) {
+ mas->offset--;
+ mas->last = mas->index - 1;
+ mas->index = mas_safe_min(mas, pivots, mas->offset);
+ } else {
+ if (mas->index <= min)
+ goto underflow;
+
+ if (mas_prev_node(mas, min)) {
+ mas_rewalk(mas, save_point);
+ goto retry;
+ }
+
+ if (WARN_ON_ONCE(mas_is_underflow(mas)))
+ return NULL;
+
+ mas->last = mas->max;
+ node = mas_mn(mas);
+ type = mte_node_type(mas->node);
+ pivots = ma_pivots(node, type);
+ mas->index = pivots[mas->offset - 1] + 1;
+ }
+
+ slots = ma_slots(node, type);
+ entry = mas_slot(mas, slots, mas->offset);
+ if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
+ goto retry;
+
+ if (likely(entry))
+ return entry;
+
+ if (!empty) {
+ if (mas->index <= min)
+ goto underflow;
+
+ goto again;
+ }
+
+ return entry;
+
+underflow:
+ mas->status = ma_underflow;
+ return NULL;
+}
+
+/*
* mas_next_node() - Get the next node at the same level in the tree.
* @mas: The maple state
+ * @node: The maple node
* @max: The maximum pivot value to check.
*
- * The next value will be mas->node[mas->offset] or MAS_NONE.
+ * The next value will be mas->node[mas->offset] or the status will have
+ * overflowed.
* Return: 1 on dead node, 0 otherwise.
*/
-static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
- unsigned long max)
+static int mas_next_node(struct ma_state *mas, struct maple_node *node,
+ unsigned long max)
{
- unsigned long min, pivot;
+ unsigned long min;
unsigned long *pivots;
struct maple_enode *enode;
+ struct maple_node *tmp;
int level = 0;
- unsigned char offset;
+ unsigned char node_end;
enum maple_type mt;
void __rcu **slots;
if (mas->max >= max)
- goto no_entry;
+ goto overflow;
+ min = mas->max + 1;
level = 0;
do {
if (ma_is_root(node))
- goto no_entry;
-
- min = mas->max + 1;
- if (min > max)
- goto no_entry;
+ goto overflow;
+ /* Walk up. */
if (unlikely(mas_ascend(mas)))
return 1;
- offset = mas->offset;
level++;
node = mas_mn(mas);
mt = mte_node_type(mas->node);
pivots = ma_pivots(node, mt);
- } while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max)));
-
- slots = ma_slots(node, mt);
- pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
- while (unlikely(level > 1)) {
- /* Descend, if necessary */
- enode = mas_slot(mas, slots, offset);
+ node_end = ma_data_end(node, mt, pivots, mas->max);
if (unlikely(ma_dead_node(node)))
return 1;
- mas->node = enode;
+ } while (unlikely(mas->offset == node_end));
+
+ slots = ma_slots(node, mt);
+ mas->offset++;
+ enode = mas_slot(mas, slots, mas->offset);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
+
+ if (level > 1)
+ mas->offset = 0;
+
+ while (unlikely(level > 1)) {
level--;
+ mas->node = enode;
node = mas_mn(mas);
mt = mte_node_type(mas->node);
slots = ma_slots(node, mt);
- pivots = ma_pivots(node, mt);
- offset = 0;
- pivot = pivots[0];
+ enode = mas_slot(mas, slots, 0);
+ if (unlikely(ma_dead_node(node)))
+ return 1;
}
- enode = mas_slot(mas, slots, offset);
+ if (!mas->offset)
+ pivots = ma_pivots(node, mt);
+
+ mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt);
+ tmp = mte_to_node(enode);
+ mt = mte_node_type(enode);
+ pivots = ma_pivots(tmp, mt);
+ mas->end = ma_data_end(tmp, mt, pivots, mas->max);
if (unlikely(ma_dead_node(node)))
return 1;
mas->node = enode;
mas->min = min;
- mas->max = pivot;
return 0;
-no_entry:
+overflow:
if (unlikely(ma_dead_node(node)))
return 1;
- mas->node = MAS_NONE;
+ mas->status = ma_overflow;
return 0;
}
/*
- * mas_next_nentry() - Get the next node entry
- * @mas: The maple state
- * @max: The maximum value to check
- * @*range_start: Pointer to store the start of the range.
+ * mas_next_slot() - Get the entry in the next slot
*
- * Sets @mas->offset to the offset of the next node entry, @mas->last to the
- * pivot of the entry.
+ * @mas: The maple state
+ * @max: The maximum starting range
+ * @empty: Can be empty
*
- * Return: The next entry, %NULL otherwise
+ * Return: The entry in the next slot which is possibly NULL
*/
-static inline void *mas_next_nentry(struct ma_state *mas,
- struct maple_node *node, unsigned long max, enum maple_type type)
+static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty)
{
- unsigned char count;
- unsigned long pivot;
- unsigned long *pivots;
void __rcu **slots;
+ unsigned long *pivots;
+ unsigned long pivot;
+ enum maple_type type;
+ struct maple_node *node;
+ unsigned long save_point = mas->last;
void *entry;
- if (mas->last == mas->max) {
- mas->index = mas->max;
- return NULL;
- }
-
+retry:
+ node = mas_mn(mas);
+ type = mte_node_type(mas->node);
pivots = ma_pivots(node, type);
- slots = ma_slots(node, type);
- mas->index = mas_safe_min(mas, pivots, mas->offset);
- if (ma_dead_node(node))
- return NULL;
-
- if (mas->index > max)
- return NULL;
-
- count = ma_data_end(node, type, pivots, mas->max);
- if (mas->offset > count)
- return NULL;
+ if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
+ goto retry;
- while (mas->offset < count) {
- pivot = pivots[mas->offset];
- entry = mas_slot(mas, slots, mas->offset);
- if (ma_dead_node(node))
- return NULL;
+ if (mas->max >= max) {
+ if (likely(mas->offset < mas->end))
+ pivot = pivots[mas->offset];
+ else
+ pivot = mas->max;
- if (entry)
- goto found;
+ if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
+ goto retry;
- if (pivot >= max)
+ if (pivot >= max) { /* Was at the limit, next will extend beyond */
+ mas->status = ma_overflow;
return NULL;
-
- mas->index = pivot + 1;
- mas->offset++;
- }
-
- if (mas->index > mas->max) {
- mas->index = mas->last;
- return NULL;
+ }
}
- pivot = mas_safe_pivot(mas, pivots, mas->offset, type);
- entry = mas_slot(mas, slots, mas->offset);
- if (ma_dead_node(node))
- return NULL;
-
- if (!pivot)
- return NULL;
-
- if (!entry)
- return NULL;
-
-found:
- mas->last = pivot;
- return entry;
-}
-
-static inline void mas_rewalk(struct ma_state *mas, unsigned long index)
-{
-
-retry:
- mas_set(mas, index);
- mas_state_walk(mas);
- if (mas_is_start(mas))
- goto retry;
-
- return;
-
-}
-
-/*
- * mas_next_entry() - Internal function to get the next entry.
- * @mas: The maple state
- * @limit: The maximum range start.
- *
- * Set the @mas->node to the next entry and the range_start to
- * the beginning value for the entry. Does not check beyond @limit.
- * Sets @mas->index and @mas->last to the limit if it is hit.
- * Restarts on dead nodes.
- *
- * Return: the next entry or %NULL.
- */
-static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
-{
- void *entry = NULL;
- struct maple_enode *prev_node;
- struct maple_node *node;
- unsigned char offset;
- unsigned long last;
- enum maple_type mt;
-
- last = mas->last;
-retry:
- offset = mas->offset;
- prev_node = mas->node;
- node = mas_mn(mas);
- mt = mte_node_type(mas->node);
- mas->offset++;
- if (unlikely(mas->offset >= mt_slots[mt])) {
- mas->offset = mt_slots[mt] - 1;
- goto next_node;
- }
+ if (likely(mas->offset < mas->end)) {
+ mas->index = pivots[mas->offset] + 1;
+again:
+ mas->offset++;
+ if (likely(mas->offset < mas->end))
+ mas->last = pivots[mas->offset];
+ else
+ mas->last = mas->max;
+ } else {
+ if (mas->last >= max) {
+ mas->status = ma_overflow;
+ return NULL;
+ }
- while (!mas_is_none(mas)) {
- entry = mas_next_nentry(mas, node, limit, mt);
- if (unlikely(ma_dead_node(node))) {
- mas_rewalk(mas, last);
+ if (mas_next_node(mas, node, max)) {
+ mas_rewalk(mas, save_point);
goto retry;
}
- if (likely(entry))
- return entry;
-
- if (unlikely((mas->index > limit)))
- break;
+ if (WARN_ON_ONCE(mas_is_overflow(mas)))
+ return NULL;
-next_node:
- prev_node = mas->node;
- offset = mas->offset;
- if (unlikely(mas_next_node(mas, node, limit))) {
- mas_rewalk(mas, last);
- goto retry;
- }
mas->offset = 0;
+ mas->index = mas->min;
node = mas_mn(mas);
- mt = mte_node_type(mas->node);
- }
-
- mas->index = mas->last = limit;
- mas->offset = offset;
- mas->node = prev_node;
- return NULL;
-}
-
-/*
- * mas_prev_nentry() - Get the previous node entry.
- * @mas: The maple state.
- * @limit: The lower limit to check for a value.
- *
- * Return: the entry, %NULL otherwise.
- */
-static inline void *mas_prev_nentry(struct ma_state *mas, unsigned long limit,
- unsigned long index)
-{
- unsigned long pivot, min;
- unsigned char offset;
- struct maple_node *mn;
- enum maple_type mt;
- unsigned long *pivots;
- void __rcu **slots;
- void *entry;
-
-retry:
- if (!mas->offset)
- return NULL;
-
- mn = mas_mn(mas);
- mt = mte_node_type(mas->node);
- offset = mas->offset - 1;
- if (offset >= mt_slots[mt])
- offset = mt_slots[mt] - 1;
-
- slots = ma_slots(mn, mt);
- pivots = ma_pivots(mn, mt);
- if (offset == mt_pivots[mt])
- pivot = mas->max;
- else
- pivot = pivots[offset];
-
- if (unlikely(ma_dead_node(mn))) {
- mas_rewalk(mas, index);
- goto retry;
+ type = mte_node_type(mas->node);
+ pivots = ma_pivots(node, type);
+ mas->last = pivots[0];
}
- while (offset && ((!mas_slot(mas, slots, offset) && pivot >= limit) ||
- !pivot))
- pivot = pivots[--offset];
-
- min = mas_safe_min(mas, pivots, offset);
- entry = mas_slot(mas, slots, offset);
- if (unlikely(ma_dead_node(mn))) {
- mas_rewalk(mas, index);
+ slots = ma_slots(node, type);
+ entry = mt_slot(mas->tree, slots, mas->offset);
+ if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
goto retry;
- }
- if (likely(entry)) {
- mas->offset = offset;
- mas->last = pivot;
- mas->index = min;
- }
- return entry;
-}
-
-static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min)
-{
- void *entry;
-
-retry:
- while (likely(!mas_is_none(mas))) {
- entry = mas_prev_nentry(mas, min, mas->index);
- if (unlikely(mas->last < min))
- goto not_found;
+ if (entry)
+ return entry;
- if (likely(entry))
- return entry;
- if (unlikely(mas_prev_node(mas, min))) {
- mas_rewalk(mas, mas->index);
- goto retry;
+ if (!empty) {
+ if (mas->last >= max) {
+ mas->status = ma_overflow;
+ return NULL;
}
- mas->offset++;
+ mas->index = mas->last + 1;
+ goto again;
}
- mas->offset--;
-not_found:
- mas->index = mas->last = min;
- return NULL;
+ return entry;
}
/*
@@ -4880,14 +4433,15 @@ not_found:
* Return: True if found in a leaf, false otherwise.
*
*/
-static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
+static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
+ unsigned long *gap_min, unsigned long *gap_max)
{
enum maple_type type = mte_node_type(mas->node);
struct maple_node *node = mas_mn(mas);
unsigned long *pivots, *gaps;
void __rcu **slots;
unsigned long gap = 0;
- unsigned long max, min, index;
+ unsigned long max, min;
unsigned char offset;
if (unlikely(mas_is_err(mas)))
@@ -4909,8 +4463,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
min = mas_safe_min(mas, pivots, --offset);
max = mas_safe_pivot(mas, pivots, offset, type);
- index = mas->index;
- while (index <= max) {
+ while (mas->index <= max) {
gap = 0;
if (gaps)
gap = gaps[offset];
@@ -4941,15 +4494,13 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
min = mas_safe_min(mas, pivots, offset);
}
- if (unlikely(index > max)) {
- mas_set_err(mas, -EBUSY);
- return false;
- }
+ if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
+ goto no_space;
if (unlikely(ma_is_leaf(type))) {
mas->offset = offset;
- mas->min = min;
- mas->max = min + gap - 1;
+ *gap_min = min;
+ *gap_max = min + gap - 1;
return true;
}
@@ -4961,9 +4512,11 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
return false;
ascend:
- if (mte_is_root(mas->node))
- mas_set_err(mas, -EBUSY);
+ if (!mte_is_root(mas->node))
+ return false;
+no_space:
+ mas_set_err(mas, -EBUSY);
return false;
}
@@ -4971,10 +4524,10 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
{
enum maple_type type = mte_node_type(mas->node);
unsigned long pivot, min, gap = 0;
- unsigned char offset;
- unsigned long *gaps;
- unsigned long *pivots = ma_pivots(mas_mn(mas), type);
- void __rcu **slots = ma_slots(mas_mn(mas), type);
+ unsigned char offset, data_end;
+ unsigned long *gaps, *pivots;
+ void __rcu **slots;
+ struct maple_node *node;
bool found = false;
if (ma_is_dense(type)) {
@@ -4982,13 +4535,15 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
return true;
}
- gaps = ma_gaps(mte_to_node(mas->node), type);
+ node = mas_mn(mas);
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+ gaps = ma_gaps(node, type);
offset = mas->offset;
min = mas_safe_min(mas, pivots, offset);
- for (; offset < mt_slots[type]; offset++) {
+ data_end = ma_data_end(node, type, pivots, mas->max);
+ for (; offset <= data_end; offset++) {
pivot = mas_safe_pivot(mas, pivots, offset, type);
- if (offset && !pivot)
- break;
/* Not within lower bounds */
if (mas->index > pivot)
@@ -5004,15 +4559,14 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
if (gap >= size) {
if (ma_is_leaf(type)) {
found = true;
- goto done;
- }
- if (mas->index <= pivot) {
- mas->node = mas_slot(mas, slots, offset);
- mas->min = min;
- mas->max = pivot;
- offset = 0;
break;
}
+
+ mas->node = mas_slot(mas, slots, offset);
+ mas->min = min;
+ mas->max = pivot;
+ offset = 0;
+ break;
}
next_slot:
min = pivot + 1;
@@ -5022,9 +4576,6 @@ next_slot:
}
}
- if (mte_is_root(mas->node))
- found = true;
-done:
mas->offset = offset;
return found;
}
@@ -5034,7 +4585,7 @@ done:
* @mas: The maple state.
*
* mas->index and mas->last will be set to the range if there is a value. If
- * mas->node is MAS_NONE, reset to MAS_START.
+ * mas->status is ma_none, reset to ma_start.
*
* Return: the entry at the location or %NULL.
*/
@@ -5042,24 +4593,25 @@ void *mas_walk(struct ma_state *mas)
{
void *entry;
+ if (!mas_is_active(mas) && !mas_is_start(mas))
+ mas->status = ma_start;
retry:
entry = mas_state_walk(mas);
- if (mas_is_start(mas))
+ if (mas_is_start(mas)) {
goto retry;
-
- if (mas_is_ptr(mas)) {
+ } else if (mas_is_none(mas)) {
+ mas->index = 0;
+ mas->last = ULONG_MAX;
+ } else if (mas_is_ptr(mas)) {
if (!mas->index) {
mas->last = 0;
- } else {
- mas->index = 1;
- mas->last = ULONG_MAX;
+ return entry;
}
- return entry;
- }
- if (mas_is_none(mas)) {
- mas->index = 0;
+ mas->index = 1;
mas->last = ULONG_MAX;
+ mas->status = ma_none;
+ return NULL;
}
return entry;
@@ -5093,35 +4645,21 @@ static inline bool mas_rewind_node(struct ma_state *mas)
*/
static inline bool mas_skip_node(struct ma_state *mas)
{
- unsigned char slot, slot_count;
- unsigned long *pivots;
- enum maple_type mt;
+ if (mas_is_err(mas))
+ return false;
- mt = mte_node_type(mas->node);
- slot_count = mt_slots[mt] - 1;
do {
if (mte_is_root(mas->node)) {
- slot = mas->offset;
- if (slot > slot_count) {
+ if (mas->offset >= mas_data_end(mas)) {
mas_set_err(mas, -EBUSY);
return false;
}
} else {
mas_ascend(mas);
- slot = mas->offset;
- mt = mte_node_type(mas->node);
- slot_count = mt_slots[mt] - 1;
}
- } while (slot > slot_count);
-
- mas->offset = ++slot;
- pivots = ma_pivots(mas_mn(mas), mt);
- if (slot > 0)
- mas->min = pivots[slot - 1] + 1;
-
- if (slot <= slot_count)
- mas->max = pivots[slot];
+ } while (mas->offset >= mas_data_end(mas));
+ mas->offset++;
return true;
}
@@ -5141,8 +4679,8 @@ static inline void mas_awalk(struct ma_state *mas, unsigned long size)
* There are 4 options:
* go to child (descend)
* go back to parent (ascend)
- * no gap found. (return, slot == MAPLE_NODE_SLOTS)
- * found the gap. (return, slot != MAPLE_NODE_SLOTS)
+ * no gap found. (return, error == -EBUSY)
+ * found the gap. (return)
*/
while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
if (last == mas->node)
@@ -5153,46 +4691,6 @@ static inline void mas_awalk(struct ma_state *mas, unsigned long size)
}
/*
- * mas_fill_gap() - Fill a located gap with @entry.
- * @mas: The maple state
- * @entry: The value to store
- * @slot: The offset into the node to store the @entry
- * @size: The size of the entry
- * @index: The start location
- */
-static inline void mas_fill_gap(struct ma_state *mas, void *entry,
- unsigned char slot, unsigned long size, unsigned long *index)
-{
- MA_WR_STATE(wr_mas, mas, entry);
- unsigned char pslot = mte_parent_slot(mas->node);
- struct maple_enode *mn = mas->node;
- unsigned long *pivots;
- enum maple_type ptype;
- /*
- * mas->index is the start address for the search
- * which may no longer be needed.
- * mas->last is the end address for the search
- */
-
- *index = mas->index;
- mas->last = mas->index + size - 1;
-
- /*
- * It is possible that using mas->max and mas->min to correctly
- * calculate the index and last will cause an issue in the gap
- * calculation, so fix the ma_state here
- */
- mas_ascend(mas);
- ptype = mte_node_type(mas->node);
- pivots = ma_pivots(mas_mn(mas), ptype);
- mas->max = mas_safe_pivot(mas, pivots, pslot, ptype);
- mas->min = mas_safe_min(mas, pivots, pslot);
- mas->node = mn;
- mas->offset = slot;
- mas_wr_store_entry(&wr_mas);
-}
-
-/*
* mas_sparse_area() - Internal function. Return upper or lower limit when
* searching for a gap in an empty tree.
* @mas: The maple state
@@ -5201,25 +4699,28 @@ static inline void mas_fill_gap(struct ma_state *mas, void *entry,
* @size: The size of the gap
* @fwd: Searching forward or back
*/
-static inline void mas_sparse_area(struct ma_state *mas, unsigned long min,
+static inline int mas_sparse_area(struct ma_state *mas, unsigned long min,
unsigned long max, unsigned long size, bool fwd)
{
- unsigned long start = 0;
-
- if (!unlikely(mas_is_none(mas)))
- start++;
+ if (!unlikely(mas_is_none(mas)) && min == 0) {
+ min++;
+ /*
+ * min has been increased, so recheck that the remaining range
+ * can still satisfy the requested size.
+ */
+ if (min > max || max - min + 1 < size)
+ return -EBUSY;
+ }
/* mas_is_ptr */
- if (start < min)
- start = min;
-
if (fwd) {
- mas->index = start;
- mas->last = start + size - 1;
- return;
+ mas->index = min;
+ mas->last = min + size - 1;
+ } else {
+ mas->last = max;
+ mas->index = max - size + 1;
}
-
- mas->index = max;
+ return 0;
}
/*
@@ -5236,6 +4737,13 @@ int mas_empty_area(struct ma_state *mas, unsigned long min,
unsigned char offset;
unsigned long *pivots;
enum maple_type mt;
+ struct maple_node *node;
+
+ if (min > max)
+ return -EINVAL;
+
+ if (size == 0 || max - min < size - 1)
+ return -EINVAL;
if (mas_is_start(mas))
mas_start(mas);
@@ -5245,10 +4753,8 @@ int mas_empty_area(struct ma_state *mas, unsigned long min,
return -EBUSY;
/* Empty set */
- if (mas_is_none(mas) || mas_is_ptr(mas)) {
- mas_sparse_area(mas, min, max, size, true);
- return 0;
- }
+ if (mas_is_none(mas) || mas_is_ptr(mas))
+ return mas_sparse_area(mas, min, max, size, true);
/* The start of the window can only be within these values */
mas->index = min;
@@ -5259,21 +4765,14 @@ int mas_empty_area(struct ma_state *mas, unsigned long min,
return xa_err(mas->node);
offset = mas->offset;
- if (unlikely(offset == MAPLE_NODE_SLOTS))
- return -EBUSY;
-
+ node = mas_mn(mas);
mt = mte_node_type(mas->node);
- pivots = ma_pivots(mas_mn(mas), mt);
- if (offset)
- mas->min = pivots[offset - 1] + 1;
-
- if (offset < mt_pivots[mt])
- mas->max = pivots[offset];
-
- if (mas->index < mas->min)
- mas->index = mas->min;
-
+ pivots = ma_pivots(node, mt);
+ min = mas_safe_min(mas, pivots, offset);
+ if (mas->index < min)
+ mas->index = min;
mas->last = mas->index + size - 1;
+ mas->end = ma_data_end(node, mt, pivots, mas->max);
return 0;
}
EXPORT_SYMBOL_GPL(mas_empty_area);
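A minimal sketch of using mas_empty_area() to carve out a gap and fill it (editorial, not part of the patch). It assumes a hypothetical tree created with MT_FLAGS_ALLOC_RANGE so gaps are tracked.

static int example_fill_gap(struct maple_tree *mt, void *entry,
                            unsigned long size, unsigned long *index)
{
        MA_STATE(mas, mt, 0, 0);
        int ret;

        mtree_lock(mt);
        ret = mas_empty_area(&mas, 0, ULONG_MAX, size);
        if (!ret) {
                /* mas.index/mas.last now describe the found gap. */
                *index = mas.index;
                ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
        }
        mtree_unlock(mt);
        return ret;
}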
@@ -5291,26 +4790,30 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
{
struct maple_enode *last = mas->node;
- if (mas_is_start(mas)) {
+ if (min > max)
+ return -EINVAL;
+
+ if (size == 0 || max - min < size - 1)
+ return -EINVAL;
+
+ if (mas_is_start(mas))
mas_start(mas);
- mas->offset = mas_data_end(mas);
- } else if (mas->offset >= 2) {
- mas->offset -= 2;
- } else if (!mas_rewind_node(mas)) {
+ else if ((mas->offset < 2) && (!mas_rewind_node(mas)))
return -EBUSY;
- }
- /* Empty set. */
- if (mas_is_none(mas) || mas_is_ptr(mas)) {
- mas_sparse_area(mas, min, max, size, false);
- return 0;
- }
+ if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
+ return mas_sparse_area(mas, min, max, size, false);
+ else if (mas->offset >= 2)
+ mas->offset -= 2;
+ else
+ mas->offset = mas_data_end(mas);
+
/* The start of the window can only be within these values. */
mas->index = min;
mas->last = max;
- while (!mas_rev_awalk(mas, size)) {
+ while (!mas_rev_awalk(mas, size, &min, &max)) {
if (last == mas->node) {
if (!mas_rewind_node(mas))
return -EBUSY;
@@ -5325,91 +4828,20 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
return -EBUSY;
- /*
- * mas_rev_awalk() has set mas->min and mas->max to the gap values. If
- * the maximum is outside the window we are searching, then use the last
- * location in the search.
- * mas->max and mas->min is the range of the gap.
- * mas->index and mas->last are currently set to the search range.
- */
-
/* Trim the upper limit to the max. */
- if (mas->max <= mas->last)
- mas->last = mas->max;
+ if (max < mas->last)
+ mas->last = max;
mas->index = mas->last - size + 1;
+ mas->end = mas_data_end(mas);
return 0;
}
EXPORT_SYMBOL_GPL(mas_empty_area_rev);
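The reverse search is analogous; a hedged sketch (not part of the patch) that returns the highest suitable gap instead of the lowest:

static int example_find_gap_topdown(struct maple_tree *mt, unsigned long min,
                                    unsigned long max, unsigned long size,
                                    unsigned long *index)
{
        MA_STATE(mas, mt, 0, 0);
        int ret;

        mtree_lock(mt);
        ret = mas_empty_area_rev(&mas, min, max, size);
        if (!ret)
                *index = mas.index;     /* == mas.last - size + 1 */
        mtree_unlock(mt);
        return ret;
}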
-static inline int mas_alloc(struct ma_state *mas, void *entry,
- unsigned long size, unsigned long *index)
-{
- unsigned long min;
-
- mas_start(mas);
- if (mas_is_none(mas) || mas_is_ptr(mas)) {
- mas_root_expand(mas, entry);
- if (mas_is_err(mas))
- return xa_err(mas->node);
-
- if (!mas->index)
- return mte_pivot(mas->node, 0);
- return mte_pivot(mas->node, 1);
- }
-
- /* Must be walking a tree. */
- mas_awalk(mas, size);
- if (mas_is_err(mas))
- return xa_err(mas->node);
-
- if (mas->offset == MAPLE_NODE_SLOTS)
- goto no_gap;
-
- /*
- * At this point, mas->node points to the right node and we have an
- * offset that has a sufficient gap.
- */
- min = mas->min;
- if (mas->offset)
- min = mte_pivot(mas->node, mas->offset - 1) + 1;
-
- if (mas->index < min)
- mas->index = min;
-
- mas_fill_gap(mas, entry, mas->offset, size, index);
- return 0;
-
-no_gap:
- return -EBUSY;
-}
-
-static inline int mas_rev_alloc(struct ma_state *mas, unsigned long min,
- unsigned long max, void *entry,
- unsigned long size, unsigned long *index)
-{
- int ret = 0;
-
- ret = mas_empty_area_rev(mas, min, max, size);
- if (ret)
- return ret;
-
- if (mas_is_err(mas))
- return xa_err(mas->node);
-
- if (mas->offset == MAPLE_NODE_SLOTS)
- goto no_gap;
-
- mas_fill_gap(mas, entry, mas->offset, size, index);
- return 0;
-
-no_gap:
- return -EBUSY;
-}
-
/*
- * mas_dead_leaves() - Mark all leaves of a node as dead.
- * @mas: The maple state
+ * mte_dead_leaves() - Mark all leaves of a node as dead.
+ * @enode: the encoded node
+ * @mt: the maple tree
* @slots: Pointer to the slot array
*
* Must hold the write lock.
@@ -5417,15 +4849,16 @@ no_gap:
* Return: The number of leaves marked as dead.
*/
static inline
-unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
+unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
+ void __rcu **slots)
{
struct maple_node *node;
enum maple_type type;
void *entry;
int offset;
- for (offset = 0; offset < mt_slot_count(mas->node); offset++) {
- entry = mas_slot_locked(mas, slots, offset);
+ for (offset = 0; offset < mt_slot_count(enode); offset++) {
+ entry = mt_slot(mt, slots, offset);
type = mte_node_type(entry);
node = mte_to_node(entry);
/* Use both node and type to catch LE & BE metadata */
@@ -5433,7 +4866,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
break;
mte_set_node_dead(entry);
- smp_wmb(); /* Needed for RCU */
node->type = type;
rcu_assign_pointer(slots[offset], node);
}
@@ -5441,157 +4873,167 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
return offset;
}
-static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset)
+/**
+ * mte_dead_walk() - Walk down a dead tree to just before the leaves
+ * @enode: The maple encoded node
+ * @offset: The starting offset
+ *
+ * Note: This can only be used from the RCU callback context.
+ */
+static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
{
struct maple_node *node, *next;
void __rcu **slots = NULL;
- next = mas_mn(mas);
+ next = mte_to_node(*enode);
do {
- mas->node = ma_enode_ptr(next);
- node = mas_mn(mas);
+ *enode = ma_enode_ptr(next);
+ node = mte_to_node(*enode);
slots = ma_slots(node, node->type);
- next = mas_slot_locked(mas, slots, offset);
+ next = rcu_dereference_protected(slots[offset],
+ lock_is_held(&rcu_callback_map));
offset = 0;
} while (!ma_is_leaf(next->type));
return slots;
}
+/**
+ * mt_free_walk() - Walk & free a tree in the RCU callback context
+ * @head: The RCU head that's within the node.
+ *
+ * Note: This can only be used from the RCU callback context.
+ */
static void mt_free_walk(struct rcu_head *head)
{
void __rcu **slots;
struct maple_node *node, *start;
- struct maple_tree mt;
+ struct maple_enode *enode;
unsigned char offset;
enum maple_type type;
- MA_STATE(mas, &mt, 0, 0);
node = container_of(head, struct maple_node, rcu);
if (ma_is_leaf(node->type))
goto free_leaf;
- mt_init_flags(&mt, node->ma_flags);
- mas_lock(&mas);
start = node;
- mas.node = mt_mk_node(node, node->type);
- slots = mas_dead_walk(&mas, 0);
- node = mas_mn(&mas);
+ enode = mt_mk_node(node, node->type);
+ slots = mte_dead_walk(&enode, 0);
+ node = mte_to_node(enode);
do {
mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1;
- mas.node = node->piv_parent;
- if (mas_mn(&mas) == node)
- goto start_slots_free;
-
- type = mte_node_type(mas.node);
- slots = ma_slots(mte_to_node(mas.node), type);
- if ((offset < mt_slots[type]) && (slots[offset]))
- slots = mas_dead_walk(&mas, offset);
-
- node = mas_mn(&mas);
+ enode = node->piv_parent;
+ if (mte_to_node(enode) == node)
+ goto free_leaf;
+
+ type = mte_node_type(enode);
+ slots = ma_slots(mte_to_node(enode), type);
+ if ((offset < mt_slots[type]) &&
+ rcu_dereference_protected(slots[offset],
+ lock_is_held(&rcu_callback_map)))
+ slots = mte_dead_walk(&enode, offset);
+ node = mte_to_node(enode);
} while ((node != start) || (node->slot_len < offset));
slots = ma_slots(node, node->type);
mt_free_bulk(node->slot_len, slots);
-start_slots_free:
- mas_unlock(&mas);
free_leaf:
- mt_free_rcu(&node->rcu);
+ kfree(node);
}
-static inline void __rcu **mas_destroy_descend(struct ma_state *mas,
- struct maple_enode *prev, unsigned char offset)
+static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
+ struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
{
struct maple_node *node;
- struct maple_enode *next = mas->node;
+ struct maple_enode *next = *enode;
void __rcu **slots = NULL;
+ enum maple_type type;
+ unsigned char next_offset = 0;
do {
- mas->node = next;
- node = mas_mn(mas);
- slots = ma_slots(node, mte_node_type(mas->node));
- next = mas_slot_locked(mas, slots, 0);
+ *enode = next;
+ node = mte_to_node(*enode);
+ type = mte_node_type(*enode);
+ slots = ma_slots(node, type);
+ next = mt_slot_locked(mt, slots, next_offset);
if ((mte_dead_node(next)))
- next = mas_slot_locked(mas, slots, 1);
+ next = mt_slot_locked(mt, slots, ++next_offset);
- mte_set_node_dead(mas->node);
- node->type = mte_node_type(mas->node);
+ mte_set_node_dead(*enode);
+ node->type = type;
node->piv_parent = prev;
node->parent_slot = offset;
- offset = 0;
- prev = mas->node;
+ offset = next_offset;
+ next_offset = 0;
+ prev = *enode;
} while (!mte_is_leaf(next));
return slots;
}
-static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags,
+static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
bool free)
{
void __rcu **slots;
struct maple_node *node = mte_to_node(enode);
struct maple_enode *start;
- struct maple_tree mt;
- MA_STATE(mas, &mt, 0, 0);
-
- if (mte_is_leaf(enode))
+ if (mte_is_leaf(enode)) {
+ mte_set_node_dead(enode);
+ node->type = mte_node_type(enode);
goto free_leaf;
+ }
- mt_init_flags(&mt, ma_flags);
- mas_lock(&mas);
-
- mas.node = start = enode;
- slots = mas_destroy_descend(&mas, start, 0);
- node = mas_mn(&mas);
+ start = enode;
+ slots = mte_destroy_descend(&enode, mt, start, 0);
+ node = mte_to_node(enode); // Updated in the above call.
do {
enum maple_type type;
unsigned char offset;
struct maple_enode *parent, *tmp;
- node->slot_len = mas_dead_leaves(&mas, slots);
+ node->slot_len = mte_dead_leaves(enode, mt, slots);
if (free)
mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1;
- mas.node = node->piv_parent;
- if (mas_mn(&mas) == node)
- goto start_slots_free;
+ enode = node->piv_parent;
+ if (mte_to_node(enode) == node)
+ goto free_leaf;
- type = mte_node_type(mas.node);
- slots = ma_slots(mte_to_node(mas.node), type);
+ type = mte_node_type(enode);
+ slots = ma_slots(mte_to_node(enode), type);
if (offset >= mt_slots[type])
goto next;
- tmp = mas_slot_locked(&mas, slots, offset);
+ tmp = mt_slot_locked(mt, slots, offset);
if (mte_node_type(tmp) && mte_to_node(tmp)) {
- parent = mas.node;
- mas.node = tmp;
- slots = mas_destroy_descend(&mas, parent, offset);
+ parent = enode;
+ enode = tmp;
+ slots = mte_destroy_descend(&enode, mt, parent, offset);
}
next:
- node = mas_mn(&mas);
- } while (start != mas.node);
+ node = mte_to_node(enode);
+ } while (start != enode);
- node = mas_mn(&mas);
- node->slot_len = mas_dead_leaves(&mas, slots);
+ node = mte_to_node(enode);
+ node->slot_len = mte_dead_leaves(enode, mt, slots);
if (free)
mt_free_bulk(node->slot_len, slots);
-start_slots_free:
- mas_unlock(&mas);
-
free_leaf:
if (free)
- mt_free_rcu(&node->rcu);
+ kfree(node);
+ else
+ mt_clear_meta(mt, node, node->type);
}
/*
* mte_destroy_walk() - Free a tree or sub-tree.
- * @enode - the encoded maple node (maple_enode) to start
- * @mn - the tree to free - needed for node types.
+ * @enode: the encoded maple node (maple_enode) to start
+ * @mt: the tree to free - needed for node types.
*
* Must hold the write lock.
*/
@@ -5601,28 +5043,12 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
struct maple_node *node = mte_to_node(enode);
if (mt_in_rcu(mt)) {
- mt_destroy_walk(enode, mt->ma_flags, false);
+ mt_destroy_walk(enode, mt, false);
call_rcu(&node->rcu, mt_free_walk);
} else {
- mt_destroy_walk(enode, mt->ma_flags, true);
- }
-}
-
-static void mas_wr_store_setup(struct ma_wr_state *wr_mas)
-{
- if (!mas_is_start(wr_mas->mas)) {
- if (mas_is_none(wr_mas->mas)) {
- mas_reset(wr_mas->mas);
- } else {
- wr_mas->r_max = wr_mas->mas->max;
- wr_mas->type = mte_node_type(wr_mas->mas->node);
- if (mas_is_span_wr(wr_mas))
- mas_reset(wr_mas->mas);
- }
+ mt_destroy_walk(enode, mt, true);
}
-
}
-
/* Interface */
/**
@@ -5631,8 +5057,6 @@ static void mas_wr_store_setup(struct ma_wr_state *wr_mas)
* @entry: The entry to store.
*
 * The @mas->index and @mas->last are used to set the range for the @entry.
- * Note: The @mas should have pre-allocated entries to ensure there is memory to
- * store the entry. Please see mas_expected_entries()/mas_destroy() for more details.
*
* Return: the first entry between mas->index and mas->last or %NULL.
*/
@@ -5640,11 +5064,12 @@ void *mas_store(struct ma_state *mas, void *entry)
{
MA_WR_STATE(wr_mas, mas, entry);
- trace_ma_write(__func__, mas, 0, entry);
+ trace_ma_write(TP_FCT, mas, 0, entry);
#ifdef CONFIG_DEBUG_MAPLE_TREE
- if (mas->index > mas->last)
- pr_err("Error %lu > %lu %p\n", mas->index, mas->last, entry);
- MT_BUG_ON(mas->tree, mas->index > mas->last);
+ if (MAS_WARN_ON(mas, mas->index > mas->last))
+ pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last,
+ entry);
+
if (mas->index > mas->last) {
mas_set_err(mas, -EINVAL);
return NULL;
@@ -5658,8 +5083,25 @@ void *mas_store(struct ma_state *mas, void *entry)
* want to examine what happens if a single store operation was to
* overwrite multiple entries within a self-balancing B-Tree.
*/
- mas_wr_store_setup(&wr_mas);
+ mas_wr_prealloc_setup(&wr_mas);
+ mas->store_type = mas_wr_store_type(&wr_mas);
+ if (mas->mas_flags & MA_STATE_PREALLOC) {
+ mas_wr_store_entry(&wr_mas);
+ MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
+ return wr_mas.content;
+ }
+
+ mas_prealloc_calc(&wr_mas, entry);
+ if (!mas->node_request)
+ goto store;
+
+ mas_alloc_nodes(mas, GFP_NOWAIT);
+ if (mas_is_err(mas))
+ return NULL;
+
+store:
mas_wr_store_entry(&wr_mas);
+ mas_destroy(mas);
return wr_mas.content;
}
EXPORT_SYMBOL_GPL(mas_store);
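A minimal sketch of a ranged store with the advanced API (editorial, not part of the patch). Internal allocations use GFP_NOWAIT here, so the error state must be checked afterwards; mas_store_gfp() or mas_preallocate() are the robust alternatives. The tree "mt" and the stored value are hypothetical.

static void example_mas_store(struct maple_tree *mt)
{
        MA_STATE(mas, mt, 10, 19);      /* operate on the range 10-19 */

        mtree_lock(mt);
        mas_store(&mas, xa_mk_value(0x1234));
        if (mas_is_err(&mas))
                pr_err("maple store failed: %d\n", xa_err(mas.node));
        mtree_unlock(mt);
}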
@@ -5675,19 +5117,28 @@ EXPORT_SYMBOL_GPL(mas_store);
*/
int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
{
+ unsigned long index = mas->index;
+ unsigned long last = mas->last;
MA_WR_STATE(wr_mas, mas, entry);
+ int ret = 0;
- mas_wr_store_setup(&wr_mas);
- trace_ma_write(__func__, mas, 0, entry);
retry:
- mas_wr_store_entry(&wr_mas);
- if (unlikely(mas_nomem(mas, gfp)))
+ mas_wr_preallocate(&wr_mas, entry);
+ if (unlikely(mas_nomem(mas, gfp))) {
+ if (!entry)
+ __mas_set_range(mas, index, last);
goto retry;
+ }
- if (unlikely(mas_is_err(mas)))
- return xa_err(mas->node);
+ if (mas_is_err(mas)) {
+ ret = xa_err(mas->node);
+ goto out;
+ }
- return 0;
+ mas_wr_store_entry(&wr_mas);
+out:
+ mas_destroy(mas);
+ return ret;
}
EXPORT_SYMBOL_GPL(mas_store_gfp);
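A small sketch of the retrying variant (editorial, not part of the patch); mas_nomem() may temporarily drop the tree lock to allocate with GFP_KERNEL before retrying the store.

static int example_mas_store_gfp(struct maple_tree *mt, void *entry)
{
        MA_STATE(mas, mt, 100, 199);
        int ret;

        mtree_lock(mt);
        ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
        mtree_unlock(mt);
        return ret;
}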
@@ -5701,10 +5152,22 @@ void mas_store_prealloc(struct ma_state *mas, void *entry)
{
MA_WR_STATE(wr_mas, mas, entry);
- mas_wr_store_setup(&wr_mas);
- trace_ma_write(__func__, mas, 0, entry);
+ if (mas->store_type == wr_store_root) {
+ mas_wr_prealloc_setup(&wr_mas);
+ goto store;
+ }
+
+ mas_wr_walk_descend(&wr_mas);
+ if (mas->store_type != wr_spanning_store) {
+ /* set wr_mas->content to current slot */
+ wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset);
+ mas_wr_end_piv(&wr_mas);
+ }
+
+store:
+ trace_ma_write(TP_FCT, mas, 0, entry);
mas_wr_store_entry(&wr_mas);
- BUG_ON(mas_is_err(mas));
+ MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
mas_destroy(mas);
}
EXPORT_SYMBOL_GPL(mas_store_prealloc);
@@ -5719,20 +5182,30 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc);
*/
int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
{
- int ret;
+ MA_WR_STATE(wr_mas, mas, entry);
- mas_node_count_gfp(mas, 1 + mas_mt_height(mas) * 3, gfp);
- mas->mas_flags |= MA_STATE_PREALLOC;
- if (likely(!mas_is_err(mas)))
- return 0;
+ mas_wr_prealloc_setup(&wr_mas);
+ mas->store_type = mas_wr_store_type(&wr_mas);
+ mas_prealloc_calc(&wr_mas, entry);
+ if (!mas->node_request)
+ goto set_flag;
- mas_set_alloc_req(mas, 0);
- ret = xa_err(mas->node);
- mas_reset(mas);
- mas_destroy(mas);
- mas_reset(mas);
- return ret;
+ mas->mas_flags &= ~MA_STATE_PREALLOC;
+ mas_alloc_nodes(mas, gfp);
+ if (mas_is_err(mas)) {
+ int ret = xa_err(mas->node);
+
+ mas->node_request = 0;
+ mas_destroy(mas);
+ mas_reset(mas);
+ return ret;
+ }
+
+set_flag:
+ mas->mas_flags |= MA_STATE_PREALLOC;
+ return 0;
}
+EXPORT_SYMBOL_GPL(mas_preallocate);
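A hedged sketch of the preallocate-then-commit pattern served by mas_preallocate() and mas_store_prealloc() (not part of the patch). It assumes the caller already holds the tree's lock, as the VMA code does via an external sleeping lock, so GFP_KERNEL may block; if the store is abandoned after a successful preallocation, mas_destroy() releases the unused nodes.

static int example_prealloc_store(struct ma_state *mas, void *entry,
                                  unsigned long first, unsigned long last)
{
        int ret;

        mas_set_range(mas, first, last);
        ret = mas_preallocate(mas, entry, GFP_KERNEL);
        if (ret)
                return ret;     /* state is already reset on failure */

        /* ... work that must not fail past this point ... */

        mas_store_prealloc(mas, entry); /* cannot fail; frees spare nodes */
        return 0;
}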
/*
* mas_destroy() - destroy a maple state.
@@ -5744,102 +5217,80 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
*/
void mas_destroy(struct ma_state *mas)
{
- struct maple_alloc *node;
-
- /*
- * When using mas_for_each() to insert an expected number of elements,
- * it is possible that the number inserted is less than the expected
- * number. To fix an invalid final node, a check is performed here to
- * rebalance the previous node with the final node.
- */
- if (mas->mas_flags & MA_STATE_REBALANCE) {
- unsigned char end;
-
- if (mas_is_start(mas))
- mas_start(mas);
-
- mtree_range_walk(mas);
- end = mas_data_end(mas) + 1;
- if (end < mt_min_slot_count(mas->node) - 1)
- mas_destroy_rebalance(mas, end);
-
- mas->mas_flags &= ~MA_STATE_REBALANCE;
- }
- mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
-
- while (mas->alloc && !((unsigned long)mas->alloc & 0x1)) {
- node = mas->alloc;
- mas->alloc = node->slot[0];
- if (node->node_count > 0)
- mt_free_bulk(node->node_count,
- (void __rcu **)&node->slot[1]);
- kmem_cache_free(maple_node_cache, node);
- }
- mas->alloc = NULL;
+ mas->mas_flags &= ~MA_STATE_PREALLOC;
+ mas_empty_nodes(mas);
}
EXPORT_SYMBOL_GPL(mas_destroy);
-/*
- * mas_expected_entries() - Set the expected number of entries that will be inserted.
- * @mas: The maple state
- * @nr_entries: The number of expected entries.
- *
- * This will attempt to pre-allocate enough nodes to store the expected number
- * of entries. The allocations will occur using the bulk allocator interface
- * for speed. Please call mas_destroy() on the @mas after inserting the entries
- * to ensure any unused nodes are freed.
- *
- * Return: 0 on success, -ENOMEM if memory could not be allocated.
- */
-int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
+static void mas_may_activate(struct ma_state *mas)
{
- int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
- struct maple_enode *enode = mas->node;
- int nr_nodes;
- int ret;
-
- /*
- * Sometimes it is necessary to duplicate a tree to a new tree, such as
- * forking a process and duplicating the VMAs from one tree to a new
- * tree. When such a situation arises, it is known that the new tree is
- * not going to be used until the entire tree is populated. For
- * performance reasons, it is best to use a bulk load with RCU disabled.
- * This allows for optimistic splitting that favours the left and reuse
- * of nodes during the operation.
- */
+ if (!mas->node) {
+ mas->status = ma_start;
+ } else if (mas->index > mas->max || mas->index < mas->min) {
+ mas->status = ma_start;
+ } else {
+ mas->status = ma_active;
+ }
+}
- /* Optimize splitting for bulk insert in-order */
- mas->mas_flags |= MA_STATE_BULK;
+static bool mas_next_setup(struct ma_state *mas, unsigned long max,
+ void **entry)
+{
+ bool was_none = mas_is_none(mas);
- /*
- * Avoid overflow, assume a gap between each entry and a trailing null.
- * If this is wrong, it just means allocation can happen during
- * insertion of entries.
- */
- nr_nodes = max(nr_entries, nr_entries * 2 + 1);
- if (!mt_is_alloc(mas->tree))
- nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
+ if (unlikely(mas->last >= max)) {
+ mas->status = ma_overflow;
+ return true;
+ }
- /* Leaves; reduce slots to keep space for expansion */
- nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
- /* Internal nodes */
- nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
- /* Add working room for split (2 nodes) + new parents */
- mas_node_count(mas, nr_nodes + 3);
+ switch (mas->status) {
+ case ma_active:
+ return false;
+ case ma_none:
+ fallthrough;
+ case ma_pause:
+ mas->status = ma_start;
+ fallthrough;
+ case ma_start:
+ mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
+ break;
+ case ma_overflow:
+ /* Overflowed before, but the max changed */
+ mas_may_activate(mas);
+ break;
+ case ma_underflow:
+ /* The user expects the mas to be one before where it is */
+ mas_may_activate(mas);
+ *entry = mas_walk(mas);
+ if (*entry)
+ return true;
+ break;
+ case ma_root:
+ break;
+ case ma_error:
+ return true;
+ }
- /* Detect if allocations run out */
- mas->mas_flags |= MA_STATE_PREALLOC;
+ if (likely(mas_is_active(mas))) /* Fast path */
+ return false;
- if (!mas_is_err(mas))
- return 0;
+ if (mas_is_ptr(mas)) {
+ *entry = NULL;
+ if (was_none && mas->index == 0) {
+ mas->index = mas->last = 0;
+ return true;
+ }
+ mas->index = 1;
+ mas->last = ULONG_MAX;
+ mas->status = ma_none;
+ return true;
+ }
- ret = xa_err(mas->node);
- mas->node = enode;
- mas_destroy(mas);
- return ret;
+ if (mas_is_none(mas))
+ return true;
+ return false;
}
-EXPORT_SYMBOL_GPL(mas_expected_entries);
/**
* mas_next() - Get the next entry.
@@ -5854,27 +5305,38 @@ EXPORT_SYMBOL_GPL(mas_expected_entries);
*/
void *mas_next(struct ma_state *mas, unsigned long max)
{
- if (mas_is_none(mas) || mas_is_paused(mas))
- mas->node = MAS_START;
+ void *entry = NULL;
- if (mas_is_start(mas))
- mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
+ if (mas_next_setup(mas, max, &entry))
+ return entry;
- if (mas_is_ptr(mas)) {
- if (!mas->index) {
- mas->index = 1;
- mas->last = ULONG_MAX;
- }
- return NULL;
- }
+ /* Retries on dead nodes handled by mas_next_slot */
+ return mas_next_slot(mas, max, false);
+}
+EXPORT_SYMBOL_GPL(mas_next);
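A small sketch (not part of the patch) of stepping to the neighbouring entry after a walk; the returned pointer is only valid while the RCU read lock is held.

static void *example_entry_after(struct maple_tree *mt, unsigned long index)
{
        MA_STATE(mas, mt, index, index);
        void *entry;

        rcu_read_lock();
        mas_walk(&mas);                         /* position on @index */
        entry = mas_next(&mas, ULONG_MAX);      /* entry after that range */
        rcu_read_unlock();
        return entry;
}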
- if (mas->last == ULONG_MAX)
- return NULL;
+/**
+ * mas_next_range() - Advance the maple state to the next range
+ * @mas: The maple state
+ * @max: The maximum index to check.
+ *
+ * Sets @mas->index and @mas->last to the range.
+ * Must hold rcu_read_lock or the write lock.
+ * Can return the zero entry.
+ *
+ * Return: The next entry or %NULL
+ */
+void *mas_next_range(struct ma_state *mas, unsigned long max)
+{
+ void *entry = NULL;
+
+ if (mas_next_setup(mas, max, &entry))
+ return entry;
- /* Retries on dead nodes handled by mas_next_entry */
- return mas_next_entry(mas, max);
+ /* Retries on dead nodes handled by mas_next_slot */
+ return mas_next_slot(mas, max, true);
}
-EXPORT_SYMBOL_GPL(mas_next);
+EXPORT_SYMBOL_GPL(mas_next_range);
/**
* mt_next() - get the next value in the maple tree
@@ -5882,7 +5344,11 @@ EXPORT_SYMBOL_GPL(mas_next);
* @index: The start index
* @max: The maximum index to check
*
- * Return: The entry at @index or higher, or %NULL if nothing is found.
+ * Takes the RCU read lock internally to protect the search; the returned
+ * pointer is no longer protected once the RCU read lock is dropped.
+ * See also: Documentation/core-api/maple_tree.rst
+ *
+ * Return: The entry higher than @index or %NULL if nothing is found.
*/
void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
{
@@ -5896,49 +5362,111 @@ void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
}
EXPORT_SYMBOL_GPL(mt_next);
+static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry)
+{
+ if (unlikely(mas->index <= min)) {
+ mas->status = ma_underflow;
+ return true;
+ }
+
+ switch (mas->status) {
+ case ma_active:
+ return false;
+ case ma_start:
+ break;
+ case ma_none:
+ fallthrough;
+ case ma_pause:
+ mas->status = ma_start;
+ break;
+ case ma_underflow:
+ /* underflowed before but the min changed */
+ mas_may_activate(mas);
+ break;
+ case ma_overflow:
+ /* User expects mas to be one after where it is */
+ mas_may_activate(mas);
+ *entry = mas_walk(mas);
+ if (*entry)
+ return true;
+ break;
+ case ma_root:
+ break;
+ case ma_error:
+ return true;
+ }
+
+ if (mas_is_start(mas))
+ mas_walk(mas);
+
+ if (unlikely(mas_is_ptr(mas))) {
+ if (!mas->index) {
+ mas->status = ma_none;
+ return true;
+ }
+ mas->index = mas->last = 0;
+ *entry = mas_root(mas);
+ return true;
+ }
+
+ if (mas_is_none(mas)) {
+ if (mas->index) {
+ /* Walked to out-of-range pointer? */
+ mas->index = mas->last = 0;
+ mas->status = ma_root;
+ *entry = mas_root(mas);
+ return true;
+ }
+ return true;
+ }
+
+ return false;
+}
+
/**
* mas_prev() - Get the previous entry
* @mas: The maple state
* @min: The minimum value to check.
*
* Must hold rcu_read_lock or the write lock.
- * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not
+ * Will reset mas to ma_start if the status is ma_none. Will stop on not
* searchable nodes.
*
* Return: the previous value or %NULL.
*/
void *mas_prev(struct ma_state *mas, unsigned long min)
{
- if (!mas->index) {
- /* Nothing comes before 0 */
- mas->last = 0;
- return NULL;
- }
+ void *entry = NULL;
- if (unlikely(mas_is_ptr(mas)))
- return NULL;
+ if (mas_prev_setup(mas, min, &entry))
+ return entry;
- if (mas_is_none(mas) || mas_is_paused(mas))
- mas->node = MAS_START;
+ return mas_prev_slot(mas, min, false);
+}
+EXPORT_SYMBOL_GPL(mas_prev);
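The mirror image for walking backwards, again only a sketch (not part of the patch):

static void *example_entry_before(struct maple_tree *mt, unsigned long index)
{
        MA_STATE(mas, mt, index, index);
        void *entry;

        rcu_read_lock();
        mas_walk(&mas);
        entry = mas_prev(&mas, 0);      /* stop at index 0 */
        rcu_read_unlock();
        return entry;
}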
- if (mas_is_start(mas)) {
- mas_walk(mas);
- if (!mas->index)
- return NULL;
- }
+/**
+ * mas_prev_range() - Advance to the previous range
+ * @mas: The maple state
+ * @min: The minimum value to check.
+ *
+ * Sets @mas->index and @mas->last to the range.
+ * Must hold rcu_read_lock or the write lock.
+ * Will reset mas to ma_start if the status is ma_none. Will stop on
+ * non-searchable nodes.
+ *
+ * Return: the previous value or %NULL.
+ */
+void *mas_prev_range(struct ma_state *mas, unsigned long min)
+{
+ void *entry = NULL;
- if (mas_is_ptr(mas)) {
- if (!mas->index) {
- mas->last = 0;
- return NULL;
- }
+ if (mas_prev_setup(mas, min, &entry))
+ return entry;
- mas->index = mas->last = 0;
- return mas_root_locked(mas);
- }
- return mas_prev_entry(mas, min);
+ return mas_prev_slot(mas, min, true);
}
-EXPORT_SYMBOL_GPL(mas_prev);
+EXPORT_SYMBOL_GPL(mas_prev_range);
/**
* mt_prev() - get the previous value in the maple tree
@@ -5946,7 +5474,11 @@ EXPORT_SYMBOL_GPL(mas_prev);
* @index: The start index
* @min: The minimum index to check
*
- * Return: The entry at @index or lower, or %NULL if nothing is found.
+ * Takes the RCU read lock internally to protect the search; the returned
+ * pointer is no longer protected once the RCU read lock is dropped.
+ * See also: Documentation/core-api/maple_tree.rst
+ *
+ * Return: The entry before @index or %NULL if nothing is found.
*/
void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
{
@@ -5975,11 +5507,99 @@ EXPORT_SYMBOL_GPL(mt_prev);
*/
void mas_pause(struct ma_state *mas)
{
- mas->node = MAS_PAUSE;
+ mas->status = ma_pause;
+ mas->node = NULL;
}
EXPORT_SYMBOL_GPL(mas_pause);
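A sketch of the intended mas_pause() usage (editorial, not part of the patch): dropping the RCU read lock in the middle of a long scan and resuming from the recorded position on the next lookup.

static void example_paused_scan(struct maple_tree *mt)
{
        MA_STATE(mas, mt, 0, 0);
        void *entry;

        rcu_read_lock();
        mas_for_each(&mas, entry, ULONG_MAX) {
                /* ... process entry ... */
                if (need_resched()) {
                        mas_pause(&mas);        /* record position; node pointer is dropped */
                        rcu_read_unlock();
                        cond_resched();
                        rcu_read_lock();
                }
        }
        rcu_read_unlock();
}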
/**
+ * mas_find_setup() - Internal function to set up mas_find*().
+ * @mas: The maple state
+ * @max: The maximum index
+ * @entry: Pointer to the entry
+ *
+ * Return: True if entry is the answer, false otherwise.
+ */
+static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry)
+{
+ switch (mas->status) {
+ case ma_active:
+ if (mas->last < max)
+ return false;
+ return true;
+ case ma_start:
+ break;
+ case ma_pause:
+ if (unlikely(mas->last >= max))
+ return true;
+
+ mas->index = ++mas->last;
+ mas->status = ma_start;
+ break;
+ case ma_none:
+ if (unlikely(mas->last >= max))
+ return true;
+
+ mas->index = mas->last;
+ mas->status = ma_start;
+ break;
+ case ma_underflow:
+		/* mas points at the entry it stopped on before it was unable to go lower */
+ if (unlikely(mas->index >= max)) {
+ mas->status = ma_overflow;
+ return true;
+ }
+
+ mas_may_activate(mas);
+ *entry = mas_walk(mas);
+ if (*entry)
+ return true;
+ break;
+ case ma_overflow:
+ if (unlikely(mas->last >= max))
+ return true;
+
+ mas_may_activate(mas);
+ *entry = mas_walk(mas);
+ if (*entry)
+ return true;
+ break;
+ case ma_root:
+ break;
+ case ma_error:
+ return true;
+ }
+
+ if (mas_is_start(mas)) {
+ /* First run or continue */
+ if (mas->index > max)
+ return true;
+
+ *entry = mas_walk(mas);
+ if (*entry)
+ return true;
+
+ }
+
+ if (unlikely(mas_is_ptr(mas)))
+ goto ptr_out_of_range;
+
+ if (unlikely(mas_is_none(mas)))
+ return true;
+
+ if (mas->index == max)
+ return true;
+
+ return false;
+
+ptr_out_of_range:
+ mas->status = ma_none;
+ mas->index = 1;
+ mas->last = ULONG_MAX;
+ return true;
+}
+
+/**
* mas_find() - On the first call, find the entry at or after mas->index up to
* %max. Otherwise, find the entry after mas->index.
* @mas: The maple state
@@ -5987,40 +5607,135 @@ EXPORT_SYMBOL_GPL(mas_pause);
*
* Must hold rcu_read_lock or the write lock.
* If an entry exists, last and index are updated accordingly.
- * May set @mas->node to MAS_NONE.
+ * May set @mas->status to ma_overflow.
*
* Return: The entry or %NULL.
*/
void *mas_find(struct ma_state *mas, unsigned long max)
{
- if (unlikely(mas_is_paused(mas))) {
- if (unlikely(mas->last == ULONG_MAX)) {
- mas->node = MAS_NONE;
- return NULL;
+ void *entry = NULL;
+
+ if (mas_find_setup(mas, max, &entry))
+ return entry;
+
+ /* Retries on dead nodes handled by mas_next_slot */
+ entry = mas_next_slot(mas, max, false);
+ /* Ignore overflow */
+ mas->status = ma_active;
+ return entry;
+}
+EXPORT_SYMBOL_GPL(mas_find);
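
A minimal sketch of how the split setup/slot helpers are consumed by a mas_find() loop
(illustrative only; the range 10..100 is arbitrary):

	static void example_find_forward(struct maple_tree *mt)
	{
		MA_STATE(mas, mt, 10, 10);	/* first call searches at or after 10 */
		void *entry;

		rcu_read_lock();
		/* Each later call returns the next entry after the previous one. */
		for (entry = mas_find(&mas, 100); entry; entry = mas_find(&mas, 100))
			pr_debug("found %p in %lu-%lu\n", entry, mas.index, mas.last);
		rcu_read_unlock();
	}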
+
+/**
+ * mas_find_range() - On the first call, find the entry at or after
+ * mas->index up to %max. Otherwise, advance to the next slot after mas->index.
+ * @mas: The maple state
+ * @max: The maximum value to check.
+ *
+ * Must hold rcu_read_lock or the write lock.
+ * If an entry exists, last and index are updated accordingly.
+ * May set @mas->status to ma_overflow.
+ *
+ * Return: The entry or %NULL.
+ */
+void *mas_find_range(struct ma_state *mas, unsigned long max)
+{
+ void *entry = NULL;
+
+ if (mas_find_setup(mas, max, &entry))
+ return entry;
+
+ /* Retries on dead nodes handled by mas_next_slot */
+ return mas_next_slot(mas, max, true);
+}
+EXPORT_SYMBOL_GPL(mas_find_range);
+
+/**
+ * mas_find_rev_setup() - Internal function to set up mas_find_*_rev()
+ * @mas: The maple state
+ * @min: The minimum index
+ * @entry: Pointer to the entry
+ *
+ * Return: True if entry is the answer, false otherwise.
+ */
+static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min,
+ void **entry)
+{
+
+ switch (mas->status) {
+ case ma_active:
+ goto active;
+ case ma_start:
+ break;
+ case ma_pause:
+ if (unlikely(mas->index <= min)) {
+ mas->status = ma_underflow;
+ return true;
}
- mas->node = MAS_START;
- mas->index = ++mas->last;
+ mas->last = --mas->index;
+ mas->status = ma_start;
+ break;
+ case ma_none:
+ if (mas->index <= min)
+ goto none;
+
+ mas->last = mas->index;
+ mas->status = ma_start;
+ break;
+ case ma_overflow: /* user expects the mas to be one after where it is */
+ if (unlikely(mas->index <= min)) {
+ mas->status = ma_underflow;
+ return true;
+ }
+
+ mas->status = ma_active;
+ break;
+ case ma_underflow: /* user expects the mas to be one before where it is */
+ if (unlikely(mas->index <= min))
+ return true;
+
+ mas->status = ma_active;
+ break;
+ case ma_root:
+ break;
+ case ma_error:
+ return true;
}
- if (unlikely(mas_is_start(mas))) {
+ if (mas_is_start(mas)) {
/* First run or continue */
- void *entry;
+ if (mas->index < min)
+ return true;
- if (mas->index > max)
- return NULL;
+ *entry = mas_walk(mas);
+ if (*entry)
+ return true;
+ }
- entry = mas_walk(mas);
- if (entry)
- return entry;
+ if (unlikely(mas_is_ptr(mas)))
+ goto none;
+
+ if (unlikely(mas_is_none(mas))) {
+ /*
+ * Walked to the location, and there was nothing so the previous
+ * location is 0.
+ */
+ mas->last = mas->index = 0;
+ mas->status = ma_root;
+ *entry = mas_root(mas);
+ return true;
}
- if (unlikely(!mas_searchable(mas)))
- return NULL;
+active:
+ if (mas->index < min)
+ return true;
+
+ return false;
- /* Retries on dead nodes handled by mas_next_entry */
- return mas_next_entry(mas, max);
+none:
+ mas->status = ma_none;
+ return true;
}
-EXPORT_SYMBOL_GPL(mas_find);
/**
* mas_find_rev: On the first call, find the first non-null entry at or below
@@ -6031,43 +5746,47 @@ EXPORT_SYMBOL_GPL(mas_find);
*
* Must hold rcu_read_lock or the write lock.
* If an entry exists, last and index are updated accordingly.
- * May set @mas->node to MAS_NONE.
+ * May set @mas->status to ma_underflow.
*
* Return: The entry or %NULL.
*/
void *mas_find_rev(struct ma_state *mas, unsigned long min)
{
- if (unlikely(mas_is_paused(mas))) {
- if (unlikely(mas->last == ULONG_MAX)) {
- mas->node = MAS_NONE;
- return NULL;
- }
- mas->node = MAS_START;
- mas->last = --mas->index;
- }
+ void *entry = NULL;
- if (unlikely(mas_is_start(mas))) {
- /* First run or continue */
- void *entry;
+ if (mas_find_rev_setup(mas, min, &entry))
+ return entry;
- if (mas->index < min)
- return NULL;
+ /* Retries on dead nodes handled by mas_prev_slot */
+ return mas_prev_slot(mas, min, false);
- entry = mas_walk(mas);
- if (entry)
- return entry;
- }
+}
+EXPORT_SYMBOL_GPL(mas_find_rev);
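
A matching sketch of the reverse search (illustrative; the bounds 50 and 200 are
arbitrary):

	static void example_find_rev(struct maple_tree *mt)
	{
		MA_STATE(mas, mt, 200, 200);	/* search at or below index 200 */
		void *entry;

		rcu_read_lock();
		/* Visit the entries in [50, 200], highest index first. */
		for (entry = mas_find_rev(&mas, 50); entry;
		     entry = mas_find_rev(&mas, 50))
			pr_debug("found %p at %lu\n", entry, mas.index);
		rcu_read_unlock();
	}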
- if (unlikely(!mas_searchable(mas)))
- return NULL;
+/**
+ * mas_find_range_rev: On the first call, find the first non-null entry at or
+ * below mas->index down to %min. Otherwise advance to the previous slot after
+ * mas->index down to %min.
+ * @mas: The maple state
+ * @min: The minimum value to check.
+ *
+ * Must hold rcu_read_lock or the write lock.
+ * If an entry exists, last and index are updated accordingly.
+ * May set @mas->status to ma_underflow.
+ *
+ * Return: The entry or %NULL.
+ */
+void *mas_find_range_rev(struct ma_state *mas, unsigned long min)
+{
+ void *entry = NULL;
- if (mas->index < min)
- return NULL;
+ if (mas_find_rev_setup(mas, min, &entry))
+ return entry;
- /* Retries on dead nodes handled by mas_prev_entry */
- return mas_prev_entry(mas, min);
+ /* Retries on dead nodes handled by mas_prev_slot */
+ return mas_prev_slot(mas, min, true);
}
-EXPORT_SYMBOL_GPL(mas_find_rev);
+EXPORT_SYMBOL_GPL(mas_find_range_rev);
/**
* mas_erase() - Find the range in which index resides and erase the entire
@@ -6083,24 +5802,32 @@ EXPORT_SYMBOL_GPL(mas_find_rev);
void *mas_erase(struct ma_state *mas)
{
void *entry;
+ unsigned long index = mas->index;
MA_WR_STATE(wr_mas, mas, NULL);
- if (mas_is_none(mas) || mas_is_paused(mas))
- mas->node = MAS_START;
+ if (!mas_is_active(mas) || !mas_is_start(mas))
+ mas->status = ma_start;
- /* Retry unnecessary when holding the write lock. */
+write_retry:
entry = mas_state_walk(mas);
if (!entry)
return NULL;
-write_retry:
/* Must reset to ensure spanning writes of last slot are detected */
mas_reset(mas);
- mas_wr_store_setup(&wr_mas);
- mas_wr_store_entry(&wr_mas);
- if (mas_nomem(mas, GFP_KERNEL))
+ mas_wr_preallocate(&wr_mas, NULL);
+ if (mas_nomem(mas, GFP_KERNEL)) {
+ /* in case the range of entry changed when unlocked */
+ mas->index = mas->last = index;
goto write_retry;
+ }
+
+ if (mas_is_err(mas))
+ goto out;
+ mas_wr_store_entry(&wr_mas);
+out:
+ mas_destroy(mas);
return entry;
}
EXPORT_SYMBOL_GPL(mas_erase);
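
A sketch of a caller of the reworked mas_erase() (not from the patch; taking the write
lock via mtree_lock() is the interface requirement being illustrated):

	static void *example_erase(struct maple_tree *mt, unsigned long index)
	{
		MA_STATE(mas, mt, index, index);
		void *old;

		mtree_lock(mt);
		old = mas_erase(&mas);	/* erases the whole range covering index */
		mtree_unlock(mt);
		return old;		/* the removed entry, or NULL */
	}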
@@ -6113,12 +5840,10 @@ EXPORT_SYMBOL_GPL(mas_erase);
* Return: true on allocation, false otherwise.
*/
bool mas_nomem(struct ma_state *mas, gfp_t gfp)
- __must_hold(mas->tree->lock)
+ __must_hold(mas->tree->ma_lock)
{
- if (likely(mas->node != MA_ERROR(-ENOMEM))) {
- mas_destroy(mas);
+ if (likely(mas->node != MA_ERROR(-ENOMEM)))
return false;
- }
if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
mtree_unlock(mas->tree);
@@ -6128,18 +5853,23 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
mas_alloc_nodes(mas, gfp);
}
- if (!mas_allocated(mas))
+ if (!mas->sheaf && !mas->alloc)
return false;
- mas->node = MAS_START;
+ mas->status = ma_start;
return true;
}
void __init maple_tree_init(void)
{
+ struct kmem_cache_args args = {
+ .align = sizeof(struct maple_node),
+ .sheaf_capacity = 32,
+ };
+
maple_node_cache = kmem_cache_create("maple_node",
- sizeof(struct maple_node), sizeof(struct maple_node),
- SLAB_PANIC, NULL);
+ sizeof(struct maple_node), &args,
+ SLAB_PANIC);
}
/**
@@ -6154,7 +5884,7 @@ void *mtree_load(struct maple_tree *mt, unsigned long index)
MA_STATE(mas, mt, index, index);
void *entry;
- trace_ma_read(__func__, &mas);
+ trace_ma_read(TP_FCT, &mas);
rcu_read_lock();
retry:
entry = mas_start(&mas);
@@ -6195,9 +5925,9 @@ int mtree_store_range(struct maple_tree *mt, unsigned long index,
unsigned long last, void *entry, gfp_t gfp)
{
MA_STATE(mas, mt, index, last);
- MA_WR_STATE(wr_mas, &mas, entry);
+ int ret = 0;
- trace_ma_write(__func__, &mas, 0, entry);
+ trace_ma_write(TP_FCT, &mas, 0, entry);
if (WARN_ON_ONCE(xa_is_advanced(entry)))
return -EINVAL;
@@ -6205,16 +5935,10 @@ int mtree_store_range(struct maple_tree *mt, unsigned long index,
return -EINVAL;
mtree_lock(mt);
-retry:
- mas_wr_store_entry(&wr_mas);
- if (mas_nomem(&mas, gfp))
- goto retry;
-
+ ret = mas_store_gfp(&mas, entry, gfp);
mtree_unlock(mt);
- if (mas_is_err(&mas))
- return xa_err(mas.node);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(mtree_store_range);
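
For completeness, a minimal caller of mtree_store_range(), which is now routed through
mas_store_gfp() (illustrative values):

	static int example_store_range(struct maple_tree *mt)
	{
		/* Associate the inclusive range 5..10 with a tagged value. */
		return mtree_store_range(mt, 5, 10, xa_mk_value(42), GFP_KERNEL);
	}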
@@ -6236,7 +5960,7 @@ int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
EXPORT_SYMBOL(mtree_store);
/**
- * mtree_insert_range() - Insert an entry at a give range if there is no value.
+ * mtree_insert_range() - Insert an entry at a given range if there is no value.
* @mt: The maple tree
* @first: The start of the range
* @last: The end of the range
@@ -6250,6 +5974,7 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first,
unsigned long last, void *entry, gfp_t gfp)
{
MA_STATE(ms, mt, first, last);
+ int ret = 0;
if (WARN_ON_ONCE(xa_is_advanced(entry)))
return -EINVAL;
@@ -6265,18 +5990,19 @@ retry:
mtree_unlock(mt);
if (mas_is_err(&ms))
- return xa_err(ms.node);
+ ret = xa_err(ms.node);
- return 0;
+ mas_destroy(&ms);
+ return ret;
}
EXPORT_SYMBOL(mtree_insert_range);
/**
- * mtree_insert() - Insert an entry at a give index if there is no value.
+ * mtree_insert() - Insert an entry at a given index if there is no value.
* @mt: The maple tree
* @index : The index to store the value
* @entry: The entry to store
- * @gfp: The FGP_FLAGS to use for allocations.
+ * @gfp: The GFP_FLAGS to use for allocations.
*
 * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
* request, -ENOMEM if memory could not be allocated.
@@ -6294,65 +6020,117 @@ int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
{
int ret = 0;
- MA_STATE(mas, mt, min, max - size);
+ MA_STATE(mas, mt, 0, 0);
if (!mt_is_alloc(mt))
return -EINVAL;
if (WARN_ON_ONCE(mt_is_reserved(entry)))
return -EINVAL;
- if (min > max)
- return -EINVAL;
-
- if (max < size)
- return -EINVAL;
-
- if (!size)
- return -EINVAL;
-
mtree_lock(mt);
retry:
- mas.offset = 0;
- mas.index = min;
- mas.last = max - size;
- ret = mas_alloc(&mas, entry, size, startp);
+ ret = mas_empty_area(&mas, min, max, size);
+ if (ret)
+ goto unlock;
+
+ mas_insert(&mas, entry);
+ /*
+ * mas_nomem() may release the lock, causing the allocated area
+ * to be unavailable, so try to allocate a free area again.
+ */
if (mas_nomem(&mas, gfp))
goto retry;
+ if (mas_is_err(&mas))
+ ret = xa_err(mas.node);
+ else
+ *startp = mas.index;
+
+unlock:
mtree_unlock(mt);
+ mas_destroy(&mas);
return ret;
}
EXPORT_SYMBOL(mtree_alloc_range);
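
A sketch of the allocation-range API as used by a caller (values are arbitrary; the tree
is assumed to have been initialised with MT_FLAGS_ALLOC_RANGE):

	static int example_alloc_range(struct maple_tree *mt, void *ptr)
	{
		unsigned long start;
		int ret;

		/* Find a 16-index gap within [0, 1023] and store ptr there. */
		ret = mtree_alloc_range(mt, &start, ptr, 16, 0, 1023, GFP_KERNEL);
		if (ret)
			return ret;
		pr_debug("ptr stored at %lu-%lu\n", start, start + 15);
		return 0;
	}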
-int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
- void *entry, unsigned long size, unsigned long min,
- unsigned long max, gfp_t gfp)
+/**
+ * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
+ * @mt: The maple tree.
+ * @startp: Pointer to ID.
+ * @range_lo: Lower bound of range to search.
+ * @range_hi: Upper bound of range to search.
+ * @entry: The entry to store.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * Finds an empty entry in @mt after @next, stores the new index into
+ * the @startp pointer, stores the entry at that index, then updates @next.
+ *
+ * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
+ *
+ * Context: Any context. Takes and releases the mt.lock. May sleep if
+ * the @gfp flags permit.
+ *
+ * Return: 0 if the allocation succeeded without wrapping, 1 if the
+ * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
+ * free entries.
+ */
+int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long range_lo, unsigned long range_hi,
+ unsigned long *next, gfp_t gfp)
{
- int ret = 0;
+ int ret;
+
+ MA_STATE(mas, mt, 0, 0);
- MA_STATE(mas, mt, min, max - size);
if (!mt_is_alloc(mt))
return -EINVAL;
-
if (WARN_ON_ONCE(mt_is_reserved(entry)))
return -EINVAL;
+ mtree_lock(mt);
+ ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi,
+ next, gfp);
+ mtree_unlock(mt);
+ return ret;
+}
+EXPORT_SYMBOL(mtree_alloc_cyclic);
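
A sketch of cyclic ID allocation with the new helper (the example_id_table wrapper is a
hypothetical caller, not part of this patch):

	struct example_id_table {
		struct maple_tree mt;		/* initialised with MT_FLAGS_ALLOC_RANGE */
		unsigned long next;		/* next ID hint, starts at range_lo */
	};

	static int example_alloc_id(struct example_id_table *t, void *obj,
				    unsigned long *id)
	{
		/*
		 * Allocate the next free ID in [1, 1023], wrapping when needed.
		 * Returns 0, 1 if the search wrapped, or a negative error.
		 */
		return mtree_alloc_cyclic(&t->mt, id, obj, 1, 1023, &t->next,
					  GFP_KERNEL);
	}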
- if (min >= max)
- return -EINVAL;
+int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
+ void *entry, unsigned long size, unsigned long min,
+ unsigned long max, gfp_t gfp)
+{
+ int ret = 0;
- if (max < size - 1)
+ MA_STATE(mas, mt, 0, 0);
+ if (!mt_is_alloc(mt))
return -EINVAL;
- if (!size)
+ if (WARN_ON_ONCE(mt_is_reserved(entry)))
return -EINVAL;
mtree_lock(mt);
retry:
- ret = mas_rev_alloc(&mas, min, max, entry, size, startp);
+ ret = mas_empty_area_rev(&mas, min, max, size);
+ if (ret)
+ goto unlock;
+
+ mas_insert(&mas, entry);
+ /*
+ * mas_nomem() may release the lock, causing the allocated area
+ * to be unavailable, so try to allocate a free area again.
+ */
if (mas_nomem(&mas, gfp))
goto retry;
+ if (mas_is_err(&mas))
+ ret = xa_err(mas.node);
+ else
+ *startp = mas.index;
+
+unlock:
mtree_unlock(mt);
+ mas_destroy(&mas);
return ret;
}
EXPORT_SYMBOL(mtree_alloc_rrange);
@@ -6372,7 +6150,7 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index)
void *entry = NULL;
MA_STATE(mas, mt, index, index);
- trace_ma_op(__func__, &mas);
+ trace_ma_op(TP_FCT, &mas);
mtree_lock(mt);
entry = mas_erase(&mas);
@@ -6382,6 +6160,277 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index)
}
EXPORT_SYMBOL(mtree_erase);
+/*
+ * mas_dup_free() - Free an incomplete duplication of a tree.
+ * @mas: The maple state of an incomplete tree.
+ *
+ * @mas->node indicates the node on which the allocation failed. This function
+ * frees all nodes starting from @mas->node in the
+ * reverse order of mas_dup_build(). There is no need to hold the source tree
+ * lock at this time.
+ */
+static void mas_dup_free(struct ma_state *mas)
+{
+ struct maple_node *node;
+ enum maple_type type;
+ void __rcu **slots;
+ unsigned char count, i;
+
+ /* Maybe the first node allocation failed. */
+ if (mas_is_none(mas))
+ return;
+
+ while (!mte_is_root(mas->node)) {
+ mas_ascend(mas);
+ if (mas->offset) {
+ mas->offset--;
+ do {
+ mas_descend(mas);
+ mas->offset = mas_data_end(mas);
+ } while (!mte_is_leaf(mas->node));
+
+ mas_ascend(mas);
+ }
+
+ node = mte_to_node(mas->node);
+ type = mte_node_type(mas->node);
+ slots = ma_slots(node, type);
+ count = mas_data_end(mas) + 1;
+ for (i = 0; i < count; i++)
+ ((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
+ mt_free_bulk(count, slots);
+ }
+
+ node = mte_to_node(mas->node);
+ kfree(node);
+}
+
+/*
+ * mas_copy_node() - Copy a maple node and replace the parent.
+ * @mas: The maple state of source tree.
+ * @new_mas: The maple state of new tree.
+ * @parent: The parent of the new node.
+ *
+ * Copy @mas->node to @new_mas->node, set @parent to be the parent of
+ * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM.
+ */
+static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas,
+ struct maple_pnode *parent)
+{
+ struct maple_node *node = mte_to_node(mas->node);
+ struct maple_node *new_node = mte_to_node(new_mas->node);
+ unsigned long val;
+
+ /* Copy the node completely. */
+ memcpy(new_node, node, sizeof(struct maple_node));
+ /* Update the parent node pointer. */
+ val = (unsigned long)node->parent & MAPLE_NODE_MASK;
+ new_node->parent = ma_parent_ptr(val | (unsigned long)parent);
+}
+
+/*
+ * mas_dup_alloc() - Allocate child nodes for a maple node.
+ * @mas: The maple state of source tree.
+ * @new_mas: The maple state of new tree.
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * This function allocates child nodes for @new_mas->node during the duplication
+ * process. If memory allocation fails, @mas is set to -ENOMEM.
+ */
+static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
+ gfp_t gfp)
+{
+ struct maple_node *node = mte_to_node(mas->node);
+ struct maple_node *new_node = mte_to_node(new_mas->node);
+ enum maple_type type;
+ unsigned char count, i;
+ void __rcu **slots;
+ void __rcu **new_slots;
+ unsigned long val;
+
+ /* Allocate memory for child nodes. */
+ type = mte_node_type(mas->node);
+ new_slots = ma_slots(new_node, type);
+ count = mas->node_request = mas_data_end(mas) + 1;
+ mas_alloc_nodes(mas, gfp);
+ if (unlikely(mas_is_err(mas)))
+ return;
+
+ slots = ma_slots(node, type);
+ for (i = 0; i < count; i++) {
+ val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
+ val &= MAPLE_NODE_MASK;
+ new_slots[i] = ma_mnode_ptr((unsigned long)mas_pop_node(mas) |
+ val);
+ }
+}
+
+/*
+ * mas_dup_build() - Build a new maple tree from a source tree
+ * @mas: The maple state of the source tree; it must be in the ma_start state.
+ * @new_mas: The maple state of the new tree; it must be in the ma_start state.
+ * @gfp: The GFP_FLAGS to use for allocations.
+ *
+ * This function builds a new tree in DFS preorder. If the memory allocation
+ * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the
+ * last node. mas_dup_free() will free the incomplete duplication of a tree.
+ *
+ * Note that the attributes of the two trees need to be exactly the same, and the
+ * new tree needs to be empty, otherwise -EINVAL will be set in @mas.
+ */
+static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
+ gfp_t gfp)
+{
+ struct maple_node *node;
+ struct maple_pnode *parent = NULL;
+ struct maple_enode *root;
+ enum maple_type type;
+
+ if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) ||
+ unlikely(!mtree_empty(new_mas->tree))) {
+ mas_set_err(mas, -EINVAL);
+ return;
+ }
+
+ root = mas_start(mas);
+ if (mas_is_ptr(mas) || mas_is_none(mas))
+ goto set_new_tree;
+
+ node = mt_alloc_one(gfp);
+ if (!node) {
+ new_mas->status = ma_none;
+ mas_set_err(mas, -ENOMEM);
+ return;
+ }
+
+ type = mte_node_type(mas->node);
+ root = mt_mk_node(node, type);
+ new_mas->node = root;
+ new_mas->min = 0;
+ new_mas->max = ULONG_MAX;
+ root = mte_mk_root(root);
+ while (1) {
+ mas_copy_node(mas, new_mas, parent);
+ if (!mte_is_leaf(mas->node)) {
+ /* Only allocate child nodes for non-leaf nodes. */
+ mas_dup_alloc(mas, new_mas, gfp);
+ if (unlikely(mas_is_err(mas)))
+ goto empty_mas;
+ } else {
+ /*
+ * This is the last leaf node and duplication is
+ * completed.
+ */
+ if (mas->max == ULONG_MAX)
+ goto done;
+
+ /* This is not the last leaf node and needs to go up. */
+ do {
+ mas_ascend(mas);
+ mas_ascend(new_mas);
+ } while (mas->offset == mas_data_end(mas));
+
+ /* Move to the next subtree. */
+ mas->offset++;
+ new_mas->offset++;
+ }
+
+ mas_descend(mas);
+ parent = ma_parent_ptr(mte_to_node(new_mas->node));
+ mas_descend(new_mas);
+ mas->offset = 0;
+ new_mas->offset = 0;
+ }
+done:
+ /* Specially handle the parent of the root node. */
+ mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas));
+set_new_tree:
+ /* Make them the same height */
+ new_mas->tree->ma_flags = mas->tree->ma_flags;
+ rcu_assign_pointer(new_mas->tree->ma_root, root);
+empty_mas:
+ mas_empty_nodes(mas);
+}
+
+/**
+ * __mt_dup(): Duplicate an entire maple tree
+ * @mt: The source maple tree
+ * @new: The new maple tree
+ * @gfp: The GFP_FLAGS to use for allocations
+ *
+ * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
+ * traversal. It uses memcpy() to copy nodes in the source tree and allocates
+ * new child nodes for non-leaf nodes. The new node is exactly the same as the
+ * source node except for all the addresses stored in it. It will be faster than
+ * traversing all elements in the source tree and inserting them one by one into
+ * the new tree.
+ * The user needs to ensure that the attributes of the source tree and the new
+ * tree are the same, and the new tree needs to be an empty tree, otherwise
+ * -EINVAL will be returned.
+ * Note that the user needs to manually lock the source tree and the new tree.
+ *
+ * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
+ * the attributes of the two trees are different or the new tree is not an empty
+ * tree.
+ */
+int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
+{
+ int ret = 0;
+ MA_STATE(mas, mt, 0, 0);
+ MA_STATE(new_mas, new, 0, 0);
+
+ mas_dup_build(&mas, &new_mas, gfp);
+ if (unlikely(mas_is_err(&mas))) {
+ ret = xa_err(mas.node);
+ if (ret == -ENOMEM)
+ mas_dup_free(&new_mas);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(__mt_dup);
+
+/**
+ * mtree_dup(): Duplicate an entire maple tree
+ * @mt: The source maple tree
+ * @new: The new maple tree
+ * @gfp: The GFP_FLAGS to use for allocations
+ *
+ * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
+ * traversal. It uses memcpy() to copy nodes in the source tree and allocates
+ * new child nodes for non-leaf nodes. The new node is exactly the same as the
+ * source node except for all the addresses stored in it. It will be faster than
+ * traversing all elements in the source tree and inserting them one by one into
+ * the new tree.
+ * The user needs to ensure that the attributes of the source tree and the new
+ * tree are the same, and the new tree needs to be an empty tree, otherwise
+ * -EINVAL will be returned.
+ *
+ * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
+ * the attributes of the two trees are different or the new tree is not an empty
+ * tree.
+ */
+int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
+{
+ int ret = 0;
+ MA_STATE(mas, mt, 0, 0);
+ MA_STATE(new_mas, new, 0, 0);
+
+ mas_lock(&new_mas);
+ mas_lock_nested(&mas, SINGLE_DEPTH_NESTING);
+ mas_dup_build(&mas, &new_mas, gfp);
+ mas_unlock(&mas);
+ if (unlikely(mas_is_err(&mas))) {
+ ret = xa_err(mas.node);
+ if (ret == -ENOMEM)
+ mas_dup_free(&new_mas);
+ }
+
+ mas_unlock(&new_mas);
+ return ret;
+}
+EXPORT_SYMBOL(mtree_dup);
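
A minimal sketch of duplicating a tree with the locked variant (assumes both trees were
initialised with identical flags and that @dst is still empty; __mt_dup() is the variant
for callers that already hold both locks):

	static int example_fork_tree(struct maple_tree *src, struct maple_tree *dst)
	{
		/* mtree_dup() takes and releases both tree locks internally. */
		return mtree_dup(src, dst, GFP_KERNEL);
	}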
+
/**
* __mt_destroy() - Walk and free all nodes of a locked maple tree.
* @mt: The maple tree
@@ -6396,7 +6445,7 @@ void __mt_destroy(struct maple_tree *mt)
if (xa_is_node(root))
mte_destroy_walk(root, mt);
- mt->ma_flags = 0;
+ mt->ma_flags = mt_attr(mt);
}
EXPORT_SYMBOL_GPL(__mt_destroy);
@@ -6418,9 +6467,15 @@ EXPORT_SYMBOL(mtree_destroy);
* mt_find() - Search from the start up until an entry is found.
* @mt: The maple tree
* @index: Pointer which contains the start location of the search
- * @max: The maximum value to check
+ * @max: The maximum value of the search range
*
- * Handles locking. @index will be incremented to one beyond the range.
+ * Takes RCU read lock internally to protect the search, which does not
+ * protect the returned pointer after dropping RCU read lock.
+ * See also: Documentation/core-api/maple_tree.rst
+ *
+ * If an entry is found, @index is updated to point to the next possible
+ * entry, independent of whether the found entry occupies a single index
+ * or a range of indices.
*
* Return: The entry at or after the @index or %NULL
*/
@@ -6432,7 +6487,7 @@ void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
unsigned long copy = *index;
#endif
- trace_ma_read(__func__, &mas);
+ trace_ma_read(TP_FCT, &mas);
if ((*index) > max)
return NULL;
@@ -6449,8 +6504,8 @@ retry:
if (entry)
goto unlock;
- while (mas_searchable(&mas) && (mas.index < max)) {
- entry = mas_next_entry(&mas, max);
+ while (mas_is_active(&mas) && (mas.last < max)) {
+ entry = mas_next_slot(&mas, max, false);
if (likely(entry && !xa_is_zero(entry)))
break;
}
@@ -6462,10 +6517,9 @@ unlock:
if (likely(entry)) {
*index = mas.last + 1;
#ifdef CONFIG_DEBUG_MAPLE_TREE
- if ((*index) && (*index) <= copy)
+ if (MT_WARN_ON(mt, (*index) && ((*index) <= copy)))
pr_err("index not increased! %lx <= %lx\n",
*index, copy);
- MT_BUG_ON(mt, (*index) && ((*index) <= copy));
#endif
}
@@ -6479,7 +6533,9 @@ EXPORT_SYMBOL(mt_find);
* @index: Pointer which contains the start location of the search
* @max: The maximum value to check
*
- * Handles locking, detects wrapping on index == 0
+ * Same as mt_find() except that it checks @index for 0 before
+ * searching. If @index == 0, the search is aborted. This covers a wrap
+ * around of @index to 0 in an iterator loop.
*
* Return: The entry at or after the @index or %NULL
*/
@@ -6506,6 +6562,19 @@ void mt_set_non_kernel(unsigned int val)
kmem_cache_set_non_kernel(maple_node_cache, val);
}
+extern void kmem_cache_set_callback(struct kmem_cache *cachep,
+ void (*callback)(void *));
+void mt_set_callback(void (*callback)(void *))
+{
+ kmem_cache_set_callback(maple_node_cache, callback);
+}
+
+extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private);
+void mt_set_private(void *private)
+{
+ kmem_cache_set_private(maple_node_cache, private);
+}
+
extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
unsigned long mt_get_alloc_size(void)
{
@@ -6530,26 +6599,6 @@ unsigned int mt_nr_allocated(void)
return kmem_cache_nr_allocated(maple_node_cache);
}
-/*
- * mas_dead_node() - Check if the maple state is pointing to a dead node.
- * @mas: The maple state
- * @index: The index to restore in @mas.
- *
- * Used in test code.
- * Return: 1 if @mas has been reset to MAS_START, 0 otherwise.
- */
-static inline int mas_dead_node(struct ma_state *mas, unsigned long index)
-{
- if (unlikely(!mas_searchable(mas) || mas_is_start(mas)))
- return 0;
-
- if (likely(!mte_dead_node(mas->node)))
- return 0;
-
- mas_rewalk(mas, index);
- return 1;
-}
-
void mt_cache_shrink(void)
{
}
@@ -6584,87 +6633,15 @@ static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
offset);
}
-
-/*
- * mas_first_entry() - Go the first leaf and find the first entry.
- * @mas: the maple state.
- * @limit: the maximum index to check.
- * @*r_start: Pointer to set to the range start.
- *
- * Sets mas->offset to the offset of the entry, r_start to the range minimum.
- *
- * Return: The first entry or MAS_NONE.
- */
-static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
- unsigned long limit, enum maple_type mt)
-
-{
- unsigned long max;
- unsigned long *pivots;
- void __rcu **slots;
- void *entry = NULL;
-
- mas->index = mas->min;
- if (mas->index > limit)
- goto none;
-
- max = mas->max;
- mas->offset = 0;
- while (likely(!ma_is_leaf(mt))) {
- MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
- slots = ma_slots(mn, mt);
- pivots = ma_pivots(mn, mt);
- max = pivots[0];
- entry = mas_slot(mas, slots, 0);
- if (unlikely(ma_dead_node(mn)))
- return NULL;
- mas->node = entry;
- mn = mas_mn(mas);
- mt = mte_node_type(mas->node);
- }
- MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
-
- mas->max = max;
- slots = ma_slots(mn, mt);
- entry = mas_slot(mas, slots, 0);
- if (unlikely(ma_dead_node(mn)))
- return NULL;
-
- /* Slot 0 or 1 must be set */
- if (mas->index > limit)
- goto none;
-
- if (likely(entry))
- return entry;
-
- pivots = ma_pivots(mn, mt);
- mas->index = pivots[0] + 1;
- mas->offset = 1;
- entry = mas_slot(mas, slots, 1);
- if (unlikely(ma_dead_node(mn)))
- return NULL;
-
- if (mas->index > limit)
- goto none;
-
- if (likely(entry))
- return entry;
-
-none:
- if (likely(!ma_dead_node(mn)))
- mas->node = MAS_NONE;
- return NULL;
-}
-
/* Depth first search, post-order */
static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
{
- struct maple_enode *p = MAS_NONE, *mn = mas->node;
+ struct maple_enode *p, *mn = mas->node;
unsigned long p_min, p_max;
mas_next_node(mas, mas_mn(mas), max);
- if (!mas_is_none(mas))
+ if (!mas_is_overflow(mas))
return;
if (mte_is_root(mn))
@@ -6672,15 +6649,12 @@ static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
mas->node = mn;
mas_ascend(mas);
- while (mas->node != MAS_NONE) {
+ do {
p = mas->node;
p_min = mas->min;
p_max = mas->max;
mas_prev_node(mas, 0);
- }
-
- if (p == MAS_NONE)
- return;
+ } while (!mas_is_underflow(mas));
mas->node = p;
mas->max = p_max;
@@ -6689,36 +6663,47 @@ static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
/* Tree validations */
static void mt_dump_node(const struct maple_tree *mt, void *entry,
- unsigned long min, unsigned long max, unsigned int depth);
+ unsigned long min, unsigned long max, unsigned int depth,
+ enum mt_dump_format format);
static void mt_dump_range(unsigned long min, unsigned long max,
- unsigned int depth)
+ unsigned int depth, enum mt_dump_format format)
{
static const char spaces[] = " ";
- if (min == max)
- pr_info("%.*s%lu: ", depth * 2, spaces, min);
- else
- pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
+ switch(format) {
+ case mt_dump_hex:
+ if (min == max)
+ pr_info("%.*s%lx: ", depth * 2, spaces, min);
+ else
+ pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max);
+ break;
+ case mt_dump_dec:
+ if (min == max)
+ pr_info("%.*s%lu: ", depth * 2, spaces, min);
+ else
+ pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
+ }
}
static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
- unsigned int depth)
+ unsigned int depth, enum mt_dump_format format)
{
- mt_dump_range(min, max, depth);
+ mt_dump_range(min, max, depth, format);
if (xa_is_value(entry))
- pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry),
- xa_to_value(entry), entry);
+ pr_cont("value %ld (0x%lx) [" PTR_FMT "]\n", xa_to_value(entry),
+ xa_to_value(entry), entry);
else if (xa_is_zero(entry))
pr_cont("zero (%ld)\n", xa_to_internal(entry));
else if (mt_is_reserved(entry))
- pr_cont("UNKNOWN ENTRY (%p)\n", entry);
+ pr_cont("UNKNOWN ENTRY (" PTR_FMT ")\n", entry);
else
- pr_cont("%p\n", entry);
+ pr_cont(PTR_FMT "\n", entry);
}
static void mt_dump_range64(const struct maple_tree *mt, void *entry,
- unsigned long min, unsigned long max, unsigned int depth)
+ unsigned long min, unsigned long max, unsigned int depth,
+ enum mt_dump_format format)
{
struct maple_range_64 *node = &mte_to_node(entry)->mr64;
bool leaf = mte_is_leaf(entry);
@@ -6726,51 +6711,78 @@ static void mt_dump_range64(const struct maple_tree *mt, void *entry,
int i;
pr_cont(" contents: ");
- for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++)
- pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
- pr_cont("%p\n", node->slot[i]);
+ for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) {
+ switch(format) {
+ case mt_dump_hex:
+ pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
+ break;
+ case mt_dump_dec:
+ pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
+ }
+ }
+ pr_cont(PTR_FMT "\n", node->slot[i]);
for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
unsigned long last = max;
if (i < (MAPLE_RANGE64_SLOTS - 1))
last = node->pivot[i];
- else if (!node->slot[i] && max != mt_max[mte_node_type(entry)])
+ else if (!node->slot[i] && max != mt_node_max(entry))
break;
if (last == 0 && i > 0)
break;
if (leaf)
mt_dump_entry(mt_slot(mt, node->slot, i),
- first, last, depth + 1);
+ first, last, depth + 1, format);
else if (node->slot[i])
mt_dump_node(mt, mt_slot(mt, node->slot, i),
- first, last, depth + 1);
+ first, last, depth + 1, format);
if (last == max)
break;
if (last > max) {
- pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
+ switch(format) {
+ case mt_dump_hex:
+ pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
node, last, max, i);
- break;
+ break;
+ case mt_dump_dec:
+ pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
+ node, last, max, i);
+ }
}
first = last + 1;
}
}
static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
- unsigned long min, unsigned long max, unsigned int depth)
+ unsigned long min, unsigned long max, unsigned int depth,
+ enum mt_dump_format format)
{
struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
- bool leaf = mte_is_leaf(entry);
unsigned long first = min;
int i;
pr_cont(" contents: ");
- for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++)
- pr_cont("%lu ", node->gap[i]);
+ for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
+ switch (format) {
+ case mt_dump_hex:
+ pr_cont("%lx ", node->gap[i]);
+ break;
+ case mt_dump_dec:
+ pr_cont("%lu ", node->gap[i]);
+ }
+ }
pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
- for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++)
- pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
- pr_cont("%p\n", node->slot[i]);
+ for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) {
+ switch (format) {
+ case mt_dump_hex:
+ pr_cont(PTR_FMT " %lX ", node->slot[i], node->pivot[i]);
+ break;
+ case mt_dump_dec:
+ pr_cont(PTR_FMT " %lu ", node->slot[i], node->pivot[i]);
+ }
+ }
+ pr_cont(PTR_FMT "\n", node->slot[i]);
for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
unsigned long last = max;
@@ -6780,35 +6792,39 @@ static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
break;
if (last == 0 && i > 0)
break;
- if (leaf)
- mt_dump_entry(mt_slot(mt, node->slot, i),
- first, last, depth + 1);
- else if (node->slot[i])
+ if (node->slot[i])
mt_dump_node(mt, mt_slot(mt, node->slot, i),
- first, last, depth + 1);
+ first, last, depth + 1, format);
if (last == max)
break;
if (last > max) {
- pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
+ switch(format) {
+ case mt_dump_hex:
+ pr_err("node " PTR_FMT " last (%lx) > max (%lx) at pivot %d!\n",
node, last, max, i);
- break;
+ break;
+ case mt_dump_dec:
+ pr_err("node " PTR_FMT " last (%lu) > max (%lu) at pivot %d!\n",
+ node, last, max, i);
+ }
}
first = last + 1;
}
}
static void mt_dump_node(const struct maple_tree *mt, void *entry,
- unsigned long min, unsigned long max, unsigned int depth)
+ unsigned long min, unsigned long max, unsigned int depth,
+ enum mt_dump_format format)
{
struct maple_node *node = mte_to_node(entry);
unsigned int type = mte_node_type(entry);
unsigned int i;
- mt_dump_range(min, max, depth);
+ mt_dump_range(min, max, depth, format);
- pr_cont("node %p depth %d type %d parent %p", node, depth, type,
- node ? node->parent : NULL);
+ pr_cont("node " PTR_FMT " depth %d type %d parent " PTR_FMT, node,
+ depth, type, node ? node->parent : NULL);
switch (type) {
case maple_dense:
pr_cont("\n");
@@ -6816,15 +6832,15 @@ static void mt_dump_node(const struct maple_tree *mt, void *entry,
if (min + i > max)
pr_cont("OUT OF RANGE: ");
mt_dump_entry(mt_slot(mt, node->slot, i),
- min + i, min + i, depth);
+ min + i, min + i, depth, format);
}
break;
case maple_leaf_64:
case maple_range_64:
- mt_dump_range64(mt, entry, min, max, depth);
+ mt_dump_range64(mt, entry, min, max, depth, format);
break;
case maple_arange_64:
- mt_dump_arange64(mt, entry, min, max, depth);
+ mt_dump_arange64(mt, entry, min, max, depth, format);
break;
default:
@@ -6832,16 +6848,18 @@ static void mt_dump_node(const struct maple_tree *mt, void *entry,
}
}
-void mt_dump(const struct maple_tree *mt)
+void mt_dump(const struct maple_tree *mt, enum mt_dump_format format)
{
void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
- pr_info("maple_tree(%p) flags %X, height %u root %p\n",
+ pr_info("maple_tree(" PTR_FMT ") flags %X, height %u root " PTR_FMT "\n",
mt, mt->ma_flags, mt_height(mt), entry);
- if (!xa_is_node(entry))
- mt_dump_entry(entry, 0, 0, 0);
+ if (xa_is_node(entry))
+ mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format);
else if (entry)
- mt_dump_node(mt, entry, 0, mt_max[mte_node_type(entry)], 0);
+ mt_dump_entry(entry, 0, 0, 0, format);
+ else
+ pr_info("(empty)\n");
}
EXPORT_SYMBOL_GPL(mt_dump);
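
A trivial sketch of the new dump-format parameter (debug-only usage; the choice of format
is the caller's):

	static void example_debug_dump(const struct maple_tree *mt)
	{
		/* Print the whole tree with hexadecimal indices. */
		mt_dump(mt, mt_dump_hex);
	}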
@@ -6852,15 +6870,16 @@ EXPORT_SYMBOL_GPL(mt_dump);
static void mas_validate_gaps(struct ma_state *mas)
{
struct maple_enode *mte = mas->node;
- struct maple_node *p_mn;
+ struct maple_node *p_mn, *node = mte_to_node(mte);
+ enum maple_type mt = mte_node_type(mas->node);
unsigned long gap = 0, max_gap = 0;
unsigned long p_end, p_start = mas->min;
- unsigned char p_slot;
+ unsigned char p_slot, offset;
unsigned long *gaps = NULL;
- unsigned long *pivots = ma_pivots(mte_to_node(mte), mte_node_type(mte));
- int i;
+ unsigned long *pivots = ma_pivots(node, mt);
+ unsigned int i;
- if (ma_is_dense(mte_node_type(mte))) {
+ if (ma_is_dense(mt)) {
for (i = 0; i < mt_slot_count(mte); i++) {
if (mas_get_slot(mas, i)) {
if (gap > max_gap)
@@ -6873,79 +6892,86 @@ static void mas_validate_gaps(struct ma_state *mas)
goto counted;
}
- gaps = ma_gaps(mte_to_node(mte), mte_node_type(mte));
+ gaps = ma_gaps(node, mt);
for (i = 0; i < mt_slot_count(mte); i++) {
- p_end = mas_logical_pivot(mas, pivots, i, mte_node_type(mte));
+ p_end = mas_safe_pivot(mas, pivots, i, mt);
if (!gaps) {
- if (mas_get_slot(mas, i)) {
- gap = 0;
- goto not_empty;
- }
-
- gap += p_end - p_start + 1;
+ if (!mas_get_slot(mas, i))
+ gap = p_end - p_start + 1;
} else {
void *entry = mas_get_slot(mas, i);
gap = gaps[i];
- if (!entry) {
- if (gap != p_end - p_start + 1) {
- pr_err("%p[%u] -> %p %lu != %lu - %lu + 1\n",
- mas_mn(mas), i,
- mas_get_slot(mas, i), gap,
- p_end, p_start);
- mt_dump(mas->tree);
-
- MT_BUG_ON(mas->tree,
- gap != p_end - p_start + 1);
- }
- } else {
- if (gap > p_end - p_start + 1) {
- pr_err("%p[%u] %lu >= %lu - %lu + 1 (%lu)\n",
- mas_mn(mas), i, gap, p_end, p_start,
- p_end - p_start + 1);
- MT_BUG_ON(mas->tree,
- gap > p_end - p_start + 1);
- }
+ MT_BUG_ON(mas->tree, !entry);
+
+ if (gap > p_end - p_start + 1) {
+ pr_err(PTR_FMT "[%u] %lu >= %lu - %lu + 1 (%lu)\n",
+ mas_mn(mas), i, gap, p_end, p_start,
+ p_end - p_start + 1);
+ MT_BUG_ON(mas->tree, gap > p_end - p_start + 1);
}
}
if (gap > max_gap)
max_gap = gap;
-not_empty:
+
p_start = p_end + 1;
if (p_end >= mas->max)
break;
}
counted:
+ if (mt == maple_arange_64) {
+ MT_BUG_ON(mas->tree, !gaps);
+ offset = ma_meta_gap(node);
+ if (offset > i) {
+ pr_err("gap offset " PTR_FMT "[%u] is invalid\n", node, offset);
+ MT_BUG_ON(mas->tree, 1);
+ }
+
+ if (gaps[offset] != max_gap) {
+ pr_err("gap " PTR_FMT "[%u] is not the largest gap %lu\n",
+ node, offset, max_gap);
+ MT_BUG_ON(mas->tree, 1);
+ }
+
+ for (i++ ; i < mt_slot_count(mte); i++) {
+ if (gaps[i] != 0) {
+ pr_err("gap " PTR_FMT "[%u] beyond node limit != 0\n",
+ node, i);
+ MT_BUG_ON(mas->tree, 1);
+ }
+ }
+ }
+
if (mte_is_root(mte))
return;
p_slot = mte_parent_slot(mas->node);
p_mn = mte_parent(mte);
MT_BUG_ON(mas->tree, max_gap > mas->max);
- if (ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap) {
- pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap);
- mt_dump(mas->tree);
+ if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) {
+ pr_err("gap " PTR_FMT "[%u] != %lu\n", p_mn, p_slot, max_gap);
+ mt_dump(mas->tree, mt_dump_hex);
+ MT_BUG_ON(mas->tree, 1);
}
-
- MT_BUG_ON(mas->tree,
- ma_gaps(p_mn, mas_parent_enum(mas, mte))[p_slot] != max_gap);
}
static void mas_validate_parent_slot(struct ma_state *mas)
{
struct maple_node *parent;
struct maple_enode *node;
- enum maple_type p_type = mas_parent_enum(mas, mas->node);
- unsigned char p_slot = mte_parent_slot(mas->node);
+ enum maple_type p_type;
+ unsigned char p_slot;
void __rcu **slots;
int i;
if (mte_is_root(mas->node))
return;
+ p_slot = mte_parent_slot(mas->node);
+ p_type = mas_parent_type(mas, mas->node);
parent = mte_parent(mas->node);
slots = ma_slots(parent, p_type);
MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
@@ -6956,11 +6982,11 @@ static void mas_validate_parent_slot(struct ma_state *mas)
node = mas_slot(mas, slots, i);
if (i == p_slot) {
if (node != mas->node)
- pr_err("parent %p[%u] does not have %p\n",
+ pr_err("parent " PTR_FMT "[%u] does not have " PTR_FMT "\n",
parent, i, mas_mn(mas));
MT_BUG_ON(mas->tree, node != mas->node);
} else if (node == mas->node) {
- pr_err("Invalid child %p at parent %p[%u] p_slot %u\n",
+ pr_err("Invalid child " PTR_FMT " at parent " PTR_FMT "[%u] p_slot %u\n",
mas_mn(mas), parent, i, p_slot);
MT_BUG_ON(mas->tree, node == mas->node);
}
@@ -6980,30 +7006,36 @@ static void mas_validate_child_slot(struct ma_state *mas)
for (i = 0; i < mt_slots[type]; i++) {
child = mas_slot(mas, slots, i);
- if (!pivots[i] || pivots[i] == mas->max)
- break;
- if (!child)
- break;
+ if (!child) {
+ pr_err("Non-leaf node lacks child at " PTR_FMT "[%u]\n",
+ mas_mn(mas), i);
+ MT_BUG_ON(mas->tree, 1);
+ }
if (mte_parent_slot(child) != i) {
- pr_err("Slot error at %p[%u]: child %p has pslot %u\n",
+ pr_err("Slot error at " PTR_FMT "[%u]: child " PTR_FMT " has pslot %u\n",
mas_mn(mas), i, mte_to_node(child),
mte_parent_slot(child));
MT_BUG_ON(mas->tree, 1);
}
if (mte_parent(child) != mte_to_node(mas->node)) {
- pr_err("child %p has parent %p not %p\n",
+ pr_err("child " PTR_FMT " has parent " PTR_FMT " not " PTR_FMT "\n",
mte_to_node(child), mte_parent(child),
mte_to_node(mas->node));
MT_BUG_ON(mas->tree, 1);
}
+
+ if (i < mt_pivots[type] && pivots[i] == mas->max)
+ break;
}
}
/*
- * Validate all pivots are within mas->min and mas->max.
+ * Validate all pivots are within mas->min and mas->max, check metadata ends
+ * where the maximum ends and ensure there is no slots or pivots set outside of
+ * the end of the data.
*/
static void mas_validate_limits(struct ma_state *mas)
{
@@ -7013,54 +7045,50 @@ static void mas_validate_limits(struct ma_state *mas)
void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
unsigned long *pivots = ma_pivots(mas_mn(mas), type);
- /* all limits are fine here. */
- if (mte_is_root(mas->node))
- return;
-
for (i = 0; i < mt_slots[type]; i++) {
unsigned long piv;
piv = mas_safe_pivot(mas, pivots, i, type);
- if (!piv && (i != 0))
- break;
-
- if (!mte_is_leaf(mas->node)) {
- void *entry = mas_slot(mas, slots, i);
-
- if (!entry)
- pr_err("%p[%u] cannot be null\n",
- mas_mn(mas), i);
-
- MT_BUG_ON(mas->tree, !entry);
+ if (!piv && (i != 0)) {
+ pr_err("Missing node limit pivot at " PTR_FMT "[%u]",
+ mas_mn(mas), i);
+ MAS_WARN_ON(mas, 1);
}
if (prev_piv > piv) {
- pr_err("%p[%u] piv %lu < prev_piv %lu\n",
+ pr_err(PTR_FMT "[%u] piv %lu < prev_piv %lu\n",
mas_mn(mas), i, piv, prev_piv);
- MT_BUG_ON(mas->tree, piv < prev_piv);
+ MAS_WARN_ON(mas, piv < prev_piv);
}
if (piv < mas->min) {
- pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i,
+ pr_err(PTR_FMT "[%u] %lu < %lu\n", mas_mn(mas), i,
piv, mas->min);
- MT_BUG_ON(mas->tree, piv < mas->min);
+ MAS_WARN_ON(mas, piv < mas->min);
}
if (piv > mas->max) {
- pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i,
+ pr_err(PTR_FMT "[%u] %lu > %lu\n", mas_mn(mas), i,
piv, mas->max);
- MT_BUG_ON(mas->tree, piv > mas->max);
+ MAS_WARN_ON(mas, piv > mas->max);
}
prev_piv = piv;
if (piv == mas->max)
break;
}
+
+ if (mas_data_end(mas) != i) {
+ pr_err("node" PTR_FMT ": data_end %u != the last slot offset %u\n",
+ mas_mn(mas), mas_data_end(mas), i);
+ MT_BUG_ON(mas->tree, 1);
+ }
+
for (i += 1; i < mt_slots[type]; i++) {
void *entry = mas_slot(mas, slots, i);
if (entry && (i != mt_slots[type] - 1)) {
- pr_err("%p[%u] should not have entry %p\n", mas_mn(mas),
- i, entry);
+ pr_err(PTR_FMT "[%u] should not have entry " PTR_FMT "\n",
+ mas_mn(mas), i, entry);
MT_BUG_ON(mas->tree, entry != NULL);
}
@@ -7070,9 +7098,9 @@ static void mas_validate_limits(struct ma_state *mas)
if (!piv)
continue;
- pr_err("%p[%u] should not have piv %lu\n",
+ pr_err(PTR_FMT "[%u] should not have piv %lu\n",
mas_mn(mas), i, piv);
- MT_BUG_ON(mas->tree, i < mt_pivots[type] - 1);
+ MAS_WARN_ON(mas, i < mt_pivots[type] - 1);
}
}
}
@@ -7085,7 +7113,7 @@ static void mt_validate_nulls(struct maple_tree *mt)
MA_STATE(mas, mt, 0, 0);
mas_start(&mas);
- if (mas_is_none(&mas) || (mas.node == MAS_ROOT))
+ if (mas_is_none(&mas) || (mas_is_ptr(&mas)))
return;
while (!mte_is_leaf(mas.node))
@@ -7095,14 +7123,14 @@ static void mt_validate_nulls(struct maple_tree *mt)
do {
entry = mas_slot(&mas, slots, offset);
if (!last && !entry) {
- pr_err("Sequential nulls end at %p[%u]\n",
+ pr_err("Sequential nulls end at " PTR_FMT "[%u]\n",
mas_mn(&mas), offset);
}
MT_BUG_ON(mt, !last && !entry);
last = entry;
if (offset == mas_data_end(&mas)) {
mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
- if (mas_is_none(&mas))
+ if (mas_is_overflow(&mas))
return;
offset = 0;
slots = ma_slots(mte_to_node(mas.node),
@@ -7111,7 +7139,7 @@ static void mt_validate_nulls(struct maple_tree *mt)
offset++;
}
- } while (!mas_is_none(&mas));
+ } while (!mas_is_overflow(&mas));
}
/*
@@ -7120,40 +7148,121 @@ static void mt_validate_nulls(struct maple_tree *mt)
* 2. The gap is correctly set in the parents
*/
void mt_validate(struct maple_tree *mt)
+ __must_hold(mas->tree->ma_lock)
{
unsigned char end;
MA_STATE(mas, mt, 0, 0);
- rcu_read_lock();
mas_start(&mas);
- if (!mas_searchable(&mas))
- goto done;
+ if (!mas_is_active(&mas))
+ return;
- mas_first_entry(&mas, mas_mn(&mas), ULONG_MAX, mte_node_type(mas.node));
- while (!mas_is_none(&mas)) {
- MT_BUG_ON(mas.tree, mte_dead_node(mas.node));
- if (!mte_is_root(mas.node)) {
- end = mas_data_end(&mas);
- if ((end < mt_min_slot_count(mas.node)) &&
- (mas.max != ULONG_MAX)) {
- pr_err("Invalid size %u of %p\n", end,
- mas_mn(&mas));
- MT_BUG_ON(mas.tree, 1);
- }
+ while (!mte_is_leaf(mas.node))
+ mas_descend(&mas);
+ while (!mas_is_overflow(&mas)) {
+ MAS_WARN_ON(&mas, mte_dead_node(mas.node));
+ end = mas_data_end(&mas);
+ if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) &&
+ (!mte_is_root(mas.node)))) {
+ pr_err("Invalid size %u of " PTR_FMT "\n",
+ end, mas_mn(&mas));
}
+
mas_validate_parent_slot(&mas);
- mas_validate_child_slot(&mas);
mas_validate_limits(&mas);
+ mas_validate_child_slot(&mas);
if (mt_is_alloc(mt))
mas_validate_gaps(&mas);
mas_dfs_postorder(&mas, ULONG_MAX);
}
mt_validate_nulls(mt);
-done:
- rcu_read_unlock();
-
}
EXPORT_SYMBOL_GPL(mt_validate);
+void mas_dump(const struct ma_state *mas)
+{
+ pr_err("MAS: tree=" PTR_FMT " enode=" PTR_FMT " ",
+ mas->tree, mas->node);
+ switch (mas->status) {
+ case ma_active:
+ pr_err("(ma_active)");
+ break;
+ case ma_none:
+ pr_err("(ma_none)");
+ break;
+ case ma_root:
+ pr_err("(ma_root)");
+ break;
+ case ma_start:
+ pr_err("(ma_start) ");
+ break;
+ case ma_pause:
+ pr_err("(ma_pause) ");
+ break;
+ case ma_overflow:
+ pr_err("(ma_overflow) ");
+ break;
+ case ma_underflow:
+ pr_err("(ma_underflow) ");
+ break;
+ case ma_error:
+ pr_err("(ma_error) ");
+ break;
+ }
+
+ pr_err("Store Type: ");
+ switch (mas->store_type) {
+ case wr_invalid:
+ pr_err("invalid store type\n");
+ break;
+ case wr_new_root:
+ pr_err("new_root\n");
+ break;
+ case wr_store_root:
+ pr_err("store_root\n");
+ break;
+ case wr_exact_fit:
+ pr_err("exact_fit\n");
+ break;
+ case wr_split_store:
+ pr_err("split_store\n");
+ break;
+ case wr_slot_store:
+ pr_err("slot_store\n");
+ break;
+ case wr_append:
+ pr_err("append\n");
+ break;
+ case wr_node_store:
+ pr_err("node_store\n");
+ break;
+ case wr_spanning_store:
+ pr_err("spanning_store\n");
+ break;
+ case wr_rebalance:
+ pr_err("rebalance\n");
+ break;
+ }
+
+ pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end,
+ mas->index, mas->last);
+ pr_err(" min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n",
+ mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth,
+ mas->mas_flags);
+ if (mas->index > mas->last)
+ pr_err("Check index & last\n");
+}
+EXPORT_SYMBOL_GPL(mas_dump);
+
+void mas_wr_dump(const struct ma_wr_state *wr_mas)
+{
+ pr_err("WR_MAS: node=" PTR_FMT " r_min=%lx r_max=%lx\n",
+ wr_mas->node, wr_mas->r_min, wr_mas->r_max);
+ pr_err(" type=%u off_end=%u, node_end=%u, end_piv=%lx\n",
+ wr_mas->type, wr_mas->offset_end, wr_mas->mas->end,
+ wr_mas->end_piv);
+}
+EXPORT_SYMBOL_GPL(mas_wr_dump);
+
#endif /* CONFIG_DEBUG_MAPLE_TREE */