diff options
author | Keir Fraser <keir@xen.org> | 2011-05-09 09:25:23 +0100 |
---|---|---|
committer | Keir Fraser <keir@xen.org> | 2011-05-09 09:25:23 +0100 |
commit | 8dc6738dbb3c6117fb7e30617609f6d41e19d3b4 (patch) | |
tree | a59eea5fb0832e92f0c51c06807821db76a252a4 /xen/common/radix-tree.c | |
parent | 31915b333f5413d3be1f9f90aace5bf8b3cedab0 (diff) | |
download | xen-8dc6738dbb3c6117fb7e30617609f6d41e19d3b4.tar.gz xen-8dc6738dbb3c6117fb7e30617609f6d41e19d3b4.tar.bz2 xen-8dc6738dbb3c6117fb7e30617609f6d41e19d3b4.zip |
Update radix-tree.[ch] from upstream Linux to gain RCU awareness.
We still leave behind features we don't need such as tagged nodes.
Other changes:
- Allow callers to define their own node alloc routines.
- Only allocate per-node rcu_head when using the default RCU-safe
alloc routines.
- Keep our own radix_tree_destroy().
In future it may also be worth getting rid of the complex and
pointless special-casing of radix-tree height==0, in which a single
data item can be stored directly in radix_tree_root. It introduces a
whole lot of special cases and complicates RCU handling. If we get rid
of it we can reclaim the 'indirect pointer' tag in bit 0 of every slot
entry.
Signed-off-by: Keir Fraser <keir@xen.org>
Diffstat (limited to 'xen/common/radix-tree.c')
-rw-r--r-- | xen/common/radix-tree.c | 1001 |
1 files changed, 655 insertions, 346 deletions
diff --git a/xen/common/radix-tree.c b/xen/common/radix-tree.c index e6e213c0a0..6f2d8b68cd 100644 --- a/xen/common/radix-tree.c +++ b/xen/common/radix-tree.c @@ -1,7 +1,8 @@ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig - * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> + * Copyright (C) 2005 SGI, Christoph Lameter + * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -18,432 +19,740 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* - * Copyright (C) 2009 adaption for Xen tmem by Dan Magenheimer, Oracle Corp. - * Changed: - * o Linux 2.6.18 source used (prior to read-copy-update addition) - * o constants and data structures moved out to radix-tree.h header - * o tagging code removed - * o radix_tree_insert has func parameter for dynamic data struct allocation - * o radix_tree_destroy added (including recursive helper function) - * o __init functions must be called explicitly - * o other include files adapted to Xen - */ - #include <xen/config.h> #include <xen/init.h> -#include <xen/lib.h> -#include <xen/types.h> -#include <xen/errno.h> #include <xen/radix-tree.h> -#include <asm/cache.h> +struct radix_tree_path { + struct radix_tree_node *node; + int offset; +}; + +#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) +#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ + RADIX_TREE_MAP_SHIFT)) + +/* + * The height_to_maxindex array needs to be one deeper than the maximum + * path as height 0 holds only 1 entry. + */ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; +static inline void *ptr_to_indirect(void *ptr) +{ + return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); +} + +static inline void *indirect_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); +} + +struct rcu_node { + struct radix_tree_node node; + struct rcu_head rcu_head; +}; + +static struct radix_tree_node *rcu_node_alloc(void *arg) +{ + struct rcu_node *rcu_node = xmalloc(struct rcu_node); + return rcu_node ? &rcu_node->node : NULL; +} + +static void _rcu_node_free(struct rcu_head *head) +{ + struct rcu_node *rcu_node = + container_of(head, struct rcu_node, rcu_head); + xfree(rcu_node); +} + +static void rcu_node_free(struct radix_tree_node *node, void *arg) +{ + struct rcu_node *rcu_node = container_of(node, struct rcu_node, node); + call_rcu(&rcu_node->rcu_head, _rcu_node_free); +} + +static struct radix_tree_node *radix_tree_node_alloc( + struct radix_tree_root *root) +{ + struct radix_tree_node *ret; + ret = root->node_alloc(root->node_alloc_free_arg); + if (ret) + memset(ret, 0, sizeof(*ret)); + return ret; +} + +static void radix_tree_node_free( + struct radix_tree_root *root, struct radix_tree_node *node) +{ + root->node_free(node, root->node_alloc_free_arg); +} + /* - * Return the maximum key which can be store into a - * radix tree with height HEIGHT. + * Return the maximum key which can be store into a + * radix tree with height HEIGHT. */ static inline unsigned long radix_tree_maxindex(unsigned int height) { - return height_to_maxindex[height]; + return height_to_maxindex[height]; } /* - * Extend a radix tree so it can store key @index. + * Extend a radix tree so it can store key @index. */ -static int radix_tree_extend(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node *(*node_alloc)(void *), void *arg) +static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) { - struct radix_tree_node *node; - unsigned int height; - - /* Figure out what the height should be. */ - height = root->height + 1; - if (index > radix_tree_maxindex(height)) - while (index > radix_tree_maxindex(height)) - height++; - - if (root->rnode == NULL) { - root->height = height; - goto out; - } - - do { - if (!(node = node_alloc(arg))) - return -ENOMEM; - - /* Increase the height. */ - node->slots[0] = root->rnode; - - node->count = 1; - root->rnode = node; - root->height++; - } while (height > root->height); - out: - return 0; + struct radix_tree_node *node; + unsigned int height; + + /* Figure out what the height should be. */ + height = root->height + 1; + while (index > radix_tree_maxindex(height)) + height++; + + if (root->rnode == NULL) { + root->height = height; + goto out; + } + + do { + unsigned int newheight; + if (!(node = radix_tree_node_alloc(root))) + return -ENOMEM; + + /* Increase the height. */ + node->slots[0] = indirect_to_ptr(root->rnode); + + newheight = root->height+1; + node->height = newheight; + node->count = 1; + node = ptr_to_indirect(node); + rcu_assign_pointer(root->rnode, node); + root->height = newheight; + } while (height > root->height); +out: + return 0; } /** - * radix_tree_insert - insert into a radix tree - * @root: radix tree root - * @index: index key - * @item: item to insert + * radix_tree_insert - insert into a radix tree + * @root: radix tree root + * @index: index key + * @item: item to insert * - * Insert an item into the radix tree at position @index. + * Insert an item into the radix tree at position @index. */ -int radix_tree_insert(struct radix_tree_root *root, unsigned long index, - void *item, struct radix_tree_node *(*node_alloc)(void *), void *arg) +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) { - struct radix_tree_node *node = NULL, *slot; - unsigned int height, shift; - int offset; - int error; - - /* Make sure the tree is high enough. */ - if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index, node_alloc, arg); - if (error) - return error; - } - - slot = root->rnode; - height = root->height; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - - offset = 0; /* uninitialised var warning */ - while (height > 0) { - if (slot == NULL) { - /* Have to add a child node. */ - if (!(slot = node_alloc(arg))) - return -ENOMEM; - if (node) { - - node->slots[offset] = slot; - node->count++; - } else - root->rnode = slot; - } - - /* Go a level down */ - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; - slot = node->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - if (slot != NULL) - return -EEXIST; - - if (node) { - node->count++; - node->slots[offset] = item; - } else { - root->rnode = item; - } - - return 0; + struct radix_tree_node *node = NULL, *slot; + unsigned int height, shift; + int offset; + int error; + + BUG_ON(radix_tree_is_indirect_ptr(item)); + + /* Make sure the tree is high enough. */ + if (index > radix_tree_maxindex(root->height)) { + error = radix_tree_extend(root, index); + if (error) + return error; + } + + slot = indirect_to_ptr(root->rnode); + + height = root->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + offset = 0; /* uninitialised var warning */ + while (height > 0) { + if (slot == NULL) { + /* Have to add a child node. */ + if (!(slot = radix_tree_node_alloc(root))) + return -ENOMEM; + slot->height = height; + if (node) { + rcu_assign_pointer(node->slots[offset], slot); + node->count++; + } else + rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); + } + + /* Go a level down */ + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + node = slot; + slot = node->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } + + if (slot != NULL) + return -EEXIST; + + if (node) { + node->count++; + rcu_assign_pointer(node->slots[offset], item); + } else { + rcu_assign_pointer(root->rnode, item); + } + + return 0; } EXPORT_SYMBOL(radix_tree_insert); -static inline void **__lookup_slot(struct radix_tree_root *root, - unsigned long index) +/* + * is_slot == 1 : search for the slot. + * is_slot == 0 : search for the node. + */ +static void *radix_tree_lookup_element(struct radix_tree_root *root, + unsigned long index, int is_slot) { - unsigned int height, shift; - struct radix_tree_node **slot; - - height = root->height; - - if (index > radix_tree_maxindex(height)) - return NULL; - - if (height == 0 && root->rnode) - return (void **)&root->rnode; - - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = &root->rnode; - - while (height > 0) { - if (*slot == NULL) - return NULL; - - slot = (struct radix_tree_node **) - ((*slot)->slots + - ((index >> shift) & RADIX_TREE_MAP_MASK)); - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } - - return (void **)slot; + unsigned int height, shift; + struct radix_tree_node *node, **slot; + + node = rcu_dereference(root->rnode); + if (node == NULL) + return NULL; + + if (!radix_tree_is_indirect_ptr(node)) { + if (index > 0) + return NULL; + return is_slot ? (void *)&root->rnode : node; + } + node = indirect_to_ptr(node); + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; + + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = rcu_dereference(*slot); + if (node == NULL) + return NULL; + + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + return is_slot ? (void *)slot : indirect_to_ptr(node); } /** - * radix_tree_lookup_slot - lookup a slot in a radix tree - * @root: radix tree root - * @index: index key + * radix_tree_lookup_slot - lookup a slot in a radix tree + * @root: radix tree root + * @index: index key + * + * Returns: the slot corresponding to the position @index in the + * radix tree @root. This is useful for update-if-exists operations. * - * Lookup the slot corresponding to the position @index in the radix tree - * @root. This is useful for update-if-exists operations. + * This function can be called under rcu_read_lock iff the slot is not + * modified by radix_tree_replace_slot, otherwise it must be called + * exclusive from other writers. Any dereference of the slot must be done + * using radix_tree_deref_slot. */ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { - return __lookup_slot(root, index); + return (void **)radix_tree_lookup_element(root, index, 1); } EXPORT_SYMBOL(radix_tree_lookup_slot); /** - * radix_tree_lookup - perform lookup operation on a radix tree - * @root: radix tree root - * @index: index key + * radix_tree_lookup - perform lookup operation on a radix tree + * @root: radix tree root + * @index: index key * - * Lookup the item at the position @index in the radix tree @root. + * Lookup the item at the position @index in the radix tree @root. + * + * This function can be called under rcu_read_lock, however the caller + * must manage lifetimes of leaf nodes (eg. RCU may also be used to free + * them safely). No RCU barriers are required to access or modify the + * returned item, however. */ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { - void **slot; - - slot = __lookup_slot(root, index); - return slot != NULL ? *slot : NULL; + return radix_tree_lookup_element(root, index, 0); } EXPORT_SYMBOL(radix_tree_lookup); +/** + * radix_tree_next_hole - find the next hole (not-present entry) + * @root: tree root + * @index: index key + * @max_scan: maximum range to search + * + * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest + * indexed hole. + * + * Returns: the index of the hole if found, otherwise returns an index + * outside of the set specified (in which case 'return - index >= max_scan' + * will be true). In rare cases of index wrap-around, 0 will be returned. + * + * radix_tree_next_hole may be called under rcu_read_lock. However, like + * radix_tree_gang_lookup, this will not atomically search a snapshot of + * the tree at a single point in time. For example, if a hole is created + * at index 5, then subsequently a hole is created at index 10, + * radix_tree_next_hole covering both indexes may return 10 if called + * under rcu_read_lock. + */ +unsigned long radix_tree_next_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan) +{ + unsigned long i; + + for (i = 0; i < max_scan; i++) { + if (!radix_tree_lookup(root, index)) + break; + index++; + if (index == 0) + break; + } + + return index; +} +EXPORT_SYMBOL(radix_tree_next_hole); + +/** + * radix_tree_prev_hole - find the prev hole (not-present entry) + * @root: tree root + * @index: index key + * @max_scan: maximum range to search + * + * Search backwards in the range [max(index-max_scan+1, 0), index] + * for the first hole. + * + * Returns: the index of the hole if found, otherwise returns an index + * outside of the set specified (in which case 'index - return >= max_scan' + * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. + * + * radix_tree_next_hole may be called under rcu_read_lock. However, like + * radix_tree_gang_lookup, this will not atomically search a snapshot of + * the tree at a single point in time. For example, if a hole is created + * at index 10, then subsequently a hole is created at index 5, + * radix_tree_prev_hole covering both indexes may return 5 if called under + * rcu_read_lock. + */ +unsigned long radix_tree_prev_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan) +{ + unsigned long i; + + for (i = 0; i < max_scan; i++) { + if (!radix_tree_lookup(root, index)) + break; + index--; + if (index == ULONG_MAX) + break; + } + + return index; +} +EXPORT_SYMBOL(radix_tree_prev_hole); + static unsigned int -__lookup(struct radix_tree_root *root, void **results, unsigned long index, - unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, + unsigned int max_items, unsigned long *next_index) { - unsigned int nr_found = 0; - unsigned int shift, height; - struct radix_tree_node *slot; - unsigned long i; - - height = root->height; - if (index > radix_tree_maxindex(height)) - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; - goto out; - } - - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; - - for ( ; height > 1; height--) { - - for (i = (index >> shift) & RADIX_TREE_MAP_MASK ; - i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - } - if (i == RADIX_TREE_MAP_SIZE) - goto out; - - shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } - - /* Bottom level: grab some items */ - for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; - if (slot->slots[i]) { - results[nr_found++] = slot->slots[i]; - if (nr_found == max_items) - goto out; - } - } - out: - *next_index = index; - return nr_found; + unsigned int nr_found = 0; + unsigned int shift, height; + unsigned long i; + + height = slot->height; + if (height == 0) + goto out; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference(slot->slots[i]); + if (slot == NULL) + goto out; + } + + /* Bottom level: grab some items */ + for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { + index++; + if (slot->slots[i]) { + results[nr_found++] = &(slot->slots[i]); + if (nr_found == max_items) + goto out; + } + } +out: + *next_index = index; + return nr_found; } /** - * radix_tree_gang_lookup - perform multiple lookup on a radix tree - * @root: radix tree root - * @results: where the results of the lookup are placed - * @first_index: start the lookup from this key - * @max_items: place up to this many items at *results + * radix_tree_gang_lookup - perform multiple lookup on a radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results * - * Performs an index-ascending scan of the tree for present items. Places - * them at *@results and returns the number of items which were placed at - * *@results. + * Performs an index-ascending scan of the tree for present items. Places + * them at *@results and returns the number of items which were placed at + * *@results. * - * The implementation is naive. + * The implementation is naive. + * + * Like radix_tree_lookup, radix_tree_gang_lookup may be called under + * rcu_read_lock. In this case, rather than the returned results being + * an atomic snapshot of the tree at a single point in time, the semantics + * of an RCU protected gang lookup are as though multiple radix_tree_lookups + * have been issued in individual locks, and results stored in 'results'. */ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items) + unsigned long first_index, unsigned int max_items) { - const unsigned long max_index = radix_tree_maxindex(root->height); - unsigned long cur_index = first_index; - unsigned int ret = 0; - - while (ret < max_items) { - unsigned int nr_found; - unsigned long next_index; /* Index of next search */ - - if (cur_index > max_index) - break; - nr_found = __lookup(root, results + ret, cur_index, - max_items - ret, &next_index); - ret += nr_found; - if (next_index == 0) - break; - cur_index = next_index; - } - return ret; + unsigned long max_index; + struct radix_tree_node *node; + unsigned long cur_index = first_index; + unsigned int ret; + + node = rcu_dereference(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = node; + return 1; + } + node = indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int nr_found, slots_found, i; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup(node, (void ***)results + ret, cur_index, + max_items - ret, &next_index); + nr_found = 0; + for (i = 0; i < slots_found; i++) { + struct radix_tree_node *slot; + slot = *(((void ***)results)[ret + i]); + if (!slot) + continue; + results[ret + nr_found] = + indirect_to_ptr(rcu_dereference(slot)); + nr_found++; + } + ret += nr_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup); /** - * radix_tree_shrink - shrink height of a radix tree to minimal - * @root radix tree root + * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * + * Performs an index-ascending scan of the tree for present items. Places + * their slots at *@results and returns the number of items which were + * placed at *@results. + * + * The implementation is naive. + * + * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must + * be dereferenced with radix_tree_deref_slot, and if using only RCU + * protection, radix_tree_deref_slot may fail requiring a retry. */ -static inline void radix_tree_shrink(struct radix_tree_root *root, - void (*node_free)(struct radix_tree_node *)) +unsigned int +radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items) { - /* try to shrink tree height */ - while (root->height > 0 && - root->rnode->count == 1 && - root->rnode->slots[0]) { - struct radix_tree_node *to_free = root->rnode; - - root->rnode = to_free->slots[0]; - root->height--; - to_free->slots[0] = NULL; - to_free->count = 0; - node_free(to_free); - } + unsigned long max_index; + struct radix_tree_node *node; + unsigned long cur_index = first_index; + unsigned int ret; + + node = rcu_dereference(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = (void **)&root->rnode; + return 1; + } + node = indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int slots_found; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup(node, results + ret, cur_index, + max_items - ret, &next_index); + ret += slots_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; } +EXPORT_SYMBOL(radix_tree_gang_lookup_slot); /** - * radix_tree_delete - delete an item from a radix tree - * @root: radix tree root - * @index: index key + * radix_tree_shrink - shrink height of a radix tree to minimal + * @root radix tree root + */ +static inline void radix_tree_shrink(struct radix_tree_root *root) +{ + /* try to shrink tree height */ + while (root->height > 0) { + struct radix_tree_node *to_free = root->rnode; + void *newptr; + + BUG_ON(!radix_tree_is_indirect_ptr(to_free)); + to_free = indirect_to_ptr(to_free); + + /* + * The candidate node has more than one child, or its child + * is not at the leftmost slot, we cannot shrink. + */ + if (to_free->count != 1) + break; + if (!to_free->slots[0]) + break; + + /* + * We don't need rcu_assign_pointer(), since we are simply + * moving the node from one part of the tree to another: if it + * was safe to dereference the old pointer to it + * (to_free->slots[0]), it will be safe to dereference the new + * one (root->rnode) as far as dependent read barriers go. + */ + newptr = to_free->slots[0]; + if (root->height > 1) + newptr = ptr_to_indirect(newptr); + root->rnode = newptr; + root->height--; + + /* + * We have a dilemma here. The node's slot[0] must not be + * NULLed in case there are concurrent lookups expecting to + * find the item. However if this was a bottom-level node, + * then it may be subject to the slot pointer being visible + * to callers dereferencing it. If item corresponding to + * slot[0] is subsequently deleted, these callers would expect + * their slot to become empty sooner or later. + * + * For example, lockless pagecache will look up a slot, deref + * the page pointer, and if the page is 0 refcount it means it + * was concurrently deleted from pagecache so try the deref + * again. Fortunately there is already a requirement for logic + * to retry the entire slot lookup -- the indirect pointer + * problem (replacing direct root node with an indirect pointer + * also results in a stale slot). So tag the slot as indirect + * to force callers to retry. + */ + if (root->height == 0) + *((unsigned long *)&to_free->slots[0]) |= + RADIX_TREE_INDIRECT_PTR; + + radix_tree_node_free(root, to_free); + } +} + +/** + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key * - * Remove the item at @index from the radix tree rooted at @root. + * Remove the item at @index from the radix tree rooted at @root. * - * Returns the address of the deleted item, or NULL if it was not present. + * Returns the address of the deleted item, or NULL if it was not present. */ -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index, - void(*node_free)(struct radix_tree_node *)) +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { - struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; - struct radix_tree_node *slot = NULL; - unsigned int height, shift; - int offset; - - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; - - slot = root->rnode; - if (height == 0 && root->rnode) { - root->rnode = NULL; - goto out; - } - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - pathp->node = NULL; - - do { - if (slot == NULL) - goto out; - - pathp++; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - pathp->offset = offset; - pathp->node = slot; - slot = slot->slots[offset]; - shift -= RADIX_TREE_MAP_SHIFT; - height--; - } while (height > 0); - - if (slot == NULL) - goto out; - - /* Now free the nodes we do not need anymore */ - while (pathp->node) { - pathp->node->slots[pathp->offset] = NULL; - pathp->node->count--; - - if (pathp->node->count) { - if (pathp->node == root->rnode) - radix_tree_shrink(root, node_free); - goto out; - } - - /* Node with zero slots in use so free it */ - node_free(pathp->node); - - pathp--; - } - root->height = 0; - root->rnode = NULL; - - out: - return slot; + /* + * The radix tree path needs to be one longer than the maximum path + * since the "list" is null terminated. + */ + struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; + struct radix_tree_node *slot = NULL; + struct radix_tree_node *to_free; + unsigned int height, shift; + int offset; + + height = root->height; + if (index > radix_tree_maxindex(height)) + goto out; + + slot = root->rnode; + if (height == 0) { + root->rnode = NULL; + goto out; + } + slot = indirect_to_ptr(slot); + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + pathp->node = NULL; + + do { + if (slot == NULL) + goto out; + + pathp++; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + pathp->offset = offset; + pathp->node = slot; + slot = slot->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + if (slot == NULL) + goto out; + + to_free = NULL; + /* Now free the nodes we do not need anymore */ + while (pathp->node) { + pathp->node->slots[pathp->offset] = NULL; + pathp->node->count--; + /* + * Queue the node for deferred freeing after the + * last reference to it disappears (set NULL, above). + */ + if (to_free) + radix_tree_node_free(root, to_free); + + if (pathp->node->count) { + if (pathp->node == indirect_to_ptr(root->rnode)) + radix_tree_shrink(root); + goto out; + } + + /* Node with zero slots in use so free it */ + to_free = pathp->node; + pathp--; + + } + root->height = 0; + root->rnode = NULL; + if (to_free) + radix_tree_node_free(root, to_free); + +out: + return slot; } EXPORT_SYMBOL(radix_tree_delete); static void -radix_tree_node_destroy(struct radix_tree_node *node, unsigned int height, - void (*slot_free)(void *), void (*node_free)(struct radix_tree_node *)) +radix_tree_node_destroy( + struct radix_tree_root *root, struct radix_tree_node *node, + void (*slot_free)(void *)) { - int i; - - if (height == 0) - return; - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[i]) { - if (height == 1) { - slot_free(node->slots[i]); - node->slots[i] = NULL; - continue; - } - radix_tree_node_destroy(node->slots[i], height-1, - slot_free, node_free); - node_free(node->slots[i]); - node->slots[i] = NULL; - } - } + int i; + + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + struct radix_tree_node *slot = node->slots[i]; + BUG_ON(radix_tree_is_indirect_ptr(slot)); + if (slot == NULL) + continue; + if (node->height == 1) { + if (slot_free) + slot_free(slot); + } else { + radix_tree_node_destroy(root, slot, slot_free); + } + } + + radix_tree_node_free(root, node); } -void radix_tree_destroy(struct radix_tree_root *root, - void (*slot_free)(void *), void (*node_free)(struct radix_tree_node *)) +void radix_tree_destroy( + struct radix_tree_root *root, + void (*slot_free)(void *)) { - if (root->rnode == NULL) - return; - if (root->height == 0) - slot_free(root->rnode); - else { - radix_tree_node_destroy(root->rnode, root->height, - slot_free, node_free); - node_free(root->rnode); - root->height = 0; - } - root->rnode = NULL; - /* caller must delete root if desired */ + struct radix_tree_node *node = root->rnode; + if (node == NULL) + return; + if (!radix_tree_is_indirect_ptr(node)) { + if (slot_free) + slot_free(node); + } else { + node = indirect_to_ptr(node); + radix_tree_node_destroy(root, node, slot_free); + } + radix_tree_init(root); } -EXPORT_SYMBOL(radix_tree_destroy); -static unsigned long __init __maxindex(unsigned int height) +void radix_tree_init(struct radix_tree_root *root) { - unsigned int tmp = height * RADIX_TREE_MAP_SHIFT; - unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1; + memset(root, 0, sizeof(*root)); + root->node_alloc = rcu_node_alloc; + root->node_free = rcu_node_free; +} - if (tmp >= RADIX_TREE_INDEX_BITS) - index = ~0UL; - return index; +void radix_tree_set_alloc_callbacks( + struct radix_tree_root *root, + radix_tree_alloc_fn_t *node_alloc, + radix_tree_free_fn_t *node_free, + void *node_alloc_free_arg) +{ + root->node_alloc = node_alloc; + root->node_free = node_free; + root->node_alloc_free_arg = node_alloc_free_arg; } -void __init radix_tree_init(void) +static __init unsigned long __maxindex(unsigned int height) { - unsigned int i; + unsigned int width = height * RADIX_TREE_MAP_SHIFT; + int shift = RADIX_TREE_INDEX_BITS - width; + + if (shift < 0) + return ~0UL; + if (shift >= BITS_PER_LONG) + return 0UL; + return ~0UL >> shift; +} + +static __init int radix_tree_init_maxindex(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) + height_to_maxindex[i] = __maxindex(i); - for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) - height_to_maxindex[i] = __maxindex(i); + return 0; } +/* pre-SMP just so it runs before 'normal' initcalls */ +presmp_initcall(radix_tree_init_maxindex); |