aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Wilcox <mawilcox@microsoft.com>2016-12-20 10:27:56 -0500
committerMatthew Wilcox <mawilcox@microsoft.com>2017-02-13 21:44:01 -0500
commit0a835c4f090af2c76fc2932c539c3b32fd21fbbb (patch)
tree729c24514309afc323ee08e6d8336eb1e558406e
parent0ac398ef391b53122976325ab6953456ce8e8310 (diff)
Reimplement IDR and IDA using the radix tree
The IDR is very similar to the radix tree. It has some functionality that the radix tree did not have (alloc next free, cyclic allocation, a callback-based for_each, destroy tree), which is readily implementable on top of the radix tree. A few small changes were needed in order to use a tag to represent nodes with free space below them. More extensive changes were needed to support storing NULL as a valid entry in an IDR. Plain radix trees still interpret NULL as a not-present entry. The IDA is reimplemented as a client of the newly enhanced radix tree. As in the current implementation, it uses a bitmap at the last level of the tree. Signed-off-by: Matthew Wilcox <willy@infradead.org> Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com> Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r--include/linux/idr.h145
-rw-r--r--include/linux/radix-tree.h49
-rw-r--r--init/main.c3
-rw-r--r--lib/idr.c1178
-rw-r--r--lib/radix-tree.c375
-rw-r--r--tools/testing/radix-tree/.gitignore1
-rw-r--r--tools/testing/radix-tree/Makefile10
-rw-r--r--tools/testing/radix-tree/idr-test.c342
-rw-r--r--tools/testing/radix-tree/linux/gfp.h8
-rw-r--r--tools/testing/radix-tree/linux/idr.h1
-rw-r--r--tools/testing/radix-tree/linux/kernel.h1
-rw-r--r--tools/testing/radix-tree/main.c6
-rw-r--r--tools/testing/radix-tree/test.h2
13 files changed, 995 insertions, 1126 deletions
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 3c01b89aed67..f58c0a3addc3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -12,47 +12,28 @@
12#ifndef __IDR_H__ 12#ifndef __IDR_H__
13#define __IDR_H__ 13#define __IDR_H__
14 14
15#include <linux/types.h> 15#include <linux/radix-tree.h>
16#include <linux/bitops.h> 16#include <linux/gfp.h>
17#include <linux/init.h> 17
18#include <linux/rcupdate.h> 18struct idr {
19 struct radix_tree_root idr_rt;
20 unsigned int idr_next;
21};
19 22
20/* 23/*
21 * Using 6 bits at each layer allows us to allocate 7 layers out of each page. 24 * The IDR API does not expose the tagging functionality of the radix tree
22 * 8 bits only gave us 3 layers out of every pair of pages, which is less 25 * to users. Use tag 0 to track whether a node has free space below it.
23 * efficient except for trees with a largest element between 192-255 inclusive.
24 */ 26 */
25#define IDR_BITS 6 27#define IDR_FREE 0
26#define IDR_SIZE (1 << IDR_BITS)
27#define IDR_MASK ((1 << IDR_BITS)-1)
28
29struct idr_layer {
30 int prefix; /* the ID prefix of this idr_layer */
31 int layer; /* distance from leaf */
32 struct idr_layer __rcu *ary[1<<IDR_BITS];
33 int count; /* When zero, we can release it */
34 union {
35 /* A zero bit means "space here" */
36 DECLARE_BITMAP(bitmap, IDR_SIZE);
37 struct rcu_head rcu_head;
38 };
39};
40 28
41struct idr { 29/* Set the IDR flag and the IDR_FREE tag */
42 struct idr_layer __rcu *hint; /* the last layer allocated from */ 30#define IDR_RT_MARKER ((__force gfp_t)(3 << __GFP_BITS_SHIFT))
43 struct idr_layer __rcu *top;
44 int layers; /* only valid w/o concurrent changes */
45 int cur; /* current pos for cyclic allocation */
46 spinlock_t lock;
47 int id_free_cnt;
48 struct idr_layer *id_free;
49};
50 31
51#define IDR_INIT(name) \ 32#define IDR_INIT \
52{ \ 33{ \
53 .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ 34 .idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER) \
54} 35}
55#define DEFINE_IDR(name) struct idr name = IDR_INIT(name) 36#define DEFINE_IDR(name) struct idr name = IDR_INIT
56 37
57/** 38/**
58 * idr_get_cursor - Return the current position of the cyclic allocator 39 * idr_get_cursor - Return the current position of the cyclic allocator
@@ -62,9 +43,9 @@ struct idr {
62 * idr_alloc_cyclic() if it is free (otherwise the search will start from 43 * idr_alloc_cyclic() if it is free (otherwise the search will start from
63 * this position). 44 * this position).
64 */ 45 */
65static inline unsigned int idr_get_cursor(struct idr *idr) 46static inline unsigned int idr_get_cursor(const struct idr *idr)
66{ 47{
67 return READ_ONCE(idr->cur); 48 return READ_ONCE(idr->idr_next);
68} 49}
69 50
70/** 51/**
@@ -77,7 +58,7 @@ static inline unsigned int idr_get_cursor(struct idr *idr)
77 */ 58 */
78static inline void idr_set_cursor(struct idr *idr, unsigned int val) 59static inline void idr_set_cursor(struct idr *idr, unsigned int val)
79{ 60{
80 WRITE_ONCE(idr->cur, val); 61 WRITE_ONCE(idr->idr_next, val);
81} 62}
82 63
83/** 64/**
@@ -97,22 +78,31 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
97 * period). 78 * period).
98 */ 79 */
99 80
100/*
101 * This is what we export.
102 */
103
104void *idr_find_slowpath(struct idr *idp, int id);
105void idr_preload(gfp_t gfp_mask); 81void idr_preload(gfp_t gfp_mask);
106int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); 82int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t);
107int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask); 83int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t);
108int idr_for_each(struct idr *idp, 84int idr_for_each(const struct idr *,
109 int (*fn)(int id, void *p, void *data), void *data); 85 int (*fn)(int id, void *p, void *data), void *data);
110void *idr_get_next(struct idr *idp, int *nextid); 86void *idr_get_next(struct idr *, int *nextid);
111void *idr_replace(struct idr *idp, void *ptr, int id); 87void *idr_replace(struct idr *, void *, int id);
112void idr_remove(struct idr *idp, int id); 88void idr_destroy(struct idr *);
113void idr_destroy(struct idr *idp); 89
114void idr_init(struct idr *idp); 90static inline void idr_remove(struct idr *idr, int id)
115bool idr_is_empty(struct idr *idp); 91{
92 radix_tree_delete(&idr->idr_rt, id);
93}
94
95static inline void idr_init(struct idr *idr)
96{
97 INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
98 idr->idr_next = 0;
99}
100
101static inline bool idr_is_empty(const struct idr *idr)
102{
103 return radix_tree_empty(&idr->idr_rt) &&
104 radix_tree_tagged(&idr->idr_rt, IDR_FREE);
105}
116 106
117/** 107/**
118 * idr_preload_end - end preload section started with idr_preload() 108 * idr_preload_end - end preload section started with idr_preload()
@@ -137,19 +127,14 @@ static inline void idr_preload_end(void)
137 * This function can be called under rcu_read_lock(), given that the leaf 127 * This function can be called under rcu_read_lock(), given that the leaf
138 * pointers lifetimes are correctly managed. 128 * pointers lifetimes are correctly managed.
139 */ 129 */
140static inline void *idr_find(struct idr *idr, int id) 130static inline void *idr_find(const struct idr *idr, int id)
141{ 131{
142 struct idr_layer *hint = rcu_dereference_raw(idr->hint); 132 return radix_tree_lookup(&idr->idr_rt, id);
143
144 if (hint && (id & ~IDR_MASK) == hint->prefix)
145 return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
146
147 return idr_find_slowpath(idr, id);
148} 133}
149 134
150/** 135/**
151 * idr_for_each_entry - iterate over an idr's elements of a given type 136 * idr_for_each_entry - iterate over an idr's elements of a given type
152 * @idp: idr handle 137 * @idr: idr handle
153 * @entry: the type * to use as cursor 138 * @entry: the type * to use as cursor
154 * @id: id entry's key 139 * @id: id entry's key
155 * 140 *
@@ -157,57 +142,60 @@ static inline void *idr_find(struct idr *idr, int id)
157 * after normal terminatinon @entry is left with the value NULL. This 142 * after normal terminatinon @entry is left with the value NULL. This
158 * is convenient for a "not found" value. 143 * is convenient for a "not found" value.
159 */ 144 */
160#define idr_for_each_entry(idp, entry, id) \ 145#define idr_for_each_entry(idr, entry, id) \
161 for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) 146 for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id)
162 147
163/** 148/**
164 * idr_for_each_entry - continue iteration over an idr's elements of a given type 149 * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
165 * @idp: idr handle 150 * @idr: idr handle
166 * @entry: the type * to use as cursor 151 * @entry: the type * to use as cursor
167 * @id: id entry's key 152 * @id: id entry's key
168 * 153 *
169 * Continue to iterate over list of given type, continuing after 154 * Continue to iterate over list of given type, continuing after
170 * the current position. 155 * the current position.
171 */ 156 */
172#define idr_for_each_entry_continue(idp, entry, id) \ 157#define idr_for_each_entry_continue(idr, entry, id) \
173 for ((entry) = idr_get_next((idp), &(id)); \ 158 for ((entry) = idr_get_next((idr), &(id)); \
174 entry; \ 159 entry; \
175 ++id, (entry) = idr_get_next((idp), &(id))) 160 ++id, (entry) = idr_get_next((idr), &(id)))
176 161
177/* 162/*
178 * IDA - IDR based id allocator, use when translation from id to 163 * IDA - IDR based id allocator, use when translation from id to
179 * pointer isn't necessary. 164 * pointer isn't necessary.
180 *
181 * IDA_BITMAP_LONGS is calculated to be one less to accommodate
182 * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes.
183 */ 165 */
184#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ 166#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */
185#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1) 167#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long))
186#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) 168#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8)
187 169
188struct ida_bitmap { 170struct ida_bitmap {
189 long nr_busy;
190 unsigned long bitmap[IDA_BITMAP_LONGS]; 171 unsigned long bitmap[IDA_BITMAP_LONGS];
191}; 172};
192 173
193struct ida { 174struct ida {
194 struct idr idr; 175 struct radix_tree_root ida_rt;
195 struct ida_bitmap *free_bitmap; 176 struct ida_bitmap *free_bitmap;
196}; 177};
197 178
198#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } 179#define IDA_INIT { \
199#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) 180 .ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT), \
181}
182#define DEFINE_IDA(name) struct ida name = IDA_INIT
200 183
201int ida_pre_get(struct ida *ida, gfp_t gfp_mask); 184int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
202int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); 185int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
203void ida_remove(struct ida *ida, int id); 186void ida_remove(struct ida *ida, int id);
204void ida_destroy(struct ida *ida); 187void ida_destroy(struct ida *ida);
205void ida_init(struct ida *ida);
206 188
207int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, 189int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
208 gfp_t gfp_mask); 190 gfp_t gfp_mask);
209void ida_simple_remove(struct ida *ida, unsigned int id); 191void ida_simple_remove(struct ida *ida, unsigned int id);
210 192
193static inline void ida_init(struct ida *ida)
194{
195 INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
196 ida->free_bitmap = NULL;
197}
198
211/** 199/**
212 * ida_get_new - allocate new ID 200 * ida_get_new - allocate new ID
213 * @ida: idr handle 201 * @ida: idr handle
@@ -220,11 +208,8 @@ static inline int ida_get_new(struct ida *ida, int *p_id)
220 return ida_get_new_above(ida, 0, p_id); 208 return ida_get_new_above(ida, 0, p_id);
221} 209}
222 210
223static inline bool ida_is_empty(struct ida *ida) 211static inline bool ida_is_empty(const struct ida *ida)
224{ 212{
225 return idr_is_empty(&ida->idr); 213 return radix_tree_empty(&ida->ida_rt);
226} 214}
227
228void __init idr_init_cache(void);
229
230#endif /* __IDR_H__ */ 215#endif /* __IDR_H__ */
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 05f715cb8062..2ba0c1f46c84 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -105,7 +105,10 @@ struct radix_tree_node {
105 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 105 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
106}; 106};
107 107
108/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ 108/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
109#define ROOT_IS_IDR ((__force gfp_t)(1 << __GFP_BITS_SHIFT))
110#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT + 1)
111
109struct radix_tree_root { 112struct radix_tree_root {
110 gfp_t gfp_mask; 113 gfp_t gfp_mask;
111 struct radix_tree_node __rcu *rnode; 114 struct radix_tree_node __rcu *rnode;
@@ -358,10 +361,14 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index,
358 unsigned new_order); 361 unsigned new_order);
359int radix_tree_join(struct radix_tree_root *, unsigned long index, 362int radix_tree_join(struct radix_tree_root *, unsigned long index,
360 unsigned new_order, void *); 363 unsigned new_order, void *);
364void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
365 gfp_t, int end);
361 366
362#define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ 367enum {
363#define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ 368 RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */
364#define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ 369 RADIX_TREE_ITER_TAGGED = 0x10, /* lookup tagged slots */
370 RADIX_TREE_ITER_CONTIG = 0x20, /* stop at first hole */
371};
365 372
366/** 373/**
367 * radix_tree_iter_init - initialize radix tree iterator 374 * radix_tree_iter_init - initialize radix tree iterator
@@ -403,6 +410,40 @@ void **radix_tree_next_chunk(const struct radix_tree_root *,
403 struct radix_tree_iter *iter, unsigned flags); 410 struct radix_tree_iter *iter, unsigned flags);
404 411
405/** 412/**
413 * radix_tree_iter_lookup - look up an index in the radix tree
414 * @root: radix tree root
415 * @iter: iterator state
416 * @index: key to look up
417 *
418 * If @index is present in the radix tree, this function returns the slot
419 * containing it and updates @iter to describe the entry. If @index is not
420 * present, it returns NULL.
421 */
422static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root,
423 struct radix_tree_iter *iter, unsigned long index)
424{
425 radix_tree_iter_init(iter, index);
426 return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG);
427}
428
429/**
430 * radix_tree_iter_find - find a present entry
431 * @root: radix tree root
432 * @iter: iterator state
433 * @index: start location
434 *
435 * This function returns the slot containing the entry with the lowest index
436 * which is at least @index. If @index is larger than any present entry, this
437 * function returns NULL. The @iter is updated to describe the entry found.
438 */
439static inline void **radix_tree_iter_find(const struct radix_tree_root *root,
440 struct radix_tree_iter *iter, unsigned long index)
441{
442 radix_tree_iter_init(iter, index);
443 return radix_tree_next_chunk(root, iter, 0);
444}
445
446/**
406 * radix_tree_iter_retry - retry this chunk of the iteration 447 * radix_tree_iter_retry - retry this chunk of the iteration
407 * @iter: iterator state 448 * @iter: iterator state
408 * 449 *
diff --git a/init/main.c b/init/main.c
index b0c9d6facef9..a65e3aad31bc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -553,7 +553,7 @@ asmlinkage __visible void __init start_kernel(void)
553 if (WARN(!irqs_disabled(), 553 if (WARN(!irqs_disabled(),
554 "Interrupts were enabled *very* early, fixing it\n")) 554 "Interrupts were enabled *very* early, fixing it\n"))
555 local_irq_disable(); 555 local_irq_disable();
556 idr_init_cache(); 556 radix_tree_init();
557 557
558 /* 558 /*
559 * Allow workqueue creation and work item queueing/cancelling 559 * Allow workqueue creation and work item queueing/cancelling
@@ -568,7 +568,6 @@ asmlinkage __visible void __init start_kernel(void)
568 trace_init(); 568 trace_init();
569 569
570 context_tracking_init(); 570 context_tracking_init();
571 radix_tree_init();
572 /* init some links before init_ISA_irqs() */ 571 /* init some links before init_ISA_irqs() */
573 early_irq_init(); 572 early_irq_init();
574 init_IRQ(); 573 init_IRQ();
diff --git a/lib/idr.c b/lib/idr.c
index 52d2979a05e8..b87056e2cc4c 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -1,1068 +1,369 @@
1/* 1#include <linux/bitmap.h>
2 * 2002-10-18 written by Jim Houston jim.houston@ccur.com
3 * Copyright (C) 2002 by Concurrent Computer Corporation
4 * Distributed under the GNU GPL license version 2.
5 *
6 * Modified by George Anzinger to reuse immediately and to use
7 * find bit instructions. Also removed _irq on spinlocks.
8 *
9 * Modified by Nadia Derbey to make it RCU safe.
10 *
11 * Small id to pointer translation service.
12 *
13 * It uses a radix tree like structure as a sparse array indexed
14 * by the id to obtain the pointer. The bitmap makes allocating
15 * a new id quick.
16 *
17 * You call it to allocate an id (an int) an associate with that id a
18 * pointer or what ever, we treat it as a (void *). You can pass this
19 * id to a user for him to pass back at a later time. You then pass
20 * that id to this code and it returns your pointer.
21 */
22
23#ifndef TEST // to test in user space...
24#include <linux/slab.h>
25#include <linux/init.h>
26#include <linux/export.h> 2#include <linux/export.h>
27#endif
28#include <linux/err.h>
29#include <linux/string.h>
30#include <linux/idr.h> 3#include <linux/idr.h>
4#include <linux/slab.h>
31#include <linux/spinlock.h> 5#include <linux/spinlock.h>
32#include <linux/percpu.h>
33 6
34#define MAX_IDR_SHIFT (sizeof(int) * 8 - 1)
35#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
36
37/* Leave the possibility of an incomplete final layer */
38#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)
39
40/* Number of id_layer structs to leave in free list */
41#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2)
42
43static struct kmem_cache *idr_layer_cache;
44static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
45static DEFINE_PER_CPU(int, idr_preload_cnt);
46static DEFINE_SPINLOCK(simple_ida_lock); 7static DEFINE_SPINLOCK(simple_ida_lock);
47 8
48/* the maximum ID which can be allocated given idr->layers */
49static int idr_max(int layers)
50{
51 int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT);
52
53 return (1 << bits) - 1;
54}
55
56/*
57 * Prefix mask for an idr_layer at @layer. For layer 0, the prefix mask is
58 * all bits except for the lower IDR_BITS. For layer 1, 2 * IDR_BITS, and
59 * so on.
60 */
61static int idr_layer_prefix_mask(int layer)
62{
63 return ~idr_max(layer + 1);
64}
65
66static struct idr_layer *get_from_free_list(struct idr *idp)
67{
68 struct idr_layer *p;
69 unsigned long flags;
70
71 spin_lock_irqsave(&idp->lock, flags);
72 if ((p = idp->id_free)) {
73 idp->id_free = p->ary[0];
74 idp->id_free_cnt--;
75 p->ary[0] = NULL;
76 }
77 spin_unlock_irqrestore(&idp->lock, flags);
78 return(p);
79}
80
81/**
82 * idr_layer_alloc - allocate a new idr_layer
83 * @gfp_mask: allocation mask
84 * @layer_idr: optional idr to allocate from
85 *
86 * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch
87 * one from the per-cpu preload buffer. If @layer_idr is not %NULL, fetch
88 * an idr_layer from @idr->id_free.
89 *
90 * @layer_idr is to maintain backward compatibility with the old alloc
91 * interface - idr_pre_get() and idr_get_new*() - and will be removed
92 * together with per-pool preload buffer.
93 */
94static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr)
95{
96 struct idr_layer *new;
97
98 /* this is the old path, bypass to get_from_free_list() */
99 if (layer_idr)
100 return get_from_free_list(layer_idr);
101
102 /*
103 * Try to allocate directly from kmem_cache. We want to try this
104 * before preload buffer; otherwise, non-preloading idr_alloc()
105 * users will end up taking advantage of preloading ones. As the
106 * following is allowed to fail for preloaded cases, suppress
107 * warning this time.
108 */
109 new = kmem_cache_zalloc(idr_layer_cache, gfp_mask | __GFP_NOWARN);
110 if (new)
111 return new;
112
113 /*
114 * Try to fetch one from the per-cpu preload buffer if in process
115 * context. See idr_preload() for details.
116 */
117 if (!in_interrupt()) {
118 preempt_disable();
119 new = __this_cpu_read(idr_preload_head);
120 if (new) {
121 __this_cpu_write(idr_preload_head, new->ary[0]);
122 __this_cpu_dec(idr_preload_cnt);
123 new->ary[0] = NULL;
124 }
125 preempt_enable();
126 if (new)
127 return new;
128 }
129
130 /*
131 * Both failed. Try kmem_cache again w/o adding __GFP_NOWARN so
132 * that memory allocation failure warning is printed as intended.
133 */
134 return kmem_cache_zalloc(idr_layer_cache, gfp_mask);
135}
136
137static void idr_layer_rcu_free(struct rcu_head *head)
138{
139 struct idr_layer *layer;
140
141 layer = container_of(head, struct idr_layer, rcu_head);
142 kmem_cache_free(idr_layer_cache, layer);
143}
144
145static inline void free_layer(struct idr *idr, struct idr_layer *p)
146{
147 if (idr->hint == p)
148 RCU_INIT_POINTER(idr->hint, NULL);
149 call_rcu(&p->rcu_head, idr_layer_rcu_free);
150}
151
152/* only called when idp->lock is held */
153static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
154{
155 p->ary[0] = idp->id_free;
156 idp->id_free = p;
157 idp->id_free_cnt++;
158}
159
160static void move_to_free_list(struct idr *idp, struct idr_layer *p)
161{
162 unsigned long flags;
163
164 /*
165 * Depends on the return element being zeroed.
166 */
167 spin_lock_irqsave(&idp->lock, flags);
168 __move_to_free_list(idp, p);
169 spin_unlock_irqrestore(&idp->lock, flags);
170}
171
172static void idr_mark_full(struct idr_layer **pa, int id)
173{
174 struct idr_layer *p = pa[0];
175 int l = 0;
176
177 __set_bit(id & IDR_MASK, p->bitmap);
178 /*
179 * If this layer is full mark the bit in the layer above to
180 * show that this part of the radix tree is full. This may
181 * complete the layer above and require walking up the radix
182 * tree.
183 */
184 while (bitmap_full(p->bitmap, IDR_SIZE)) {
185 if (!(p = pa[++l]))
186 break;
187 id = id >> IDR_BITS;
188 __set_bit((id & IDR_MASK), p->bitmap);
189 }
190}
191
192static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
193{
194 while (idp->id_free_cnt < MAX_IDR_FREE) {
195 struct idr_layer *new;
196 new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
197 if (new == NULL)
198 return (0);
199 move_to_free_list(idp, new);
200 }
201 return 1;
202}
203
204/**
205 * sub_alloc - try to allocate an id without growing the tree depth
206 * @idp: idr handle
207 * @starting_id: id to start search at
208 * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer
209 * @gfp_mask: allocation mask for idr_layer_alloc()
210 * @layer_idr: optional idr passed to idr_layer_alloc()
211 *
212 * Allocate an id in range [@starting_id, INT_MAX] from @idp without
213 * growing its depth. Returns
214 *
215 * the allocated id >= 0 if successful,
216 * -EAGAIN if the tree needs to grow for allocation to succeed,
217 * -ENOSPC if the id space is exhausted,
218 * -ENOMEM if more idr_layers need to be allocated.
219 */
220static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
221 gfp_t gfp_mask, struct idr *layer_idr)
222{
223 int n, m, sh;
224 struct idr_layer *p, *new;
225 int l, id, oid;
226
227 id = *starting_id;
228 restart:
229 p = idp->top;
230 l = idp->layers;
231 pa[l--] = NULL;
232 while (1) {
233 /*
234 * We run around this while until we reach the leaf node...
235 */
236 n = (id >> (IDR_BITS*l)) & IDR_MASK;
237 m = find_next_zero_bit(p->bitmap, IDR_SIZE, n);
238 if (m == IDR_SIZE) {
239 /* no space available go back to previous layer. */
240 l++;
241 oid = id;
242 id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
243
244 /* if already at the top layer, we need to grow */
245 if (id > idr_max(idp->layers)) {
246 *starting_id = id;
247 return -EAGAIN;
248 }
249 p = pa[l];
250 BUG_ON(!p);
251
252 /* If we need to go up one layer, continue the
253 * loop; otherwise, restart from the top.
254 */
255 sh = IDR_BITS * (l + 1);
256 if (oid >> sh == id >> sh)
257 continue;
258 else
259 goto restart;
260 }
261 if (m != n) {
262 sh = IDR_BITS*l;
263 id = ((id >> sh) ^ n ^ m) << sh;
264 }
265 if ((id >= MAX_IDR_BIT) || (id < 0))
266 return -ENOSPC;
267 if (l == 0)
268 break;
269 /*
270 * Create the layer below if it is missing.
271 */
272 if (!p->ary[m]) {
273 new = idr_layer_alloc(gfp_mask, layer_idr);
274 if (!new)
275 return -ENOMEM;
276 new->layer = l-1;
277 new->prefix = id & idr_layer_prefix_mask(new->layer);
278 rcu_assign_pointer(p->ary[m], new);
279 p->count++;
280 }
281 pa[l--] = p;
282 p = p->ary[m];
283 }
284
285 pa[l] = p;
286 return id;
287}
288
289static int idr_get_empty_slot(struct idr *idp, int starting_id,
290 struct idr_layer **pa, gfp_t gfp_mask,
291 struct idr *layer_idr)
292{
293 struct idr_layer *p, *new;
294 int layers, v, id;
295 unsigned long flags;
296
297 id = starting_id;
298build_up:
299 p = idp->top;
300 layers = idp->layers;
301 if (unlikely(!p)) {
302 if (!(p = idr_layer_alloc(gfp_mask, layer_idr)))
303 return -ENOMEM;
304 p->layer = 0;
305 layers = 1;
306 }
307 /*
308 * Add a new layer to the top of the tree if the requested
309 * id is larger than the currently allocated space.
310 */
311 while (id > idr_max(layers)) {
312 layers++;
313 if (!p->count) {
314 /* special case: if the tree is currently empty,
315 * then we grow the tree by moving the top node
316 * upwards.
317 */
318 p->layer++;
319 WARN_ON_ONCE(p->prefix);
320 continue;
321 }
322 if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) {
323 /*
324 * The allocation failed. If we built part of
325 * the structure tear it down.
326 */
327 spin_lock_irqsave(&idp->lock, flags);
328 for (new = p; p && p != idp->top; new = p) {
329 p = p->ary[0];
330 new->ary[0] = NULL;
331 new->count = 0;
332 bitmap_clear(new->bitmap, 0, IDR_SIZE);
333 __move_to_free_list(idp, new);
334 }
335 spin_unlock_irqrestore(&idp->lock, flags);
336 return -ENOMEM;
337 }
338 new->ary[0] = p;
339 new->count = 1;
340 new->layer = layers-1;
341 new->prefix = id & idr_layer_prefix_mask(new->layer);
342 if (bitmap_full(p->bitmap, IDR_SIZE))
343 __set_bit(0, new->bitmap);
344 p = new;
345 }
346 rcu_assign_pointer(idp->top, p);
347 idp->layers = layers;
348 v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr);
349 if (v == -EAGAIN)
350 goto build_up;
351 return(v);
352}
353
354/*
355 * @id and @pa are from a successful allocation from idr_get_empty_slot().
356 * Install the user pointer @ptr and mark the slot full.
357 */
358static void idr_fill_slot(struct idr *idr, void *ptr, int id,
359 struct idr_layer **pa)
360{
361 /* update hint used for lookup, cleared from free_layer() */
362 rcu_assign_pointer(idr->hint, pa[0]);
363
364 rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr);
365 pa[0]->count++;
366 idr_mark_full(pa, id);
367}
368
369
370/**
371 * idr_preload - preload for idr_alloc()
372 * @gfp_mask: allocation mask to use for preloading
373 *
374 * Preload per-cpu layer buffer for idr_alloc(). Can only be used from
375 * process context and each idr_preload() invocation should be matched with
376 * idr_preload_end(). Note that preemption is disabled while preloaded.
377 *
378 * The first idr_alloc() in the preloaded section can be treated as if it
379 * were invoked with @gfp_mask used for preloading. This allows using more
380 * permissive allocation masks for idrs protected by spinlocks.
381 *
382 * For example, if idr_alloc() below fails, the failure can be treated as
383 * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT.
384 *
385 * idr_preload(GFP_KERNEL);
386 * spin_lock(lock);
387 *
388 * id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT);
389 *
390 * spin_unlock(lock);
391 * idr_preload_end();
392 * if (id < 0)
393 * error;
394 */
395void idr_preload(gfp_t gfp_mask)
396{
397 /*
398 * Consuming preload buffer from non-process context breaks preload
399 * allocation guarantee. Disallow usage from those contexts.
400 */
401 WARN_ON_ONCE(in_interrupt());
402 might_sleep_if(gfpflags_allow_blocking(gfp_mask));
403
404 preempt_disable();
405
406 /*
407 * idr_alloc() is likely to succeed w/o full idr_layer buffer and
408 * return value from idr_alloc() needs to be checked for failure
409 * anyway. Silently give up if allocation fails. The caller can
410 * treat failures from idr_alloc() as if idr_alloc() were called
411 * with @gfp_mask which should be enough.
412 */
413 while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
414 struct idr_layer *new;
415
416 preempt_enable();
417 new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
418 preempt_disable();
419 if (!new)
420 break;
421
422 /* link the new one to per-cpu preload list */
423 new->ary[0] = __this_cpu_read(idr_preload_head);
424 __this_cpu_write(idr_preload_head, new);
425 __this_cpu_inc(idr_preload_cnt);
426 }
427}
428EXPORT_SYMBOL(idr_preload);
429
430/** 9/**
431 * idr_alloc - allocate new idr entry 10 * idr_alloc - allocate an id
432 * @idr: the (initialized) idr 11 * @idr: idr handle
433 * @ptr: pointer to be associated with the new id 12 * @ptr: pointer to be associated with the new id
434 * @start: the minimum id (inclusive) 13 * @start: the minimum id (inclusive)
435 * @end: the maximum id (exclusive, <= 0 for max) 14 * @end: the maximum id (exclusive)
436 * @gfp_mask: memory allocation flags 15 * @gfp: memory allocation flags
437 * 16 *
438 * Allocate an id in [start, end) and associate it with @ptr. If no ID is 17 * Allocates an unused ID in the range [start, end). Returns -ENOSPC
439 * available in the specified range, returns -ENOSPC. On memory allocation 18 * if there are no unused IDs in that range.
440 * failure, returns -ENOMEM.
441 * 19 *
442 * Note that @end is treated as max when <= 0. This is to always allow 20 * Note that @end is treated as max when <= 0. This is to always allow
443 * using @start + N as @end as long as N is inside integer range. 21 * using @start + N as @end as long as N is inside integer range.
444 * 22 *
445 * The user is responsible for exclusively synchronizing all operations 23 * Simultaneous modifications to the @idr are not allowed and should be
446 * which may modify @idr. However, read-only accesses such as idr_find() 24 * prevented by the user, usually with a lock. idr_alloc() may be called
447 * or iteration can be performed under RCU read lock provided the user 25 * concurrently with read-only accesses to the @idr, such as idr_find() and
448 * destroys @ptr in RCU-safe way after removal from idr. 26 * idr_for_each_entry().
449 */ 27 */
450int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) 28int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
451{ 29{
452 int max = end > 0 ? end - 1 : INT_MAX; /* inclusive upper limit */ 30 void **slot;
453 struct idr_layer *pa[MAX_IDR_LEVEL + 1]; 31 struct radix_tree_iter iter;
454 int id;
455
456 might_sleep_if(gfpflags_allow_blocking(gfp_mask));
457 32
458 /* sanity checks */
459 if (WARN_ON_ONCE(start < 0)) 33 if (WARN_ON_ONCE(start < 0))
460 return -EINVAL; 34 return -EINVAL;
461 if (unlikely(max < start)) 35 if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
462 return -ENOSPC; 36 return -EINVAL;
463 37
464 /* allocate id */ 38 radix_tree_iter_init(&iter, start);
465 id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL); 39 slot = idr_get_free(&idr->idr_rt, &iter, gfp, end);
466 if (unlikely(id < 0)) 40 if (IS_ERR(slot))
467 return id; 41 return PTR_ERR(slot);
468 if (unlikely(id > max))
469 return -ENOSPC;
470 42
471 idr_fill_slot(idr, ptr, id, pa); 43 radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
472 return id; 44 radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
45 return iter.index;
473} 46}
474EXPORT_SYMBOL_GPL(idr_alloc); 47EXPORT_SYMBOL_GPL(idr_alloc);
475 48
476/** 49/**
477 * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion 50 * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion
478 * @idr: the (initialized) idr 51 * @idr: idr handle
479 * @ptr: pointer to be associated with the new id 52 * @ptr: pointer to be associated with the new id
480 * @start: the minimum id (inclusive) 53 * @start: the minimum id (inclusive)
481 * @end: the maximum id (exclusive, <= 0 for max) 54 * @end: the maximum id (exclusive)
482 * @gfp_mask: memory allocation flags 55 * @gfp: memory allocation flags
483 *
484 * Essentially the same as idr_alloc, but prefers to allocate progressively
485 * higher ids if it can. If the "cur" counter wraps, then it will start again
486 * at the "start" end of the range and allocate one that has already been used.
487 */
488int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end,
489 gfp_t gfp_mask)
490{
491 int id;
492
493 id = idr_alloc(idr, ptr, max(start, idr->cur), end, gfp_mask);
494 if (id == -ENOSPC)
495 id = idr_alloc(idr, ptr, start, end, gfp_mask);
496
497 if (likely(id >= 0))
498 idr->cur = id + 1;
499 return id;
500}
501EXPORT_SYMBOL(idr_alloc_cyclic);
502
503static void idr_remove_warning(int id)
504{
505 WARN(1, "idr_remove called for id=%d which is not allocated.\n", id);
506}
507
508static void sub_remove(struct idr *idp, int shift, int id)
509{
510 struct idr_layer *p = idp->top;
511 struct idr_layer **pa[MAX_IDR_LEVEL + 1];
512 struct idr_layer ***paa = &pa[0];
513 struct idr_layer *to_free;
514 int n;
515
516 *paa = NULL;
517 *++paa = &idp->top;
518
519 while ((shift > 0) && p) {
520 n = (id >> shift) & IDR_MASK;
521 __clear_bit(n, p->bitmap);
522 *++paa = &p->ary[n];
523 p = p->ary[n];
524 shift -= IDR_BITS;
525 }
526 n = id & IDR_MASK;
527 if (likely(p != NULL && test_bit(n, p->bitmap))) {
528 __clear_bit(n, p->bitmap);
529 RCU_INIT_POINTER(p->ary[n], NULL);
530 to_free = NULL;
531 while(*paa && ! --((**paa)->count)){
532 if (to_free)
533 free_layer(idp, to_free);
534 to_free = **paa;
535 **paa-- = NULL;
536 }
537 if (!*paa)
538 idp->layers = 0;
539 if (to_free)
540 free_layer(idp, to_free);
541 } else
542 idr_remove_warning(id);
543}
544
545/**
546 * idr_remove - remove the given id and free its slot
547 * @idp: idr handle
548 * @id: unique key
549 */
550void idr_remove(struct idr *idp, int id)
551{
552 struct idr_layer *p;
553 struct idr_layer *to_free;
554
555 if (id < 0)
556 return;
557
558 if (id > idr_max(idp->layers)) {
559 idr_remove_warning(id);
560 return;
561 }
562
563 sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
564 if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
565 idp->top->ary[0]) {
566 /*
567 * Single child at leftmost slot: we can shrink the tree.
568 * This level is not needed anymore since when layers are
569 * inserted, they are inserted at the top of the existing
570 * tree.
571 */
572 to_free = idp->top;
573 p = idp->top->ary[0];
574 rcu_assign_pointer(idp->top, p);
575 --idp->layers;
576 to_free->count = 0;
577 bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
578 free_layer(idp, to_free);
579 }
580}
581EXPORT_SYMBOL(idr_remove);
582
583static void __idr_remove_all(struct idr *idp)
584{
585 int n, id, max;
586 int bt_mask;
587 struct idr_layer *p;
588 struct idr_layer *pa[MAX_IDR_LEVEL + 1];
589 struct idr_layer **paa = &pa[0];
590
591 n = idp->layers * IDR_BITS;
592 *paa = idp->top;
593 RCU_INIT_POINTER(idp->top, NULL);
594 max = idr_max(idp->layers);
595
596 id = 0;
597 while (id >= 0 && id <= max) {
598 p = *paa;
599 while (n > IDR_BITS && p) {
600 n -= IDR_BITS;
601 p = p->ary[(id >> n) & IDR_MASK];
602 *++paa = p;
603 }
604
605 bt_mask = id;
606 id += 1 << n;
607 /* Get the highest bit that the above add changed from 0->1. */
608 while (n < fls(id ^ bt_mask)) {
609 if (*paa)
610 free_layer(idp, *paa);
611 n += IDR_BITS;
612 --paa;
613 }
614 }
615 idp->layers = 0;
616}
617
618/**
619 * idr_destroy - release all cached layers within an idr tree
620 * @idp: idr handle
621 *
622 * Free all id mappings and all idp_layers. After this function, @idp is
623 * completely unused and can be freed / recycled. The caller is
624 * responsible for ensuring that no one else accesses @idp during or after
625 * idr_destroy().
626 * 56 *
627 * A typical clean-up sequence for objects stored in an idr tree will use 57 * Allocates an ID larger than the last ID allocated if one is available.
628 * idr_for_each() to free all objects, if necessary, then idr_destroy() to 58 * If not, it will attempt to allocate the smallest ID that is larger or
629 * free up the id mappings and cached idr_layers. 59 * equal to @start.
630 */ 60 */
631void idr_destroy(struct idr *idp) 61int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
632{
633 __idr_remove_all(idp);
634
635 while (idp->id_free_cnt) {
636 struct idr_layer *p = get_from_free_list(idp);
637 kmem_cache_free(idr_layer_cache, p);
638 }
639}
640EXPORT_SYMBOL(idr_destroy);
641
642void *idr_find_slowpath(struct idr *idp, int id)
643{ 62{
644 int n; 63 int id, curr = idr->idr_next;
645 struct idr_layer *p;
646 64
647 if (id < 0) 65 if (curr < start)
648 return NULL; 66 curr = start;
649 67
650 p = rcu_dereference_raw(idp->top); 68 id = idr_alloc(idr, ptr, curr, end, gfp);
651 if (!p) 69 if ((id == -ENOSPC) && (curr > start))
652 return NULL; 70 id = idr_alloc(idr, ptr, start, curr, gfp);
653 n = (p->layer+1) * IDR_BITS;
654 71
655 if (id > idr_max(p->layer + 1)) 72 if (id >= 0)
656 return NULL; 73 idr->idr_next = id + 1U;
657 BUG_ON(n == 0);
658 74
659 while (n > 0 && p) { 75 return id;
660 n -= IDR_BITS;
661 BUG_ON(n != p->layer*IDR_BITS);
662 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
663 }
664 return((void *)p);
665} 76}
666EXPORT_SYMBOL(idr_find_slowpath); 77EXPORT_SYMBOL(idr_alloc_cyclic);
667 78
668/** 79/**
669 * idr_for_each - iterate through all stored pointers 80 * idr_for_each - iterate through all stored pointers
670 * @idp: idr handle 81 * @idr: idr handle
671 * @fn: function to be called for each pointer 82 * @fn: function to be called for each pointer
672 * @data: data passed back to callback function 83 * @data: data passed to callback function
673 * 84 *
674 * Iterate over the pointers registered with the given idr. The 85 * The callback function will be called for each entry in @idr, passing
675 * callback function will be called for each pointer currently 86 * the id, the pointer and the data pointer passed to this function.
676 * registered, passing the id, the pointer and the data pointer passed
677 * to this function. It is not safe to modify the idr tree while in
678 * the callback, so functions such as idr_get_new and idr_remove are
679 * not allowed.
680 * 87 *
681 * We check the return of @fn each time. If it returns anything other 88 * If @fn returns anything other than %0, the iteration stops and that
682 * than %0, we break out and return that value. 89 * value is returned from this function.
683 * 90 *
684 * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). 91 * idr_for_each() can be called concurrently with idr_alloc() and
92 * idr_remove() if protected by RCU. Newly added entries may not be
93 * seen and deleted entries may be seen, but adding and removing entries
94 * will not cause other entries to be skipped, nor spurious ones to be seen.
685 */ 95 */
686int idr_for_each(struct idr *idp, 96int idr_for_each(const struct idr *idr,
687 int (*fn)(int id, void *p, void *data), void *data) 97 int (*fn)(int id, void *p, void *data), void *data)
688{ 98{
689 int n, id, max, error = 0; 99 struct radix_tree_iter iter;
690 struct idr_layer *p; 100 void **slot;
691 struct idr_layer *pa[MAX_IDR_LEVEL + 1];
692 struct idr_layer **paa = &pa[0];
693
694 n = idp->layers * IDR_BITS;
695 *paa = rcu_dereference_raw(idp->top);
696 max = idr_max(idp->layers);
697
698 id = 0;
699 while (id >= 0 && id <= max) {
700 p = *paa;
701 while (n > 0 && p) {
702 n -= IDR_BITS;
703 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
704 *++paa = p;
705 }
706
707 if (p) {
708 error = fn(id, (void *)p, data);
709 if (error)
710 break;
711 }
712 101
713 id += 1 << n; 102 radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
714 while (n < fls(id)) { 103 int ret = fn(iter.index, rcu_dereference_raw(*slot), data);
715 n += IDR_BITS; 104 if (ret)
716 --paa; 105 return ret;
717 }
718 } 106 }
719 107
720 return error; 108 return 0;
721} 109}
722EXPORT_SYMBOL(idr_for_each); 110EXPORT_SYMBOL(idr_for_each);
723 111
724/** 112/**
725 * idr_get_next - lookup next object of id to given id. 113 * idr_get_next - Find next populated entry
726 * @idp: idr handle 114 * @idr: idr handle
727 * @nextidp: pointer to lookup key 115 * @nextid: Pointer to lowest possible ID to return
728 * 116 *
729 * Returns pointer to registered object with id, which is next number to 117 * Returns the next populated entry in the tree with an ID greater than
730 * given id. After being looked up, *@nextidp will be updated for the next 118 * or equal to the value pointed to by @nextid. On exit, @nextid is updated
731 * iteration. 119 * to the ID of the found value. To use in a loop, the value pointed to by
732 * 120 * nextid must be incremented by the user.
733 * This function can be called under rcu_read_lock(), given that the leaf
734 * pointers lifetimes are correctly managed.
735 */ 121 */
736void *idr_get_next(struct idr *idp, int *nextidp) 122void *idr_get_next(struct idr *idr, int *nextid)
737{ 123{
738 struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1]; 124 struct radix_tree_iter iter;
739 struct idr_layer **paa = &pa[0]; 125 void **slot;
740 int id = *nextidp;
741 int n, max;
742 126
743 /* find first ent */ 127 slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
744 p = *paa = rcu_dereference_raw(idp->top); 128 if (!slot)
745 if (!p)
746 return NULL; 129 return NULL;
747 n = (p->layer + 1) * IDR_BITS;
748 max = idr_max(p->layer + 1);
749
750 while (id >= 0 && id <= max) {
751 p = *paa;
752 while (n > 0 && p) {
753 n -= IDR_BITS;
754 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
755 *++paa = p;
756 }
757 130
758 if (p) { 131 *nextid = iter.index;
759 *nextidp = id; 132 return rcu_dereference_raw(*slot);
760 return p;
761 }
762
763 /*
764 * Proceed to the next layer at the current level. Unlike
765 * idr_for_each(), @id isn't guaranteed to be aligned to
766 * layer boundary at this point and adding 1 << n may
767 * incorrectly skip IDs. Make sure we jump to the
768 * beginning of the next layer using round_up().
769 */
770 id = round_up(id + 1, 1 << n);
771 while (n < fls(id)) {
772 n += IDR_BITS;
773 --paa;
774 }
775 }
776 return NULL;
777} 133}
778EXPORT_SYMBOL(idr_get_next); 134EXPORT_SYMBOL(idr_get_next);
779 135
780
781/** 136/**
782 * idr_replace - replace pointer for given id 137 * idr_replace - replace pointer for given id
783 * @idp: idr handle 138 * @idr: idr handle
784 * @ptr: pointer you want associated with the id 139 * @ptr: New pointer to associate with the ID
785 * @id: lookup key 140 * @id: Lookup key
786 * 141 *
787 * Replace the pointer registered with an id and return the old value. 142 * Replace the pointer registered with an ID and return the old value.
788 * A %-ENOENT return indicates that @id was not found. 143 * This function can be called under the RCU read lock concurrently with
789 * A %-EINVAL return indicates that @id was not within valid constraints. 144 * idr_alloc() and idr_remove() (as long as the ID being removed is not
145 * the one being replaced!).
790 * 146 *
791 * The caller must serialize with writers. 147 * Returns: 0 on success. %-ENOENT indicates that @id was not found.
148 * %-EINVAL indicates that @id or @ptr were not valid.
792 */ 149 */
793void *idr_replace(struct idr *idp, void *ptr, int id) 150void *idr_replace(struct idr *idr, void *ptr, int id)
794{ 151{
795 int n; 152 struct radix_tree_node *node;
796 struct idr_layer *p, *old_p; 153 void **slot = NULL;
154 void *entry;
797 155
798 if (id < 0) 156 if (WARN_ON_ONCE(id < 0))
157 return ERR_PTR(-EINVAL);
158 if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
799 return ERR_PTR(-EINVAL); 159 return ERR_PTR(-EINVAL);
800 160
801 p = idp->top; 161 entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
802 if (!p) 162 if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
803 return ERR_PTR(-ENOENT);
804
805 if (id > idr_max(p->layer + 1))
806 return ERR_PTR(-ENOENT);
807
808 n = p->layer * IDR_BITS;
809 while ((n > 0) && p) {
810 p = p->ary[(id >> n) & IDR_MASK];
811 n -= IDR_BITS;
812 }
813
814 n = id & IDR_MASK;
815 if (unlikely(p == NULL || !test_bit(n, p->bitmap)))
816 return ERR_PTR(-ENOENT); 163 return ERR_PTR(-ENOENT);
817 164
818 old_p = p->ary[n]; 165 __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL);
819 rcu_assign_pointer(p->ary[n], ptr);
820 166
821 return old_p; 167 return entry;
822} 168}
823EXPORT_SYMBOL(idr_replace); 169EXPORT_SYMBOL(idr_replace);
824 170
825void __init idr_init_cache(void)
826{
827 idr_layer_cache = kmem_cache_create("idr_layer_cache",
828 sizeof(struct idr_layer), 0, SLAB_PANIC, NULL);
829}
830
831/**
832 * idr_init - initialize idr handle
833 * @idp: idr handle
834 *
835 * This function is use to set up the handle (@idp) that you will pass
836 * to the rest of the functions.
837 */
838void idr_init(struct idr *idp)
839{
840 memset(idp, 0, sizeof(struct idr));
841 spin_lock_init(&idp->lock);
842}
843EXPORT_SYMBOL(idr_init);
844
845static int idr_has_entry(int id, void *p, void *data)
846{
847 return 1;
848}
849
850bool idr_is_empty(struct idr *idp)
851{
852 return !idr_for_each(idp, idr_has_entry, NULL);
853}
854EXPORT_SYMBOL(idr_is_empty);
855
856/** 171/**
857 * DOC: IDA description 172 * DOC: IDA description
858 * IDA - IDR based ID allocator
859 *
860 * This is id allocator without id -> pointer translation. Memory
861 * usage is much lower than full blown idr because each id only
862 * occupies a bit. ida uses a custom leaf node which contains
863 * IDA_BITMAP_BITS slots.
864 * 173 *
865 * 2007-04-25 written by Tejun Heo <htejun@gmail.com> 174 * The IDA is an ID allocator which does not provide the ability to
175 * associate an ID with a pointer. As such, it only needs to store one
176 * bit per ID, and so is more space efficient than an IDR. To use an IDA,
177 * define it using DEFINE_IDA() (or embed a &struct ida in a data structure,
178 * then initialise it using ida_init()). To allocate a new ID, call
179 * ida_simple_get(). To free an ID, call ida_simple_remove().
180 *
181 * If you have more complex locking requirements, use a loop around
182 * ida_pre_get() and ida_get_new() to allocate a new ID. Then use
183 * ida_remove() to free an ID. You must make sure that ida_get_new() and
184 * ida_remove() cannot be called at the same time as each other for the
185 * same IDA.
186 *
187 * You can also use ida_get_new_above() if you need an ID to be allocated
188 * above a particular number. ida_destroy() can be used to dispose of an
189 * IDA without needing to free the individual IDs in it. You can use
190 * ida_is_empty() to find out whether the IDA has any IDs currently allocated.
191 *
192 * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward
193 * limitation, it should be quite straightforward to raise the maximum.
866 */ 194 */
867 195
868static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
869{
870 unsigned long flags;
871
872 if (!ida->free_bitmap) {
873 spin_lock_irqsave(&ida->idr.lock, flags);
874 if (!ida->free_bitmap) {
875 ida->free_bitmap = bitmap;
876 bitmap = NULL;
877 }
878 spin_unlock_irqrestore(&ida->idr.lock, flags);
879 }
880
881 kfree(bitmap);
882}
883
884/** 196/**
885 * ida_pre_get - reserve resources for ida allocation 197 * ida_pre_get - reserve resources for ida allocation
886 * @ida: ida handle 198 * @ida: ida handle
887 * @gfp_mask: memory allocation flag 199 * @gfp: memory allocation flags
888 *
889 * This function should be called prior to locking and calling the
890 * following function. It preallocates enough memory to satisfy the
891 * worst possible allocation.
892 * 200 *
893 * If the system is REALLY out of memory this function returns %0, 201 * This function should be called before calling ida_get_new_above(). If it
894 * otherwise %1. 202 * is unable to allocate memory, it will return %0. On success, it returns %1.
895 */ 203 */
896int ida_pre_get(struct ida *ida, gfp_t gfp_mask) 204int ida_pre_get(struct ida *ida, gfp_t gfp)
897{ 205{
898 /* allocate idr_layers */ 206 struct ida_bitmap *bitmap;
899 if (!__idr_pre_get(&ida->idr, gfp_mask))
900 return 0;
901 207
902 /* allocate free_bitmap */ 208 /*
903 if (!ida->free_bitmap) { 209 * This looks weird, but the IDA API has no preload_end() equivalent.
904 struct ida_bitmap *bitmap; 210 * Instead, ida_get_new() can return -EAGAIN, prompting the caller
211 * to return to the ida_pre_get() step.
212 */
213 idr_preload(gfp);
214 idr_preload_end();
905 215
906 bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask); 216 if (!ida->free_bitmap) {
217 bitmap = kmalloc(sizeof(struct ida_bitmap), gfp);
907 if (!bitmap) 218 if (!bitmap)
908 return 0; 219 return 0;
909 220 bitmap = xchg(&ida->free_bitmap, bitmap);
910 free_bitmap(ida, bitmap); 221 kfree(bitmap);
911 } 222 }
912 223
913 return 1; 224 return 1;
914} 225}
915EXPORT_SYMBOL(ida_pre_get); 226EXPORT_SYMBOL(ida_pre_get);
916 227
228#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS)
229
917/** 230/**
918 * ida_get_new_above - allocate new ID above or equal to a start id 231 * ida_get_new_above - allocate new ID above or equal to a start id
919 * @ida: ida handle 232 * @ida: ida handle
920 * @starting_id: id to start search at 233 * @start: id to start search at
921 * @p_id: pointer to the allocated handle 234 * @id: pointer to the allocated handle
922 * 235 *
923 * Allocate new ID above or equal to @starting_id. It should be called 236 * Allocate new ID above or equal to @start. It should be called
924 * with any required locks. 237 * with any required locks to ensure that concurrent calls to
238 * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed.
239 * Consider using ida_simple_get() if you do not have complex locking
240 * requirements.
925 * 241 *
926 * If memory is required, it will return %-EAGAIN, you should unlock 242 * If memory is required, it will return %-EAGAIN, you should unlock
927 * and go back to the ida_pre_get() call. If the ida is full, it will 243 * and go back to the ida_pre_get() call. If the ida is full, it will
928 * return %-ENOSPC. 244 * return %-ENOSPC. On success, it will return 0.
929 *
930 * Note that callers must ensure that concurrent access to @ida is not possible.
931 * See ida_simple_get() for a varaint which takes care of locking.
932 * 245 *
933 * @p_id returns a value in the range @starting_id ... %0x7fffffff. 246 * @id returns a value in the range @start ... %0x7fffffff.
934 */ 247 */
935int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) 248int ida_get_new_above(struct ida *ida, int start, int *id)
936{ 249{
937 struct idr_layer *pa[MAX_IDR_LEVEL + 1]; 250 struct radix_tree_root *root = &ida->ida_rt;
251 void **slot;
252 struct radix_tree_iter iter;
938 struct ida_bitmap *bitmap; 253 struct ida_bitmap *bitmap;
939 unsigned long flags; 254 unsigned long index;
940 int idr_id = starting_id / IDA_BITMAP_BITS; 255 unsigned bit;
941 int offset = starting_id % IDA_BITMAP_BITS; 256 int new;
942 int t, id; 257
943 258 index = start / IDA_BITMAP_BITS;
944 restart: 259 bit = start % IDA_BITMAP_BITS;
945 /* get vacant slot */ 260
946 t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr); 261 slot = radix_tree_iter_init(&iter, index);
947 if (t < 0) 262 for (;;) {
948 return t == -ENOMEM ? -EAGAIN : t; 263 if (slot)
949 264 slot = radix_tree_next_slot(slot, &iter,
950 if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT) 265 RADIX_TREE_ITER_TAGGED);
951 return -ENOSPC; 266 if (!slot) {
952 267 slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
953 if (t != idr_id) 268 if (IS_ERR(slot)) {
954 offset = 0; 269 if (slot == ERR_PTR(-ENOMEM))
955 idr_id = t; 270 return -EAGAIN;
956 271 return PTR_ERR(slot);
957 /* if bitmap isn't there, create a new one */ 272 }
958 bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK]; 273 }
959 if (!bitmap) { 274 if (iter.index > index)
960 spin_lock_irqsave(&ida->idr.lock, flags); 275 bit = 0;
961 bitmap = ida->free_bitmap; 276 new = iter.index * IDA_BITMAP_BITS;
962 ida->free_bitmap = NULL; 277 bitmap = rcu_dereference_raw(*slot);
963 spin_unlock_irqrestore(&ida->idr.lock, flags); 278 if (bitmap) {
964 279 bit = find_next_zero_bit(bitmap->bitmap,
965 if (!bitmap) 280 IDA_BITMAP_BITS, bit);
966 return -EAGAIN; 281 new += bit;
967 282 if (new < 0)
968 memset(bitmap, 0, sizeof(struct ida_bitmap)); 283 return -ENOSPC;
969 rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK], 284 if (bit == IDA_BITMAP_BITS)
970 (void *)bitmap); 285 continue;
971 pa[0]->count++;
972 }
973
974 /* lookup for empty slot */
975 t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset);
976 if (t == IDA_BITMAP_BITS) {
977 /* no empty slot after offset, continue to the next chunk */
978 idr_id++;
979 offset = 0;
980 goto restart;
981 }
982
983 id = idr_id * IDA_BITMAP_BITS + t;
984 if (id >= MAX_IDR_BIT)
985 return -ENOSPC;
986
987 __set_bit(t, bitmap->bitmap);
988 if (++bitmap->nr_busy == IDA_BITMAP_BITS)
989 idr_mark_full(pa, idr_id);
990 286
991 *p_id = id; 287 __set_bit(bit, bitmap->bitmap);
288 if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
289 radix_tree_iter_tag_clear(root, &iter,
290 IDR_FREE);
291 } else {
292 new += bit;
293 if (new < 0)
294 return -ENOSPC;
295 bitmap = ida->free_bitmap;
296 if (!bitmap)
297 return -EAGAIN;
298 ida->free_bitmap = NULL;
299 memset(bitmap, 0, sizeof(*bitmap));
300 __set_bit(bit, bitmap->bitmap);
301 radix_tree_iter_replace(root, &iter, slot, bitmap);
302 }
992 303
993 /* Each leaf node can handle nearly a thousand slots and the 304 *id = new;
994 * whole idea of ida is to have small memory foot print. 305 return 0;
995 * Throw away extra resources one by one after each successful
996 * allocation.
997 */
998 if (ida->idr.id_free_cnt || ida->free_bitmap) {
999 struct idr_layer *p = get_from_free_list(&ida->idr);
1000 if (p)
1001 kmem_cache_free(idr_layer_cache, p);
1002 } 306 }
1003
1004 return 0;
1005} 307}
1006EXPORT_SYMBOL(ida_get_new_above); 308EXPORT_SYMBOL(ida_get_new_above);
1007 309
1008/** 310/**
1009 * ida_remove - remove the given ID 311 * ida_remove - Free the given ID
1010 * @ida: ida handle 312 * @ida: ida handle
1011 * @id: ID to free 313 * @id: ID to free
314 *
315 * This function should not be called at the same time as ida_get_new_above().
1012 */ 316 */
1013void ida_remove(struct ida *ida, int id) 317void ida_remove(struct ida *ida, int id)
1014{ 318{
1015 struct idr_layer *p = ida->idr.top; 319 unsigned long index = id / IDA_BITMAP_BITS;
1016 int shift = (ida->idr.layers - 1) * IDR_BITS; 320 unsigned offset = id % IDA_BITMAP_BITS;
1017 int idr_id = id / IDA_BITMAP_BITS;
1018 int offset = id % IDA_BITMAP_BITS;
1019 int n;
1020 struct ida_bitmap *bitmap; 321 struct ida_bitmap *bitmap;
322 struct radix_tree_iter iter;
323 void **slot;
1021 324
1022 if (idr_id > idr_max(ida->idr.layers)) 325 slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);
326 if (!slot)
1023 goto err; 327 goto err;
1024 328
1025 /* clear full bits while looking up the leaf idr_layer */ 329 bitmap = rcu_dereference_raw(*slot);
1026 while ((shift > 0) && p) { 330 if (!test_bit(offset, bitmap->bitmap))
1027 n = (idr_id >> shift) & IDR_MASK;
1028 __clear_bit(n, p->bitmap);
1029 p = p->ary[n];
1030 shift -= IDR_BITS;
1031 }
1032
1033 if (p == NULL)
1034 goto err;
1035
1036 n = idr_id & IDR_MASK;
1037 __clear_bit(n, p->bitmap);
1038
1039 bitmap = (void *)p->ary[n];
1040 if (!bitmap || !test_bit(offset, bitmap->bitmap))
1041 goto err; 331 goto err;
1042 332
1043 /* update bitmap and remove it if empty */
1044 __clear_bit(offset, bitmap->bitmap); 333 __clear_bit(offset, bitmap->bitmap);
1045 if (--bitmap->nr_busy == 0) { 334 radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
1046 __set_bit(n, p->bitmap); /* to please idr_remove() */ 335 if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
1047 idr_remove(&ida->idr, idr_id); 336 kfree(bitmap);
1048 free_bitmap(ida, bitmap); 337 radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
1049 } 338 }
1050
1051 return; 339 return;
1052
1053 err: 340 err:
1054 WARN(1, "ida_remove called for id=%d which is not allocated.\n", id); 341 WARN(1, "ida_remove called for id=%d which is not allocated.\n", id);
1055} 342}
1056EXPORT_SYMBOL(ida_remove); 343EXPORT_SYMBOL(ida_remove);
1057 344
1058/** 345/**
1059 * ida_destroy - release all cached layers within an ida tree 346 * ida_destroy - Free the contents of an ida
1060 * @ida: ida handle 347 * @ida: ida handle
348 *
349 * Calling this function releases all resources associated with an IDA. When
350 * this call returns, the IDA is empty and can be reused or freed. The caller
351 * should not allow ida_remove() or ida_get_new_above() to be called at the
352 * same time.
1061 */ 353 */
1062void ida_destroy(struct ida *ida) 354void ida_destroy(struct ida *ida)
1063{ 355{
1064 idr_destroy(&ida->idr); 356 struct radix_tree_iter iter;
357 void **slot;
358
359 radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
360 struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
361 kfree(bitmap);
362 radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
363 }
364
1065 kfree(ida->free_bitmap); 365 kfree(ida->free_bitmap);
366 ida->free_bitmap = NULL;
1066} 367}
1067EXPORT_SYMBOL(ida_destroy); 368EXPORT_SYMBOL(ida_destroy);
1068 369
@@ -1141,18 +442,3 @@ void ida_simple_remove(struct ida *ida, unsigned int id)
1141 spin_unlock_irqrestore(&simple_ida_lock, flags); 442 spin_unlock_irqrestore(&simple_ida_lock, flags);
1142} 443}
1143EXPORT_SYMBOL(ida_simple_remove); 444EXPORT_SYMBOL(ida_simple_remove);
1144
1145/**
1146 * ida_init - initialize ida handle
1147 * @ida: ida handle
1148 *
1149 * This function is use to set up the handle (@ida) that you will pass
1150 * to the rest of the functions.
1151 */
1152void ida_init(struct ida *ida)
1153{
1154 memset(ida, 0, sizeof(struct ida));
1155 idr_init(&ida->idr);
1156
1157}
1158EXPORT_SYMBOL(ida_init);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7bf7d4e60e43..eaea14b8f2ca 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -22,20 +22,21 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#include <linux/bitmap.h>
26#include <linux/bitops.h>
25#include <linux/cpu.h> 27#include <linux/cpu.h>
26#include <linux/errno.h> 28#include <linux/errno.h>
29#include <linux/export.h>
30#include <linux/idr.h>
27#include <linux/init.h> 31#include <linux/init.h>
28#include <linux/kernel.h> 32#include <linux/kernel.h>
29#include <linux/export.h> 33#include <linux/kmemleak.h>
30#include <linux/radix-tree.h>
31#include <linux/percpu.h> 34#include <linux/percpu.h>
35#include <linux/preempt.h> /* in_interrupt() */
36#include <linux/radix-tree.h>
37#include <linux/rcupdate.h>
32#include <linux/slab.h> 38#include <linux/slab.h>
33#include <linux/kmemleak.h>
34#include <linux/cpu.h>
35#include <linux/string.h> 39#include <linux/string.h>
36#include <linux/bitops.h>
37#include <linux/rcupdate.h>
38#include <linux/preempt.h> /* in_interrupt() */
39 40
40 41
41/* Number of nodes in fully populated tree of given height */ 42/* Number of nodes in fully populated tree of given height */
@@ -60,6 +61,15 @@ static struct kmem_cache *radix_tree_node_cachep;
60#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) 61#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
61 62
62/* 63/*
64 * The IDR does not have to be as high as the radix tree since it uses
65 * signed integers, not unsigned longs.
66 */
67#define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1)
68#define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \
69 RADIX_TREE_MAP_SHIFT))
70#define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1)
71
72/*
63 * Per-cpu pool of preloaded nodes 73 * Per-cpu pool of preloaded nodes
64 */ 74 */
65struct radix_tree_preload { 75struct radix_tree_preload {
@@ -149,27 +159,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
149 159
150static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) 160static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
151{ 161{
152 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); 162 root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
153} 163}
154 164
155static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) 165static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
156{ 166{
157 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); 167 root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
158} 168}
159 169
160static inline void root_tag_clear_all(struct radix_tree_root *root) 170static inline void root_tag_clear_all(struct radix_tree_root *root)
161{ 171{
162 root->gfp_mask &= __GFP_BITS_MASK; 172 root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
163} 173}
164 174
165static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) 175static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
166{ 176{
167 return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 177 return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
168} 178}
169 179
170static inline unsigned root_tags_get(const struct radix_tree_root *root) 180static inline unsigned root_tags_get(const struct radix_tree_root *root)
171{ 181{
172 return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT; 182 return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
183}
184
185static inline bool is_idr(const struct radix_tree_root *root)
186{
187 return !!(root->gfp_mask & ROOT_IS_IDR);
173} 188}
174 189
175/* 190/*
@@ -187,6 +202,11 @@ static inline int any_tag_set(const struct radix_tree_node *node,
187 return 0; 202 return 0;
188} 203}
189 204
205static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
206{
207 bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
208}
209
190/** 210/**
191 * radix_tree_find_next_bit - find the next set bit in a memory region 211 * radix_tree_find_next_bit - find the next set bit in a memory region
192 * 212 *
@@ -240,6 +260,13 @@ static inline unsigned long node_maxindex(const struct radix_tree_node *node)
240 return shift_maxindex(node->shift); 260 return shift_maxindex(node->shift);
241} 261}
242 262
263static unsigned long next_index(unsigned long index,
264 const struct radix_tree_node *node,
265 unsigned long offset)
266{
267 return (index & ~node_maxindex(node)) + (offset << node->shift);
268}
269
243#ifndef __KERNEL__ 270#ifndef __KERNEL__
244static void dump_node(struct radix_tree_node *node, unsigned long index) 271static void dump_node(struct radix_tree_node *node, unsigned long index)
245{ 272{
@@ -278,11 +305,52 @@ static void radix_tree_dump(struct radix_tree_root *root)
278{ 305{
279 pr_debug("radix root: %p rnode %p tags %x\n", 306 pr_debug("radix root: %p rnode %p tags %x\n",
280 root, root->rnode, 307 root, root->rnode,
281 root->gfp_mask >> __GFP_BITS_SHIFT); 308 root->gfp_mask >> ROOT_TAG_SHIFT);
282 if (!radix_tree_is_internal_node(root->rnode)) 309 if (!radix_tree_is_internal_node(root->rnode))
283 return; 310 return;
284 dump_node(entry_to_node(root->rnode), 0); 311 dump_node(entry_to_node(root->rnode), 0);
285} 312}
313
314static void dump_ida_node(void *entry, unsigned long index)
315{
316 unsigned long i;
317
318 if (!entry)
319 return;
320
321 if (radix_tree_is_internal_node(entry)) {
322 struct radix_tree_node *node = entry_to_node(entry);
323
324 pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
325 node, node->offset, index * IDA_BITMAP_BITS,
326 ((index | node_maxindex(node)) + 1) *
327 IDA_BITMAP_BITS - 1,
328 node->parent, node->tags[0][0], node->shift,
329 node->count);
330 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
331 dump_ida_node(node->slots[i],
332 index | (i << node->shift));
333 } else {
334 struct ida_bitmap *bitmap = entry;
335
336 pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
337 (int)(index & RADIX_TREE_MAP_MASK),
338 index * IDA_BITMAP_BITS,
339 (index + 1) * IDA_BITMAP_BITS - 1);
340 for (i = 0; i < IDA_BITMAP_LONGS; i++)
341 pr_cont(" %lx", bitmap->bitmap[i]);
342 pr_cont("\n");
343 }
344}
345
346static void ida_dump(struct ida *ida)
347{
348 struct radix_tree_root *root = &ida->ida_rt;
349 pr_debug("ida: %p %p free %d bitmap %p\n", ida, root->rnode,
350 root->gfp_mask >> ROOT_TAG_SHIFT,
351 ida->free_bitmap);
352 dump_ida_node(root->rnode, 0);
353}
286#endif 354#endif
287 355
288/* 356/*
@@ -290,13 +358,11 @@ static void radix_tree_dump(struct radix_tree_root *root)
290 * that the caller has pinned this thread of control to the current CPU. 358 * that the caller has pinned this thread of control to the current CPU.
291 */ 359 */
292static struct radix_tree_node * 360static struct radix_tree_node *
293radix_tree_node_alloc(struct radix_tree_root *root, 361radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
294 struct radix_tree_node *parent,
295 unsigned int shift, unsigned int offset, 362 unsigned int shift, unsigned int offset,
296 unsigned int count, unsigned int exceptional) 363 unsigned int count, unsigned int exceptional)
297{ 364{
298 struct radix_tree_node *ret = NULL; 365 struct radix_tree_node *ret = NULL;
299 gfp_t gfp_mask = root_gfp_mask(root);
300 366
301 /* 367 /*
302 * Preload code isn't irq safe and it doesn't make sense to use 368 * Preload code isn't irq safe and it doesn't make sense to use
@@ -533,7 +599,7 @@ static unsigned radix_tree_load_root(const struct radix_tree_root *root,
533/* 599/*
534 * Extend a radix tree so it can store key @index. 600 * Extend a radix tree so it can store key @index.
535 */ 601 */
536static int radix_tree_extend(struct radix_tree_root *root, 602static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
537 unsigned long index, unsigned int shift) 603 unsigned long index, unsigned int shift)
538{ 604{
539 struct radix_tree_node *slot; 605 struct radix_tree_node *slot;
@@ -546,19 +612,27 @@ static int radix_tree_extend(struct radix_tree_root *root,
546 maxshift += RADIX_TREE_MAP_SHIFT; 612 maxshift += RADIX_TREE_MAP_SHIFT;
547 613
548 slot = root->rnode; 614 slot = root->rnode;
549 if (!slot) 615 if (!slot && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
550 goto out; 616 goto out;
551 617
552 do { 618 do {
553 struct radix_tree_node *node = radix_tree_node_alloc(root, 619 struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
554 NULL, shift, 0, 1, 0); 620 shift, 0, 1, 0);
555 if (!node) 621 if (!node)
556 return -ENOMEM; 622 return -ENOMEM;
557 623
558 /* Propagate the aggregated tag info into the new root */ 624 if (is_idr(root)) {
559 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 625 all_tag_set(node, IDR_FREE);
560 if (root_tag_get(root, tag)) 626 if (!root_tag_get(root, IDR_FREE)) {
561 tag_set(node, tag, 0); 627 tag_clear(node, IDR_FREE, 0);
628 root_tag_set(root, IDR_FREE);
629 }
630 } else {
631 /* Propagate the aggregated tag info to the new child */
632 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
633 if (root_tag_get(root, tag))
634 tag_set(node, tag, 0);
635 }
562 } 636 }
563 637
564 BUG_ON(shift > BITS_PER_LONG); 638 BUG_ON(shift > BITS_PER_LONG);
@@ -619,6 +693,8 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
619 * one (root->rnode) as far as dependent read barriers go. 693 * one (root->rnode) as far as dependent read barriers go.
620 */ 694 */
621 root->rnode = child; 695 root->rnode = child;
696 if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
697 root_tag_clear(root, IDR_FREE);
622 698
623 /* 699 /*
624 * We have a dilemma here. The node's slot[0] must not be 700 * We have a dilemma here. The node's slot[0] must not be
@@ -674,7 +750,12 @@ static bool delete_node(struct radix_tree_root *root,
674 parent->slots[node->offset] = NULL; 750 parent->slots[node->offset] = NULL;
675 parent->count--; 751 parent->count--;
676 } else { 752 } else {
677 root_tag_clear_all(root); 753 /*
754 * Shouldn't the tags already have all been cleared
755 * by the caller?
756 */
757 if (!is_idr(root))
758 root_tag_clear_all(root);
678 root->rnode = NULL; 759 root->rnode = NULL;
679 } 760 }
680 761
@@ -714,6 +795,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
714 unsigned long maxindex; 795 unsigned long maxindex;
715 unsigned int shift, offset = 0; 796 unsigned int shift, offset = 0;
716 unsigned long max = index | ((1UL << order) - 1); 797 unsigned long max = index | ((1UL << order) - 1);
798 gfp_t gfp = root_gfp_mask(root);
717 799
718 shift = radix_tree_load_root(root, &child, &maxindex); 800 shift = radix_tree_load_root(root, &child, &maxindex);
719 801
@@ -721,7 +803,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
721 if (order > 0 && max == ((1UL << order) - 1)) 803 if (order > 0 && max == ((1UL << order) - 1))
722 max++; 804 max++;
723 if (max > maxindex) { 805 if (max > maxindex) {
724 int error = radix_tree_extend(root, max, shift); 806 int error = radix_tree_extend(root, gfp, max, shift);
725 if (error < 0) 807 if (error < 0)
726 return error; 808 return error;
727 shift = error; 809 shift = error;
@@ -732,7 +814,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
732 shift -= RADIX_TREE_MAP_SHIFT; 814 shift -= RADIX_TREE_MAP_SHIFT;
733 if (child == NULL) { 815 if (child == NULL) {
734 /* Have to add a child node. */ 816 /* Have to add a child node. */
735 child = radix_tree_node_alloc(root, node, shift, 817 child = radix_tree_node_alloc(gfp, node, shift,
736 offset, 0, 0); 818 offset, 0, 0);
737 if (!child) 819 if (!child)
738 return -ENOMEM; 820 return -ENOMEM;
@@ -755,7 +837,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
755 return 0; 837 return 0;
756} 838}
757 839
758#ifdef CONFIG_RADIX_TREE_MULTIORDER
759/* 840/*
760 * Free any nodes below this node. The tree is presumed to not need 841 * Free any nodes below this node. The tree is presumed to not need
761 * shrinking, and any user data in the tree is presumed to not need a 842 * shrinking, and any user data in the tree is presumed to not need a
@@ -791,6 +872,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
791 } 872 }
792} 873}
793 874
875#ifdef CONFIG_RADIX_TREE_MULTIORDER
794static inline int insert_entries(struct radix_tree_node *node, void **slot, 876static inline int insert_entries(struct radix_tree_node *node, void **slot,
795 void *item, unsigned order, bool replace) 877 void *item, unsigned order, bool replace)
796{ 878{
@@ -996,69 +1078,70 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
996} 1078}
997EXPORT_SYMBOL(radix_tree_lookup); 1079EXPORT_SYMBOL(radix_tree_lookup);
998 1080
999static inline int slot_count(struct radix_tree_node *node, 1081static inline void replace_sibling_entries(struct radix_tree_node *node,
1000 void **slot) 1082 void **slot, int count, int exceptional)
1001{ 1083{
1002 int n = 1;
1003#ifdef CONFIG_RADIX_TREE_MULTIORDER 1084#ifdef CONFIG_RADIX_TREE_MULTIORDER
1004 void *ptr = node_to_entry(slot); 1085 void *ptr = node_to_entry(slot);
1005 unsigned offset = get_slot_offset(node, slot); 1086 unsigned offset = get_slot_offset(node, slot) + 1;
1006 int i;
1007 1087
1008 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { 1088 while (offset < RADIX_TREE_MAP_SIZE) {
1009 if (node->slots[offset + i] != ptr) 1089 if (node->slots[offset] != ptr)
1010 break; 1090 break;
1011 n++; 1091 if (count < 0) {
1092 node->slots[offset] = NULL;
1093 node->count--;
1094 }
1095 node->exceptional += exceptional;
1096 offset++;
1012 } 1097 }
1013#endif 1098#endif
1014 return n;
1015} 1099}
1016 1100
1017static void replace_slot(struct radix_tree_root *root, 1101static void replace_slot(void **slot, void *item, struct radix_tree_node *node,
1018 struct radix_tree_node *node, 1102 int count, int exceptional)
1019 void **slot, void *item,
1020 bool warn_typeswitch)
1021{ 1103{
1022 void *old = rcu_dereference_raw(*slot); 1104 if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))
1023 int count, exceptional; 1105 return;
1024
1025 WARN_ON_ONCE(radix_tree_is_internal_node(item));
1026
1027 count = !!item - !!old;
1028 exceptional = !!radix_tree_exceptional_entry(item) -
1029 !!radix_tree_exceptional_entry(old);
1030
1031 WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
1032 1106
1033 if (node) { 1107 if (node && (count || exceptional)) {
1034 node->count += count; 1108 node->count += count;
1035 if (exceptional) { 1109 node->exceptional += exceptional;
1036 exceptional *= slot_count(node, slot); 1110 replace_sibling_entries(node, slot, count, exceptional);
1037 node->exceptional += exceptional;
1038 }
1039 } 1111 }
1040 1112
1041 rcu_assign_pointer(*slot, item); 1113 rcu_assign_pointer(*slot, item);
1042} 1114}
1043 1115
1044static inline void delete_sibling_entries(struct radix_tree_node *node, 1116static bool node_tag_get(const struct radix_tree_root *root,
1045 void **slot) 1117 const struct radix_tree_node *node,
1118 unsigned int tag, unsigned int offset)
1046{ 1119{
1047#ifdef CONFIG_RADIX_TREE_MULTIORDER 1120 if (node)
1048 bool exceptional = radix_tree_exceptional_entry(*slot); 1121 return tag_get(node, tag, offset);
1049 void *ptr = node_to_entry(slot); 1122 return root_tag_get(root, tag);
1050 unsigned offset = get_slot_offset(node, slot); 1123}
1051 int i;
1052 1124
1053 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { 1125/*
1054 if (node->slots[offset + i] != ptr) 1126 * IDR users want to be able to store NULL in the tree, so if the slot isn't
1055 break; 1127 * free, don't adjust the count, even if it's transitioning between NULL and
1056 node->slots[offset + i] = NULL; 1128 * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still
1057 node->count--; 1129 * have empty bits, but it only stores NULL in slots when they're being
1058 if (exceptional) 1130 * deleted.
1059 node->exceptional--; 1131 */
1132static int calculate_count(struct radix_tree_root *root,
1133 struct radix_tree_node *node, void **slot,
1134 void *item, void *old)
1135{
1136 if (is_idr(root)) {
1137 unsigned offset = get_slot_offset(node, slot);
1138 bool free = node_tag_get(root, node, IDR_FREE, offset);
1139 if (!free)
1140 return 0;
1141 if (!old)
1142 return 1;
1060 } 1143 }
1061#endif 1144 return !!item - !!old;
1062} 1145}
1063 1146
1064/** 1147/**
@@ -1078,15 +1161,19 @@ void __radix_tree_replace(struct radix_tree_root *root,
1078 void **slot, void *item, 1161 void **slot, void *item,
1079 radix_tree_update_node_t update_node, void *private) 1162 radix_tree_update_node_t update_node, void *private)
1080{ 1163{
1081 if (!item) 1164 void *old = rcu_dereference_raw(*slot);
1082 delete_sibling_entries(node, slot); 1165 int exceptional = !!radix_tree_exceptional_entry(item) -
1166 !!radix_tree_exceptional_entry(old);
1167 int count = calculate_count(root, node, slot, item, old);
1168
1083 /* 1169 /*
1084 * This function supports replacing exceptional entries and 1170 * This function supports replacing exceptional entries and
1085 * deleting entries, but that needs accounting against the 1171 * deleting entries, but that needs accounting against the
1086 * node unless the slot is root->rnode. 1172 * node unless the slot is root->rnode.
1087 */ 1173 */
1088 replace_slot(root, node, slot, item, 1174 WARN_ON_ONCE(!node && (slot != (void **)&root->rnode) &&
1089 !node && slot != (void **)&root->rnode); 1175 (count || exceptional));
1176 replace_slot(slot, item, node, count, exceptional);
1090 1177
1091 if (!node) 1178 if (!node)
1092 return; 1179 return;
@@ -1116,7 +1203,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
1116void radix_tree_replace_slot(struct radix_tree_root *root, 1203void radix_tree_replace_slot(struct radix_tree_root *root,
1117 void **slot, void *item) 1204 void **slot, void *item)
1118{ 1205{
1119 replace_slot(root, NULL, slot, item, true); 1206 __radix_tree_replace(root, NULL, slot, item, NULL, NULL);
1120} 1207}
1121 1208
1122/** 1209/**
@@ -1191,6 +1278,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
1191 void **slot; 1278 void **slot;
1192 unsigned int offset, end; 1279 unsigned int offset, end;
1193 unsigned n, tag, tags = 0; 1280 unsigned n, tag, tags = 0;
1281 gfp_t gfp = root_gfp_mask(root);
1194 1282
1195 if (!__radix_tree_lookup(root, index, &parent, &slot)) 1283 if (!__radix_tree_lookup(root, index, &parent, &slot))
1196 return -ENOENT; 1284 return -ENOENT;
@@ -1228,7 +1316,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
1228 1316
1229 for (;;) { 1317 for (;;) {
1230 if (node->shift > order) { 1318 if (node->shift > order) {
1231 child = radix_tree_node_alloc(root, node, 1319 child = radix_tree_node_alloc(gfp, node,
1232 node->shift - RADIX_TREE_MAP_SHIFT, 1320 node->shift - RADIX_TREE_MAP_SHIFT,
1233 offset, 0, 0); 1321 offset, 0, 0);
1234 if (!child) 1322 if (!child)
@@ -1444,8 +1532,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
1444 radix_tree_load_root(root, &node, &maxindex); 1532 radix_tree_load_root(root, &node, &maxindex);
1445 if (index > maxindex) 1533 if (index > maxindex)
1446 return 0; 1534 return 0;
1447 if (node == NULL)
1448 return 0;
1449 1535
1450 while (radix_tree_is_internal_node(node)) { 1536 while (radix_tree_is_internal_node(node)) {
1451 unsigned offset; 1537 unsigned offset;
@@ -1453,8 +1539,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
1453 parent = entry_to_node(node); 1539 parent = entry_to_node(node);
1454 offset = radix_tree_descend(parent, &node, index); 1540 offset = radix_tree_descend(parent, &node, index);
1455 1541
1456 if (!node)
1457 return 0;
1458 if (!tag_get(parent, tag, offset)) 1542 if (!tag_get(parent, tag, offset))
1459 return 0; 1543 return 0;
1460 if (node == RADIX_TREE_RETRY) 1544 if (node == RADIX_TREE_RETRY)
@@ -1481,6 +1565,11 @@ static void set_iter_tags(struct radix_tree_iter *iter,
1481 unsigned tag_long = offset / BITS_PER_LONG; 1565 unsigned tag_long = offset / BITS_PER_LONG;
1482 unsigned tag_bit = offset % BITS_PER_LONG; 1566 unsigned tag_bit = offset % BITS_PER_LONG;
1483 1567
1568 if (!node) {
1569 iter->tags = 1;
1570 return;
1571 }
1572
1484 iter->tags = node->tags[tag][tag_long] >> tag_bit; 1573 iter->tags = node->tags[tag][tag_long] >> tag_bit;
1485 1574
1486 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ 1575 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
@@ -1873,13 +1962,18 @@ void __radix_tree_delete_node(struct radix_tree_root *root,
1873static bool __radix_tree_delete(struct radix_tree_root *root, 1962static bool __radix_tree_delete(struct radix_tree_root *root,
1874 struct radix_tree_node *node, void **slot) 1963 struct radix_tree_node *node, void **slot)
1875{ 1964{
1965 void *old = rcu_dereference_raw(*slot);
1966 int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
1876 unsigned offset = get_slot_offset(node, slot); 1967 unsigned offset = get_slot_offset(node, slot);
1877 int tag; 1968 int tag;
1878 1969
1879 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1970 if (is_idr(root))
1880 node_tag_clear(root, node, tag, offset); 1971 node_tag_set(root, node, IDR_FREE, offset);
1972 else
1973 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1974 node_tag_clear(root, node, tag, offset);
1881 1975
1882 replace_slot(root, node, slot, NULL, true); 1976 replace_slot(slot, NULL, node, -1, exceptional);
1883 return node && delete_node(root, node, NULL, NULL); 1977 return node && delete_node(root, node, NULL, NULL);
1884} 1978}
1885 1979
@@ -1916,12 +2010,13 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
1916void *radix_tree_delete_item(struct radix_tree_root *root, 2010void *radix_tree_delete_item(struct radix_tree_root *root,
1917 unsigned long index, void *item) 2011 unsigned long index, void *item)
1918{ 2012{
1919 struct radix_tree_node *node; 2013 struct radix_tree_node *node = NULL;
1920 void **slot; 2014 void **slot;
1921 void *entry; 2015 void *entry;
1922 2016
1923 entry = __radix_tree_lookup(root, index, &node, &slot); 2017 entry = __radix_tree_lookup(root, index, &node, &slot);
1924 if (!entry) 2018 if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
2019 get_slot_offset(node, slot))))
1925 return NULL; 2020 return NULL;
1926 2021
1927 if (item && entry != item) 2022 if (item && entry != item)
@@ -1957,8 +2052,7 @@ void radix_tree_clear_tags(struct radix_tree_root *root,
1957 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 2052 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1958 node_tag_clear(root, node, tag, offset); 2053 node_tag_clear(root, node, tag, offset);
1959 } else { 2054 } else {
1960 /* Clear root node tags */ 2055 root_tag_clear_all(root);
1961 root->gfp_mask &= __GFP_BITS_MASK;
1962 } 2056 }
1963} 2057}
1964 2058
@@ -1973,6 +2067,111 @@ int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
1973} 2067}
1974EXPORT_SYMBOL(radix_tree_tagged); 2068EXPORT_SYMBOL(radix_tree_tagged);
1975 2069
2070/**
2071 * idr_preload - preload for idr_alloc()
2072 * @gfp_mask: allocation mask to use for preloading
2073 *
2074 * Preallocate memory to use for the next call to idr_alloc(). This function
2075 * returns with preemption disabled. It will be enabled by idr_preload_end().
2076 */
2077void idr_preload(gfp_t gfp_mask)
2078{
2079 __radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE);
2080}
2081EXPORT_SYMBOL(idr_preload);
2082
2083void **idr_get_free(struct radix_tree_root *root,
2084 struct radix_tree_iter *iter, gfp_t gfp, int end)
2085{
2086 struct radix_tree_node *node = NULL, *child;
2087 void **slot = (void **)&root->rnode;
2088 unsigned long maxindex, start = iter->next_index;
2089 unsigned long max = end > 0 ? end - 1 : INT_MAX;
2090 unsigned int shift, offset = 0;
2091
2092 grow:
2093 shift = radix_tree_load_root(root, &child, &maxindex);
2094 if (!radix_tree_tagged(root, IDR_FREE))
2095 start = max(start, maxindex + 1);
2096 if (start > max)
2097 return ERR_PTR(-ENOSPC);
2098
2099 if (start > maxindex) {
2100 int error = radix_tree_extend(root, gfp, start, shift);
2101 if (error < 0)
2102 return ERR_PTR(error);
2103 shift = error;
2104 child = rcu_dereference_raw(root->rnode);
2105 }
2106
2107 while (shift) {
2108 shift -= RADIX_TREE_MAP_SHIFT;
2109 if (child == NULL) {
2110 /* Have to add a child node. */
2111 child = radix_tree_node_alloc(gfp, node, shift, offset,
2112 0, 0);
2113 if (!child)
2114 return ERR_PTR(-ENOMEM);
2115 all_tag_set(child, IDR_FREE);
2116 rcu_assign_pointer(*slot, node_to_entry(child));
2117 if (node)
2118 node->count++;
2119 } else if (!radix_tree_is_internal_node(child))
2120 break;
2121
2122 node = entry_to_node(child);
2123 offset = radix_tree_descend(node, &child, start);
2124 if (!tag_get(node, IDR_FREE, offset)) {
2125 offset = radix_tree_find_next_bit(node, IDR_FREE,
2126 offset + 1);
2127 start = next_index(start, node, offset);
2128 if (start > max)
2129 return ERR_PTR(-ENOSPC);
2130 while (offset == RADIX_TREE_MAP_SIZE) {
2131 offset = node->offset + 1;
2132 node = node->parent;
2133 if (!node)
2134 goto grow;
2135 shift = node->shift;
2136 }
2137 child = rcu_dereference_raw(node->slots[offset]);
2138 }
2139 slot = &node->slots[offset];
2140 }
2141
2142 iter->index = start;
2143 if (node)
2144 iter->next_index = 1 + min(max, (start | node_maxindex(node)));
2145 else
2146 iter->next_index = 1;
2147 iter->node = node;
2148 __set_iter_shift(iter, shift);
2149 set_iter_tags(iter, node, offset, IDR_FREE);
2150
2151 return slot;
2152}
2153
2154/**
2155 * idr_destroy - release all internal memory from an IDR
2156 * @idr: idr handle
2157 *
2158 * After this function is called, the IDR is empty, and may be reused or
2159 * the data structure containing it may be freed.
2160 *
2161 * A typical clean-up sequence for objects stored in an idr tree will use
2162 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
2163 * free the memory used to keep track of those objects.
2164 */
2165void idr_destroy(struct idr *idr)
2166{
2167 struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
2168 if (radix_tree_is_internal_node(node))
2169 radix_tree_free_nodes(node);
2170 idr->idr_rt.rnode = NULL;
2171 root_tag_set(&idr->idr_rt, IDR_FREE);
2172}
2173EXPORT_SYMBOL(idr_destroy);
2174
1976static void 2175static void
1977radix_tree_node_ctor(void *arg) 2176radix_tree_node_ctor(void *arg)
1978{ 2177{
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index 11d888ca6a92..3b5534b643f0 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -1,2 +1,3 @@
1main 1main
2radix-tree.c 2radix-tree.c
3idr.c
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 5274e88cd293..3597a3a9f269 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -2,8 +2,8 @@
2CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE 2CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE
3LDFLAGS += -lpthread -lurcu 3LDFLAGS += -lpthread -lurcu
4TARGETS = main 4TARGETS = main
5OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_bit.o \ 5OFILES = main.o radix-tree.o idr.o linux.o test.o tag_check.o find_bit.o \
6 regression1.o regression2.o regression3.o multiorder.o \ 6 regression1.o regression2.o regression3.o multiorder.o idr-test.o \
7 iteration_check.o benchmark.o 7 iteration_check.o benchmark.o
8 8
9ifdef BENCHMARK 9ifdef BENCHMARK
@@ -23,7 +23,11 @@ vpath %.c ../../lib
23$(OFILES): *.h */*.h \ 23$(OFILES): *.h */*.h \
24 ../../include/linux/*.h \ 24 ../../include/linux/*.h \
25 ../../include/asm/*.h \ 25 ../../include/asm/*.h \
26 ../../../include/linux/radix-tree.h 26 ../../../include/linux/radix-tree.h \
27 ../../../include/linux/idr.h
27 28
28radix-tree.c: ../../../lib/radix-tree.c 29radix-tree.c: ../../../lib/radix-tree.c
29 sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ 30 sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
31
32idr.c: ../../../lib/idr.c
33 sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
new file mode 100644
index 000000000000..4dffad9284e0
--- /dev/null
+++ b/tools/testing/radix-tree/idr-test.c
@@ -0,0 +1,342 @@
1/*
2 * idr-test.c: Test the IDR API
3 * Copyright (c) 2016 Matthew Wilcox <willy@infradead.org>
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14#include <linux/idr.h>
15#include <linux/slab.h>
16#include <linux/kernel.h>
17#include <linux/errno.h>
18
19#include "test.h"
20
21#define DUMMY_PTR ((void *)0x12)
22
23int item_idr_free(int id, void *p, void *data)
24{
25 struct item *item = p;
26 assert(item->index == id);
27 free(p);
28
29 return 0;
30}
31
32void item_idr_remove(struct idr *idr, int id)
33{
34 struct item *item = idr_find(idr, id);
35 assert(item->index == id);
36 idr_remove(idr, id);
37 free(item);
38}
39
40void idr_alloc_test(void)
41{
42 unsigned long i;
43 DEFINE_IDR(idr);
44
45 assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0, 0x4000, GFP_KERNEL) == 0);
46 assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0x3ffd, 0x4000, GFP_KERNEL) == 0x3ffd);
47 idr_remove(&idr, 0x3ffd);
48 idr_remove(&idr, 0);
49
50 for (i = 0x3ffe; i < 0x4003; i++) {
51 int id;
52 struct item *item;
53
54 if (i < 0x4000)
55 item = item_create(i, 0);
56 else
57 item = item_create(i - 0x3fff, 0);
58
59 id = idr_alloc_cyclic(&idr, item, 1, 0x4000, GFP_KERNEL);
60 assert(id == item->index);
61 }
62
63 idr_for_each(&idr, item_idr_free, &idr);
64 idr_destroy(&idr);
65}
66
67void idr_replace_test(void)
68{
69 DEFINE_IDR(idr);
70
71 idr_alloc(&idr, (void *)-1, 10, 11, GFP_KERNEL);
72 idr_replace(&idr, &idr, 10);
73
74 idr_destroy(&idr);
75}
76
77/*
78 * Unlike the radix tree, you can put a NULL pointer -- with care -- into
79 * the IDR. Some interfaces, like idr_find() do not distinguish between
80 * "present, value is NULL" and "not present", but that's exactly what some
81 * users want.
82 */
83void idr_null_test(void)
84{
85 int i;
86 DEFINE_IDR(idr);
87
88 assert(idr_is_empty(&idr));
89
90 assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
91 assert(!idr_is_empty(&idr));
92 idr_remove(&idr, 0);
93 assert(idr_is_empty(&idr));
94
95 assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
96 assert(!idr_is_empty(&idr));
97 idr_destroy(&idr);
98 assert(idr_is_empty(&idr));
99
100 for (i = 0; i < 10; i++) {
101 assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == i);
102 }
103
104 assert(idr_replace(&idr, DUMMY_PTR, 3) == NULL);
105 assert(idr_replace(&idr, DUMMY_PTR, 4) == NULL);
106 assert(idr_replace(&idr, NULL, 4) == DUMMY_PTR);
107 assert(idr_replace(&idr, DUMMY_PTR, 11) == ERR_PTR(-ENOENT));
108 idr_remove(&idr, 5);
109 assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 5);
110 idr_remove(&idr, 5);
111
112 for (i = 0; i < 9; i++) {
113 idr_remove(&idr, i);
114 assert(!idr_is_empty(&idr));
115 }
116 idr_remove(&idr, 8);
117 assert(!idr_is_empty(&idr));
118 idr_remove(&idr, 9);
119 assert(idr_is_empty(&idr));
120
121 assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
122 assert(idr_replace(&idr, DUMMY_PTR, 3) == ERR_PTR(-ENOENT));
123 assert(idr_replace(&idr, DUMMY_PTR, 0) == NULL);
124 assert(idr_replace(&idr, NULL, 0) == DUMMY_PTR);
125
126 idr_destroy(&idr);
127 assert(idr_is_empty(&idr));
128
129 for (i = 1; i < 10; i++) {
130 assert(idr_alloc(&idr, NULL, 1, 0, GFP_KERNEL) == i);
131 }
132
133 idr_destroy(&idr);
134 assert(idr_is_empty(&idr));
135}
136
137void idr_nowait_test(void)
138{
139 unsigned int i;
140 DEFINE_IDR(idr);
141
142 idr_preload(GFP_KERNEL);
143
144 for (i = 0; i < 3; i++) {
145 struct item *item = item_create(i, 0);
146 assert(idr_alloc(&idr, item, i, i + 1, GFP_NOWAIT) == i);
147 }
148
149 idr_preload_end();
150
151 idr_for_each(&idr, item_idr_free, &idr);
152 idr_destroy(&idr);
153}
154
155void idr_checks(void)
156{
157 unsigned long i;
158 DEFINE_IDR(idr);
159
160 for (i = 0; i < 10000; i++) {
161 struct item *item = item_create(i, 0);
162 assert(idr_alloc(&idr, item, 0, 20000, GFP_KERNEL) == i);
163 }
164
165 assert(idr_alloc(&idr, DUMMY_PTR, 5, 30, GFP_KERNEL) < 0);
166
167 for (i = 0; i < 5000; i++)
168 item_idr_remove(&idr, i);
169
170 idr_remove(&idr, 3);
171
172 idr_for_each(&idr, item_idr_free, &idr);
173 idr_destroy(&idr);
174
175 assert(idr_is_empty(&idr));
176
177 idr_remove(&idr, 3);
178 idr_remove(&idr, 0);
179
180 for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
181 struct item *item = item_create(i, 0);
182 assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
183 }
184 assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC);
185
186 idr_for_each(&idr, item_idr_free, &idr);
187 idr_destroy(&idr);
188 idr_destroy(&idr);
189
190 assert(idr_is_empty(&idr));
191
192 for (i = 1; i < 10000; i++) {
193 struct item *item = item_create(i, 0);
194 assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i);
195 }
196
197 idr_for_each(&idr, item_idr_free, &idr);
198 idr_destroy(&idr);
199
200 idr_replace_test();
201 idr_alloc_test();
202 idr_null_test();
203 idr_nowait_test();
204}
205
206/*
207 * Check that we get the correct error when we run out of memory doing
208 * allocations. To ensure we run out of memory, just "forget" to preload.
209 * The first test is for not having a bitmap available, and the second test
210 * is for not being able to allocate a level of the radix tree.
211 */
212void ida_check_nomem(void)
213{
214 DEFINE_IDA(ida);
215 int id, err;
216
217 err = ida_get_new(&ida, &id);
218 assert(err == -EAGAIN);
219 err = ida_get_new_above(&ida, 1UL << 30, &id);
220 assert(err == -EAGAIN);
221}
222
223/*
224 * Check what happens when we fill a leaf and then delete it. This may
225 * discover mishandling of IDR_FREE.
226 */
227void ida_check_leaf(void)
228{
229 DEFINE_IDA(ida);
230 int id;
231 unsigned long i;
232
233 for (i = 0; i < IDA_BITMAP_BITS; i++) {
234 assert(ida_pre_get(&ida, GFP_KERNEL));
235 assert(!ida_get_new(&ida, &id));
236 assert(id == i);
237 }
238
239 ida_destroy(&ida);
240 assert(ida_is_empty(&ida));
241
242 assert(ida_pre_get(&ida, GFP_KERNEL));
243 assert(!ida_get_new(&ida, &id));
244 assert(id == 0);
245 ida_destroy(&ida);
246 assert(ida_is_empty(&ida));
247}
248
249/*
250 * Check allocations up to and slightly above the maximum allowed (2^31-1) ID.
251 * Allocating up to 2^31-1 should succeed, and then allocating the next one
252 * should fail.
253 */
254void ida_check_max(void)
255{
256 DEFINE_IDA(ida);
257 int id, err;
258 unsigned long i, j;
259
260 for (j = 1; j < 65537; j *= 2) {
261 unsigned long base = (1UL << 31) - j;
262 for (i = 0; i < j; i++) {
263 assert(ida_pre_get(&ida, GFP_KERNEL));
264 assert(!ida_get_new_above(&ida, base, &id));
265 assert(id == base + i);
266 }
267 assert(ida_pre_get(&ida, GFP_KERNEL));
268 err = ida_get_new_above(&ida, base, &id);
269 assert(err == -ENOSPC);
270 ida_destroy(&ida);
271 assert(ida_is_empty(&ida));
272 rcu_barrier();
273 }
274}
275
276void ida_checks(void)
277{
278 DEFINE_IDA(ida);
279 int id;
280 unsigned long i;
281
282 radix_tree_cpu_dead(1);
283 ida_check_nomem();
284
285 for (i = 0; i < 10000; i++) {
286 assert(ida_pre_get(&ida, GFP_KERNEL));
287 assert(!ida_get_new(&ida, &id));
288 assert(id == i);
289 }
290
291 ida_remove(&ida, 20);
292 ida_remove(&ida, 21);
293 for (i = 0; i < 3; i++) {
294 assert(ida_pre_get(&ida, GFP_KERNEL));
295 assert(!ida_get_new(&ida, &id));
296 if (i == 2)
297 assert(id == 10000);
298 }
299
300 for (i = 0; i < 5000; i++)
301 ida_remove(&ida, i);
302
303 assert(ida_pre_get(&ida, GFP_KERNEL));
304 assert(!ida_get_new_above(&ida, 5000, &id));
305 assert(id == 10001);
306
307 ida_destroy(&ida);
308
309 assert(ida_is_empty(&ida));
310
311 assert(ida_pre_get(&ida, GFP_KERNEL));
312 assert(!ida_get_new_above(&ida, 1, &id));
313 assert(id == 1);
314
315 ida_remove(&ida, id);
316 assert(ida_is_empty(&ida));
317 ida_destroy(&ida);
318 assert(ida_is_empty(&ida));
319
320 assert(ida_pre_get(&ida, GFP_KERNEL));
321 assert(!ida_get_new_above(&ida, 1, &id));
322 ida_destroy(&ida);
323 assert(ida_is_empty(&ida));
324
325 assert(ida_pre_get(&ida, GFP_KERNEL));
326 assert(!ida_get_new_above(&ida, 1, &id));
327 assert(id == 1);
328 assert(ida_pre_get(&ida, GFP_KERNEL));
329 assert(!ida_get_new_above(&ida, 1025, &id));
330 assert(id == 1025);
331 assert(ida_pre_get(&ida, GFP_KERNEL));
332 assert(!ida_get_new_above(&ida, 10000, &id));
333 assert(id == 10000);
334 ida_remove(&ida, 1025);
335 ida_destroy(&ida);
336 assert(ida_is_empty(&ida));
337
338 ida_check_leaf();
339 ida_check_max();
340
341 radix_tree_cpu_dead(1);
342}
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
index 701209816a6b..39a0dcb9475a 100644
--- a/tools/testing/radix-tree/linux/gfp.h
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -15,10 +15,12 @@
 #define __GFP_DIRECT_RECLAIM	0x400000u
 #define __GFP_KSWAPD_RECLAIM	0x2000000u
 
 #define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+
+#define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
 
-#define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
-#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 
 static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 {
diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/radix-tree/linux/idr.h
new file mode 100644
index 000000000000..4e342f2e37cf
--- /dev/null
+++ b/tools/testing/radix-tree/linux/idr.h
@@ -0,0 +1 @@
#include "../../../../include/linux/idr.h"
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index dd1d9aefb14f..63fce553781a 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -20,6 +20,7 @@
20 20
21#define printk printf 21#define printk printf
22#define pr_debug printk 22#define pr_debug printk
23#define pr_cont printk
23 24
24#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) 25#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
25 26
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index f7e9801a6754..ddd90a11db3f 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -3,6 +3,7 @@
3#include <unistd.h> 3#include <unistd.h>
4#include <time.h> 4#include <time.h>
5#include <assert.h> 5#include <assert.h>
6#include <limits.h>
6 7
7#include <linux/slab.h> 8#include <linux/slab.h>
8#include <linux/radix-tree.h> 9#include <linux/radix-tree.h>
@@ -314,6 +315,11 @@ static void single_thread_tests(bool long_run)
314 rcu_barrier(); 315 rcu_barrier();
315 printf("after dynamic_height_check: %d allocated, preempt %d\n", 316 printf("after dynamic_height_check: %d allocated, preempt %d\n",
316 nr_allocated, preempt_count); 317 nr_allocated, preempt_count);
318 idr_checks();
319 ida_checks();
320 rcu_barrier();
321 printf("after idr_checks: %d allocated, preempt %d\n",
322 nr_allocated, preempt_count);
317 big_gang_check(long_run); 323 big_gang_check(long_run);
318 rcu_barrier(); 324 rcu_barrier();
319 printf("after big_gang_check: %d allocated, preempt %d\n", 325 printf("after big_gang_check: %d allocated, preempt %d\n",
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 056a23b56467..b30e11d9d271 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -34,6 +34,8 @@ void tag_check(void);
34void multiorder_checks(void); 34void multiorder_checks(void);
35void iteration_test(unsigned order, unsigned duration); 35void iteration_test(unsigned order, unsigned duration);
36void benchmark(void); 36void benchmark(void);
37void idr_checks(void);
38void ida_checks(void);
37 39
38struct item * 40struct item *
39item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); 41item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);