diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/Makefile | 3 | ||||
| -rw-r--r-- | lib/idr.c | 1242 | ||||
| -rw-r--r-- | lib/radix-tree.c | 761 |
3 files changed, 819 insertions, 1187 deletions
diff --git a/lib/Makefile b/lib/Makefile index 469b2392893a..320ac46a8725 100644 --- a/lib/Makefile +++ b/lib/Makefile | |||
| @@ -25,6 +25,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ | |||
| 25 | earlycpio.o seq_buf.o siphash.o \ | 25 | earlycpio.o seq_buf.o siphash.o \ |
| 26 | nmi_backtrace.o nodemask.o win_minmax.o | 26 | nmi_backtrace.o nodemask.o win_minmax.o |
| 27 | 27 | ||
| 28 | CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER | ||
| 29 | CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER | ||
| 30 | |||
| 28 | lib-$(CONFIG_MMU) += ioremap.o | 31 | lib-$(CONFIG_MMU) += ioremap.o |
| 29 | lib-$(CONFIG_SMP) += cpumask.o | 32 | lib-$(CONFIG_SMP) += cpumask.o |
| 30 | lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o | 33 | lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o |
| @@ -1,1068 +1,409 @@ | |||
| 1 | /* | 1 | #include <linux/bitmap.h> |
| 2 | * 2002-10-18 written by Jim Houston jim.houston@ccur.com | ||
| 3 | * Copyright (C) 2002 by Concurrent Computer Corporation | ||
| 4 | * Distributed under the GNU GPL license version 2. | ||
| 5 | * | ||
| 6 | * Modified by George Anzinger to reuse immediately and to use | ||
| 7 | * find bit instructions. Also removed _irq on spinlocks. | ||
| 8 | * | ||
| 9 | * Modified by Nadia Derbey to make it RCU safe. | ||
| 10 | * | ||
| 11 | * Small id to pointer translation service. | ||
| 12 | * | ||
| 13 | * It uses a radix tree like structure as a sparse array indexed | ||
| 14 | * by the id to obtain the pointer. The bitmap makes allocating | ||
| 15 | * a new id quick. | ||
| 16 | * | ||
| 17 | * You call it to allocate an id (an int) an associate with that id a | ||
| 18 | * pointer or what ever, we treat it as a (void *). You can pass this | ||
| 19 | * id to a user for him to pass back at a later time. You then pass | ||
| 20 | * that id to this code and it returns your pointer. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #ifndef TEST // to test in user space... | ||
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/init.h> | ||
| 26 | #include <linux/export.h> | 2 | #include <linux/export.h> |
| 27 | #endif | ||
| 28 | #include <linux/err.h> | ||
| 29 | #include <linux/string.h> | ||
| 30 | #include <linux/idr.h> | 3 | #include <linux/idr.h> |
| 4 | #include <linux/slab.h> | ||
| 31 | #include <linux/spinlock.h> | 5 | #include <linux/spinlock.h> |
| 32 | #include <linux/percpu.h> | ||
| 33 | |||
| 34 | #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) | ||
| 35 | #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) | ||
| 36 | |||
| 37 | /* Leave the possibility of an incomplete final layer */ | ||
| 38 | #define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) | ||
| 39 | 6 | ||
| 40 | /* Number of id_layer structs to leave in free list */ | 7 | DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); |
| 41 | #define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) | ||
| 42 | |||
| 43 | static struct kmem_cache *idr_layer_cache; | ||
| 44 | static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); | ||
| 45 | static DEFINE_PER_CPU(int, idr_preload_cnt); | ||
| 46 | static DEFINE_SPINLOCK(simple_ida_lock); | 8 | static DEFINE_SPINLOCK(simple_ida_lock); |
| 47 | 9 | ||
| 48 | /* the maximum ID which can be allocated given idr->layers */ | ||
| 49 | static int idr_max(int layers) | ||
| 50 | { | ||
| 51 | int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT); | ||
| 52 | |||
| 53 | return (1 << bits) - 1; | ||
| 54 | } | ||
| 55 | |||
| 56 | /* | ||
| 57 | * Prefix mask for an idr_layer at @layer. For layer 0, the prefix mask is | ||
| 58 | * all bits except for the lower IDR_BITS. For layer 1, 2 * IDR_BITS, and | ||
| 59 | * so on. | ||
| 60 | */ | ||
| 61 | static int idr_layer_prefix_mask(int layer) | ||
| 62 | { | ||
| 63 | return ~idr_max(layer + 1); | ||
| 64 | } | ||
| 65 | |||
| 66 | static struct idr_layer *get_from_free_list(struct idr *idp) | ||
| 67 | { | ||
| 68 | struct idr_layer *p; | ||
| 69 | unsigned long flags; | ||
| 70 | |||
| 71 | spin_lock_irqsave(&idp->lock, flags); | ||
| 72 | if ((p = idp->id_free)) { | ||
| 73 | idp->id_free = p->ary[0]; | ||
| 74 | idp->id_free_cnt--; | ||
| 75 | p->ary[0] = NULL; | ||
| 76 | } | ||
| 77 | spin_unlock_irqrestore(&idp->lock, flags); | ||
| 78 | return(p); | ||
| 79 | } | ||
| 80 | |||
| 81 | /** | 10 | /** |
| 82 | * idr_layer_alloc - allocate a new idr_layer | 11 | * idr_alloc - allocate an id |
| 83 | * @gfp_mask: allocation mask | 12 | * @idr: idr handle |
| 84 | * @layer_idr: optional idr to allocate from | ||
| 85 | * | ||
| 86 | * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch | ||
| 87 | * one from the per-cpu preload buffer. If @layer_idr is not %NULL, fetch | ||
| 88 | * an idr_layer from @idr->id_free. | ||
| 89 | * | ||
| 90 | * @layer_idr is to maintain backward compatibility with the old alloc | ||
| 91 | * interface - idr_pre_get() and idr_get_new*() - and will be removed | ||
| 92 | * together with per-pool preload buffer. | ||
| 93 | */ | ||
| 94 | static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) | ||
| 95 | { | ||
| 96 | struct idr_layer *new; | ||
| 97 | |||
| 98 | /* this is the old path, bypass to get_from_free_list() */ | ||
| 99 | if (layer_idr) | ||
| 100 | return get_from_free_list(layer_idr); | ||
| 101 | |||
| 102 | /* | ||
| 103 | * Try to allocate directly from kmem_cache. We want to try this | ||
| 104 | * before preload buffer; otherwise, non-preloading idr_alloc() | ||
| 105 | * users will end up taking advantage of preloading ones. As the | ||
| 106 | * following is allowed to fail for preloaded cases, suppress | ||
| 107 | * warning this time. | ||
| 108 | */ | ||
| 109 | new = kmem_cache_zalloc(idr_layer_cache, gfp_mask | __GFP_NOWARN); | ||
| 110 | if (new) | ||
| 111 | return new; | ||
| 112 | |||
| 113 | /* | ||
| 114 | * Try to fetch one from the per-cpu preload buffer if in process | ||
| 115 | * context. See idr_preload() for details. | ||
| 116 | */ | ||
| 117 | if (!in_interrupt()) { | ||
| 118 | preempt_disable(); | ||
| 119 | new = __this_cpu_read(idr_preload_head); | ||
| 120 | if (new) { | ||
| 121 | __this_cpu_write(idr_preload_head, new->ary[0]); | ||
| 122 | __this_cpu_dec(idr_preload_cnt); | ||
| 123 | new->ary[0] = NULL; | ||
| 124 | } | ||
| 125 | preempt_enable(); | ||
| 126 | if (new) | ||
| 127 | return new; | ||
| 128 | } | ||
| 129 | |||
| 130 | /* | ||
| 131 | * Both failed. Try kmem_cache again w/o adding __GFP_NOWARN so | ||
| 132 | * that memory allocation failure warning is printed as intended. | ||
| 133 | */ | ||
| 134 | return kmem_cache_zalloc(idr_layer_cache, gfp_mask); | ||
| 135 | } | ||
| 136 | |||
| 137 | static void idr_layer_rcu_free(struct rcu_head *head) | ||
| 138 | { | ||
| 139 | struct idr_layer *layer; | ||
| 140 | |||
| 141 | layer = container_of(head, struct idr_layer, rcu_head); | ||
| 142 | kmem_cache_free(idr_layer_cache, layer); | ||
| 143 | } | ||
| 144 | |||
| 145 | static inline void free_layer(struct idr *idr, struct idr_layer *p) | ||
| 146 | { | ||
| 147 | if (idr->hint == p) | ||
| 148 | RCU_INIT_POINTER(idr->hint, NULL); | ||
| 149 | call_rcu(&p->rcu_head, idr_layer_rcu_free); | ||
| 150 | } | ||
| 151 | |||
| 152 | /* only called when idp->lock is held */ | ||
| 153 | static void __move_to_free_list(struct idr *idp, struct idr_layer *p) | ||
| 154 | { | ||
| 155 | p->ary[0] = idp->id_free; | ||
| 156 | idp->id_free = p; | ||
| 157 | idp->id_free_cnt++; | ||
| 158 | } | ||
| 159 | |||
| 160 | static void move_to_free_list(struct idr *idp, struct idr_layer *p) | ||
| 161 | { | ||
| 162 | unsigned long flags; | ||
| 163 | |||
| 164 | /* | ||
| 165 | * Depends on the return element being zeroed. | ||
| 166 | */ | ||
| 167 | spin_lock_irqsave(&idp->lock, flags); | ||
| 168 | __move_to_free_list(idp, p); | ||
| 169 | spin_unlock_irqrestore(&idp->lock, flags); | ||
| 170 | } | ||
| 171 | |||
| 172 | static void idr_mark_full(struct idr_layer **pa, int id) | ||
| 173 | { | ||
| 174 | struct idr_layer *p = pa[0]; | ||
| 175 | int l = 0; | ||
| 176 | |||
| 177 | __set_bit(id & IDR_MASK, p->bitmap); | ||
| 178 | /* | ||
| 179 | * If this layer is full mark the bit in the layer above to | ||
| 180 | * show that this part of the radix tree is full. This may | ||
| 181 | * complete the layer above and require walking up the radix | ||
| 182 | * tree. | ||
| 183 | */ | ||
| 184 | while (bitmap_full(p->bitmap, IDR_SIZE)) { | ||
| 185 | if (!(p = pa[++l])) | ||
| 186 | break; | ||
| 187 | id = id >> IDR_BITS; | ||
| 188 | __set_bit((id & IDR_MASK), p->bitmap); | ||
| 189 | } | ||
| 190 | } | ||
| 191 | |||
| 192 | static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) | ||
| 193 | { | ||
| 194 | while (idp->id_free_cnt < MAX_IDR_FREE) { | ||
| 195 | struct idr_layer *new; | ||
| 196 | new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); | ||
| 197 | if (new == NULL) | ||
| 198 | return (0); | ||
| 199 | move_to_free_list(idp, new); | ||
| 200 | } | ||
| 201 | return 1; | ||
| 202 | } | ||
| 203 | |||
| 204 | /** | ||
| 205 | * sub_alloc - try to allocate an id without growing the tree depth | ||
| 206 | * @idp: idr handle | ||
| 207 | * @starting_id: id to start search at | ||
| 208 | * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer | ||
| 209 | * @gfp_mask: allocation mask for idr_layer_alloc() | ||
| 210 | * @layer_idr: optional idr passed to idr_layer_alloc() | ||
| 211 | * | ||
| 212 | * Allocate an id in range [@starting_id, INT_MAX] from @idp without | ||
| 213 | * growing its depth. Returns | ||
| 214 | * | ||
| 215 | * the allocated id >= 0 if successful, | ||
| 216 | * -EAGAIN if the tree needs to grow for allocation to succeed, | ||
| 217 | * -ENOSPC if the id space is exhausted, | ||
| 218 | * -ENOMEM if more idr_layers need to be allocated. | ||
| 219 | */ | ||
| 220 | static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, | ||
| 221 | gfp_t gfp_mask, struct idr *layer_idr) | ||
| 222 | { | ||
| 223 | int n, m, sh; | ||
| 224 | struct idr_layer *p, *new; | ||
| 225 | int l, id, oid; | ||
| 226 | |||
| 227 | id = *starting_id; | ||
| 228 | restart: | ||
| 229 | p = idp->top; | ||
| 230 | l = idp->layers; | ||
| 231 | pa[l--] = NULL; | ||
| 232 | while (1) { | ||
| 233 | /* | ||
| 234 | * We run around this while until we reach the leaf node... | ||
| 235 | */ | ||
| 236 | n = (id >> (IDR_BITS*l)) & IDR_MASK; | ||
| 237 | m = find_next_zero_bit(p->bitmap, IDR_SIZE, n); | ||
| 238 | if (m == IDR_SIZE) { | ||
| 239 | /* no space available go back to previous layer. */ | ||
| 240 | l++; | ||
| 241 | oid = id; | ||
| 242 | id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; | ||
| 243 | |||
| 244 | /* if already at the top layer, we need to grow */ | ||
| 245 | if (id > idr_max(idp->layers)) { | ||
| 246 | *starting_id = id; | ||
| 247 | return -EAGAIN; | ||
| 248 | } | ||
| 249 | p = pa[l]; | ||
| 250 | BUG_ON(!p); | ||
| 251 | |||
| 252 | /* If we need to go up one layer, continue the | ||
| 253 | * loop; otherwise, restart from the top. | ||
| 254 | */ | ||
| 255 | sh = IDR_BITS * (l + 1); | ||
| 256 | if (oid >> sh == id >> sh) | ||
| 257 | continue; | ||
| 258 | else | ||
| 259 | goto restart; | ||
| 260 | } | ||
| 261 | if (m != n) { | ||
| 262 | sh = IDR_BITS*l; | ||
| 263 | id = ((id >> sh) ^ n ^ m) << sh; | ||
| 264 | } | ||
| 265 | if ((id >= MAX_IDR_BIT) || (id < 0)) | ||
| 266 | return -ENOSPC; | ||
| 267 | if (l == 0) | ||
| 268 | break; | ||
| 269 | /* | ||
| 270 | * Create the layer below if it is missing. | ||
| 271 | */ | ||
| 272 | if (!p->ary[m]) { | ||
| 273 | new = idr_layer_alloc(gfp_mask, layer_idr); | ||
| 274 | if (!new) | ||
| 275 | return -ENOMEM; | ||
| 276 | new->layer = l-1; | ||
| 277 | new->prefix = id & idr_layer_prefix_mask(new->layer); | ||
| 278 | rcu_assign_pointer(p->ary[m], new); | ||
| 279 | p->count++; | ||
| 280 | } | ||
| 281 | pa[l--] = p; | ||
| 282 | p = p->ary[m]; | ||
| 283 | } | ||
| 284 | |||
| 285 | pa[l] = p; | ||
| 286 | return id; | ||
| 287 | } | ||
| 288 | |||
| 289 | static int idr_get_empty_slot(struct idr *idp, int starting_id, | ||
| 290 | struct idr_layer **pa, gfp_t gfp_mask, | ||
| 291 | struct idr *layer_idr) | ||
| 292 | { | ||
| 293 | struct idr_layer *p, *new; | ||
| 294 | int layers, v, id; | ||
| 295 | unsigned long flags; | ||
| 296 | |||
| 297 | id = starting_id; | ||
| 298 | build_up: | ||
| 299 | p = idp->top; | ||
| 300 | layers = idp->layers; | ||
| 301 | if (unlikely(!p)) { | ||
| 302 | if (!(p = idr_layer_alloc(gfp_mask, layer_idr))) | ||
| 303 | return -ENOMEM; | ||
| 304 | p->layer = 0; | ||
| 305 | layers = 1; | ||
| 306 | } | ||
| 307 | /* | ||
| 308 | * Add a new layer to the top of the tree if the requested | ||
| 309 | * id is larger than the currently allocated space. | ||
| 310 | */ | ||
| 311 | while (id > idr_max(layers)) { | ||
| 312 | layers++; | ||
| 313 | if (!p->count) { | ||
| 314 | /* special case: if the tree is currently empty, | ||
| 315 | * then we grow the tree by moving the top node | ||
| 316 | * upwards. | ||
| 317 | */ | ||
| 318 | p->layer++; | ||
| 319 | WARN_ON_ONCE(p->prefix); | ||
| 320 | continue; | ||
| 321 | } | ||
| 322 | if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) { | ||
| 323 | /* | ||
| 324 | * The allocation failed. If we built part of | ||
| 325 | * the structure tear it down. | ||
| 326 | */ | ||
| 327 | spin_lock_irqsave(&idp->lock, flags); | ||
| 328 | for (new = p; p && p != idp->top; new = p) { | ||
| 329 | p = p->ary[0]; | ||
| 330 | new->ary[0] = NULL; | ||
| 331 | new->count = 0; | ||
| 332 | bitmap_clear(new->bitmap, 0, IDR_SIZE); | ||
| 333 | __move_to_free_list(idp, new); | ||
| 334 | } | ||
| 335 | spin_unlock_irqrestore(&idp->lock, flags); | ||
| 336 | return -ENOMEM; | ||
| 337 | } | ||
| 338 | new->ary[0] = p; | ||
| 339 | new->count = 1; | ||
| 340 | new->layer = layers-1; | ||
| 341 | new->prefix = id & idr_layer_prefix_mask(new->layer); | ||
| 342 | if (bitmap_full(p->bitmap, IDR_SIZE)) | ||
| 343 | __set_bit(0, new->bitmap); | ||
| 344 | p = new; | ||
| 345 | } | ||
| 346 | rcu_assign_pointer(idp->top, p); | ||
| 347 | idp->layers = layers; | ||
| 348 | v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr); | ||
| 349 | if (v == -EAGAIN) | ||
| 350 | goto build_up; | ||
| 351 | return(v); | ||
| 352 | } | ||
| 353 | |||
| 354 | /* | ||
| 355 | * @id and @pa are from a successful allocation from idr_get_empty_slot(). | ||
| 356 | * Install the user pointer @ptr and mark the slot full. | ||
| 357 | */ | ||
| 358 | static void idr_fill_slot(struct idr *idr, void *ptr, int id, | ||
| 359 | struct idr_layer **pa) | ||
| 360 | { | ||
| 361 | /* update hint used for lookup, cleared from free_layer() */ | ||
| 362 | rcu_assign_pointer(idr->hint, pa[0]); | ||
| 363 | |||
| 364 | rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr); | ||
| 365 | pa[0]->count++; | ||
| 366 | idr_mark_full(pa, id); | ||
| 367 | } | ||
| 368 | |||
| 369 | |||
| 370 | /** | ||
| 371 | * idr_preload - preload for idr_alloc() | ||
| 372 | * @gfp_mask: allocation mask to use for preloading | ||
| 373 | * | ||
| 374 | * Preload per-cpu layer buffer for idr_alloc(). Can only be used from | ||
| 375 | * process context and each idr_preload() invocation should be matched with | ||
| 376 | * idr_preload_end(). Note that preemption is disabled while preloaded. | ||
| 377 | * | ||
| 378 | * The first idr_alloc() in the preloaded section can be treated as if it | ||
| 379 | * were invoked with @gfp_mask used for preloading. This allows using more | ||
| 380 | * permissive allocation masks for idrs protected by spinlocks. | ||
| 381 | * | ||
| 382 | * For example, if idr_alloc() below fails, the failure can be treated as | ||
| 383 | * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT. | ||
| 384 | * | ||
| 385 | * idr_preload(GFP_KERNEL); | ||
| 386 | * spin_lock(lock); | ||
| 387 | * | ||
| 388 | * id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT); | ||
| 389 | * | ||
| 390 | * spin_unlock(lock); | ||
| 391 | * idr_preload_end(); | ||
| 392 | * if (id < 0) | ||
| 393 | * error; | ||
| 394 | */ | ||
| 395 | void idr_preload(gfp_t gfp_mask) | ||
| 396 | { | ||
| 397 | /* | ||
| 398 | * Consuming preload buffer from non-process context breaks preload | ||
| 399 | * allocation guarantee. Disallow usage from those contexts. | ||
| 400 | */ | ||
| 401 | WARN_ON_ONCE(in_interrupt()); | ||
| 402 | might_sleep_if(gfpflags_allow_blocking(gfp_mask)); | ||
| 403 | |||
| 404 | preempt_disable(); | ||
| 405 | |||
| 406 | /* | ||
| 407 | * idr_alloc() is likely to succeed w/o full idr_layer buffer and | ||
| 408 | * return value from idr_alloc() needs to be checked for failure | ||
| 409 | * anyway. Silently give up if allocation fails. The caller can | ||
| 410 | * treat failures from idr_alloc() as if idr_alloc() were called | ||
| 411 | * with @gfp_mask which should be enough. | ||
| 412 | */ | ||
| 413 | while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { | ||
| 414 | struct idr_layer *new; | ||
| 415 | |||
| 416 | preempt_enable(); | ||
| 417 | new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); | ||
| 418 | preempt_disable(); | ||
| 419 | if (!new) | ||
| 420 | break; | ||
| 421 | |||
| 422 | /* link the new one to per-cpu preload list */ | ||
| 423 | new->ary[0] = __this_cpu_read(idr_preload_head); | ||
| 424 | __this_cpu_write(idr_preload_head, new); | ||
| 425 | __this_cpu_inc(idr_preload_cnt); | ||
| 426 | } | ||
| 427 | } | ||
| 428 | EXPORT_SYMBOL(idr_preload); | ||
| 429 | |||
| 430 | /** | ||
| 431 | * idr_alloc - allocate new idr entry | ||
| 432 | * @idr: the (initialized) idr | ||
| 433 | * @ptr: pointer to be associated with the new id | 13 | * @ptr: pointer to be associated with the new id |
| 434 | * @start: the minimum id (inclusive) | 14 | * @start: the minimum id (inclusive) |
| 435 | * @end: the maximum id (exclusive, <= 0 for max) | 15 | * @end: the maximum id (exclusive) |
| 436 | * @gfp_mask: memory allocation flags | 16 | * @gfp: memory allocation flags |
| 437 | * | 17 | * |
| 438 | * Allocate an id in [start, end) and associate it with @ptr. If no ID is | 18 | * Allocates an unused ID in the range [start, end). Returns -ENOSPC |
| 439 | * available in the specified range, returns -ENOSPC. On memory allocation | 19 | * if there are no unused IDs in that range. |
| 440 | * failure, returns -ENOMEM. | ||
| 441 | * | 20 | * |
| 442 | * Note that @end is treated as max when <= 0. This is to always allow | 21 | * Note that @end is treated as max when <= 0. This is to always allow |
| 443 | * using @start + N as @end as long as N is inside integer range. | 22 | * using @start + N as @end as long as N is inside integer range. |
| 444 | * | 23 | * |
| 445 | * The user is responsible for exclusively synchronizing all operations | 24 | * Simultaneous modifications to the @idr are not allowed and should be |
| 446 | * which may modify @idr. However, read-only accesses such as idr_find() | 25 | * prevented by the user, usually with a lock. idr_alloc() may be called |
| 447 | * or iteration can be performed under RCU read lock provided the user | 26 | * concurrently with read-only accesses to the @idr, such as idr_find() and |
| 448 | * destroys @ptr in RCU-safe way after removal from idr. | 27 | * idr_for_each_entry(). |
| 449 | */ | 28 | */ |
| 450 | int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) | 29 | int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) |
| 451 | { | 30 | { |
| 452 | int max = end > 0 ? end - 1 : INT_MAX; /* inclusive upper limit */ | 31 | void __rcu **slot; |
| 453 | struct idr_layer *pa[MAX_IDR_LEVEL + 1]; | 32 | struct radix_tree_iter iter; |
| 454 | int id; | ||
| 455 | 33 | ||
| 456 | might_sleep_if(gfpflags_allow_blocking(gfp_mask)); | ||
| 457 | |||
| 458 | /* sanity checks */ | ||
| 459 | if (WARN_ON_ONCE(start < 0)) | 34 | if (WARN_ON_ONCE(start < 0)) |
| 460 | return -EINVAL; | 35 | return -EINVAL; |
| 461 | if (unlikely(max < start)) | 36 | if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) |
| 462 | return -ENOSPC; | 37 | return -EINVAL; |
| 463 | 38 | ||
| 464 | /* allocate id */ | 39 | radix_tree_iter_init(&iter, start); |
| 465 | id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL); | 40 | slot = idr_get_free(&idr->idr_rt, &iter, gfp, end); |
| 466 | if (unlikely(id < 0)) | 41 | if (IS_ERR(slot)) |
| 467 | return id; | 42 | return PTR_ERR(slot); |
| 468 | if (unlikely(id > max)) | ||
| 469 | return -ENOSPC; | ||
| 470 | 43 | ||
| 471 | idr_fill_slot(idr, ptr, id, pa); | 44 | radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); |
| 472 | return id; | 45 | radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); |
| 46 | return iter.index; | ||
| 473 | } | 47 | } |
| 474 | EXPORT_SYMBOL_GPL(idr_alloc); | 48 | EXPORT_SYMBOL_GPL(idr_alloc); |
| 475 | 49 | ||
| 476 | /** | 50 | /** |
| 477 | * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion | 51 | * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion |
| 478 | * @idr: the (initialized) idr | 52 | * @idr: idr handle |
| 479 | * @ptr: pointer to be associated with the new id | 53 | * @ptr: pointer to be associated with the new id |
| 480 | * @start: the minimum id (inclusive) | 54 | * @start: the minimum id (inclusive) |
| 481 | * @end: the maximum id (exclusive, <= 0 for max) | 55 | * @end: the maximum id (exclusive) |
| 482 | * @gfp_mask: memory allocation flags | 56 | * @gfp: memory allocation flags |
| 483 | * | ||
| 484 | * Essentially the same as idr_alloc, but prefers to allocate progressively | ||
| 485 | * higher ids if it can. If the "cur" counter wraps, then it will start again | ||
| 486 | * at the "start" end of the range and allocate one that has already been used. | ||
| 487 | */ | ||
| 488 | int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, | ||
| 489 | gfp_t gfp_mask) | ||
| 490 | { | ||
| 491 | int id; | ||
| 492 | |||
| 493 | id = idr_alloc(idr, ptr, max(start, idr->cur), end, gfp_mask); | ||
| 494 | if (id == -ENOSPC) | ||
| 495 | id = idr_alloc(idr, ptr, start, end, gfp_mask); | ||
| 496 | |||
| 497 | if (likely(id >= 0)) | ||
| 498 | idr->cur = id + 1; | ||
| 499 | return id; | ||
| 500 | } | ||
| 501 | EXPORT_SYMBOL(idr_alloc_cyclic); | ||
| 502 | |||
| 503 | static void idr_remove_warning(int id) | ||
| 504 | { | ||
| 505 | WARN(1, "idr_remove called for id=%d which is not allocated.\n", id); | ||
| 506 | } | ||
| 507 | |||
| 508 | static void sub_remove(struct idr *idp, int shift, int id) | ||
| 509 | { | ||
| 510 | struct idr_layer *p = idp->top; | ||
| 511 | struct idr_layer **pa[MAX_IDR_LEVEL + 1]; | ||
| 512 | struct idr_layer ***paa = &pa[0]; | ||
| 513 | struct idr_layer *to_free; | ||
| 514 | int n; | ||
| 515 | |||
| 516 | *paa = NULL; | ||
| 517 | *++paa = &idp->top; | ||
| 518 | |||
| 519 | while ((shift > 0) && p) { | ||
| 520 | n = (id >> shift) & IDR_MASK; | ||
| 521 | __clear_bit(n, p->bitmap); | ||
| 522 | *++paa = &p->ary[n]; | ||
| 523 | p = p->ary[n]; | ||
| 524 | shift -= IDR_BITS; | ||
| 525 | } | ||
| 526 | n = id & IDR_MASK; | ||
| 527 | if (likely(p != NULL && test_bit(n, p->bitmap))) { | ||
| 528 | __clear_bit(n, p->bitmap); | ||
| 529 | RCU_INIT_POINTER(p->ary[n], NULL); | ||
| 530 | to_free = NULL; | ||
| 531 | while(*paa && ! --((**paa)->count)){ | ||
| 532 | if (to_free) | ||
| 533 | free_layer(idp, to_free); | ||
| 534 | to_free = **paa; | ||
| 535 | **paa-- = NULL; | ||
| 536 | } | ||
| 537 | if (!*paa) | ||
| 538 | idp->layers = 0; | ||
| 539 | if (to_free) | ||
| 540 | free_layer(idp, to_free); | ||
| 541 | } else | ||
| 542 | idr_remove_warning(id); | ||
| 543 | } | ||
| 544 | |||
| 545 | /** | ||
| 546 | * idr_remove - remove the given id and free its slot | ||
| 547 | * @idp: idr handle | ||
| 548 | * @id: unique key | ||
| 549 | */ | ||
| 550 | void idr_remove(struct idr *idp, int id) | ||
| 551 | { | ||
| 552 | struct idr_layer *p; | ||
| 553 | struct idr_layer *to_free; | ||
| 554 | |||
| 555 | if (id < 0) | ||
| 556 | return; | ||
| 557 | |||
| 558 | if (id > idr_max(idp->layers)) { | ||
| 559 | idr_remove_warning(id); | ||
| 560 | return; | ||
| 561 | } | ||
| 562 | |||
| 563 | sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); | ||
| 564 | if (idp->top && idp->top->count == 1 && (idp->layers > 1) && | ||
| 565 | idp->top->ary[0]) { | ||
| 566 | /* | ||
| 567 | * Single child at leftmost slot: we can shrink the tree. | ||
| 568 | * This level is not needed anymore since when layers are | ||
| 569 | * inserted, they are inserted at the top of the existing | ||
| 570 | * tree. | ||
| 571 | */ | ||
| 572 | to_free = idp->top; | ||
| 573 | p = idp->top->ary[0]; | ||
| 574 | rcu_assign_pointer(idp->top, p); | ||
| 575 | --idp->layers; | ||
| 576 | to_free->count = 0; | ||
| 577 | bitmap_clear(to_free->bitmap, 0, IDR_SIZE); | ||
| 578 | free_layer(idp, to_free); | ||
| 579 | } | ||
| 580 | } | ||
| 581 | EXPORT_SYMBOL(idr_remove); | ||
| 582 | |||
| 583 | static void __idr_remove_all(struct idr *idp) | ||
| 584 | { | ||
| 585 | int n, id, max; | ||
| 586 | int bt_mask; | ||
| 587 | struct idr_layer *p; | ||
| 588 | struct idr_layer *pa[MAX_IDR_LEVEL + 1]; | ||
| 589 | struct idr_layer **paa = &pa[0]; | ||
| 590 | |||
| 591 | n = idp->layers * IDR_BITS; | ||
| 592 | *paa = idp->top; | ||
| 593 | RCU_INIT_POINTER(idp->top, NULL); | ||
| 594 | max = idr_max(idp->layers); | ||
| 595 | |||
| 596 | id = 0; | ||
| 597 | while (id >= 0 && id <= max) { | ||
| 598 | p = *paa; | ||
| 599 | while (n > IDR_BITS && p) { | ||
| 600 | n -= IDR_BITS; | ||
| 601 | p = p->ary[(id >> n) & IDR_MASK]; | ||
| 602 | *++paa = p; | ||
| 603 | } | ||
| 604 | |||
| 605 | bt_mask = id; | ||
| 606 | id += 1 << n; | ||
| 607 | /* Get the highest bit that the above add changed from 0->1. */ | ||
| 608 | while (n < fls(id ^ bt_mask)) { | ||
| 609 | if (*paa) | ||
| 610 | free_layer(idp, *paa); | ||
| 611 | n += IDR_BITS; | ||
| 612 | --paa; | ||
| 613 | } | ||
| 614 | } | ||
| 615 | idp->layers = 0; | ||
| 616 | } | ||
| 617 | |||
| 618 | /** | ||
| 619 | * idr_destroy - release all cached layers within an idr tree | ||
| 620 | * @idp: idr handle | ||
| 621 | * | ||
| 622 | * Free all id mappings and all idp_layers. After this function, @idp is | ||
| 623 | * completely unused and can be freed / recycled. The caller is | ||
| 624 | * responsible for ensuring that no one else accesses @idp during or after | ||
| 625 | * idr_destroy(). | ||
| 626 | * | 57 | * |
| 627 | * A typical clean-up sequence for objects stored in an idr tree will use | 58 | * Allocates an ID larger than the last ID allocated if one is available. |
| 628 | * idr_for_each() to free all objects, if necessary, then idr_destroy() to | 59 | * If not, it will attempt to allocate the smallest ID that is larger or |
| 629 | * free up the id mappings and cached idr_layers. | 60 | * equal to @start. |
| 630 | */ | 61 | */ |
| 631 | void idr_destroy(struct idr *idp) | 62 | int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) |
| 632 | { | 63 | { |
| 633 | __idr_remove_all(idp); | 64 | int id, curr = idr->idr_next; |
| 634 | 65 | ||
| 635 | while (idp->id_free_cnt) { | 66 | if (curr < start) |
| 636 | struct idr_layer *p = get_from_free_list(idp); | 67 | curr = start; |
| 637 | kmem_cache_free(idr_layer_cache, p); | ||
| 638 | } | ||
| 639 | } | ||
| 640 | EXPORT_SYMBOL(idr_destroy); | ||
| 641 | 68 | ||
| 642 | void *idr_find_slowpath(struct idr *idp, int id) | 69 | id = idr_alloc(idr, ptr, curr, end, gfp); |
| 643 | { | 70 | if ((id == -ENOSPC) && (curr > start)) |
| 644 | int n; | 71 | id = idr_alloc(idr, ptr, start, curr, gfp); |
| 645 | struct idr_layer *p; | ||
| 646 | |||
| 647 | if (id < 0) | ||
| 648 | return NULL; | ||
| 649 | |||
| 650 | p = rcu_dereference_raw(idp->top); | ||
| 651 | if (!p) | ||
| 652 | return NULL; | ||
| 653 | n = (p->layer+1) * IDR_BITS; | ||
| 654 | 72 | ||
| 655 | if (id > idr_max(p->layer + 1)) | 73 | if (id >= 0) |
| 656 | return NULL; | 74 | idr->idr_next = id + 1U; |
| 657 | BUG_ON(n == 0); | ||
| 658 | 75 | ||
| 659 | while (n > 0 && p) { | 76 | return id; |
| 660 | n -= IDR_BITS; | ||
| 661 | BUG_ON(n != p->layer*IDR_BITS); | ||
| 662 | p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); | ||
| 663 | } | ||
| 664 | return((void *)p); | ||
| 665 | } | 77 | } |
| 666 | EXPORT_SYMBOL(idr_find_slowpath); | 78 | EXPORT_SYMBOL(idr_alloc_cyclic); |
| 667 | 79 | ||
| 668 | /** | 80 | /** |
| 669 | * idr_for_each - iterate through all stored pointers | 81 | * idr_for_each - iterate through all stored pointers |
| 670 | * @idp: idr handle | 82 | * @idr: idr handle |
| 671 | * @fn: function to be called for each pointer | 83 | * @fn: function to be called for each pointer |
| 672 | * @data: data passed back to callback function | 84 | * @data: data passed to callback function |
| 673 | * | 85 | * |
| 674 | * Iterate over the pointers registered with the given idr. The | 86 | * The callback function will be called for each entry in @idr, passing |
| 675 | * callback function will be called for each pointer currently | 87 | * the id, the pointer and the data pointer passed to this function. |
| 676 | * registered, passing the id, the pointer and the data pointer passed | ||
| 677 | * to this function. It is not safe to modify the idr tree while in | ||
| 678 | * the callback, so functions such as idr_get_new and idr_remove are | ||
| 679 | * not allowed. | ||
| 680 | * | 88 | * |
| 681 | * We check the return of @fn each time. If it returns anything other | 89 | * If @fn returns anything other than %0, the iteration stops and that |
| 682 | * than %0, we break out and return that value. | 90 | * value is returned from this function. |
| 683 | * | 91 | * |
| 684 | * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). | 92 | * idr_for_each() can be called concurrently with idr_alloc() and |
| 93 | * idr_remove() if protected by RCU. Newly added entries may not be | ||
| 94 | * seen and deleted entries may be seen, but adding and removing entries | ||
| 95 | * will not cause other entries to be skipped, nor spurious ones to be seen. | ||
| 685 | */ | 96 | */ |
| 686 | int idr_for_each(struct idr *idp, | 97 | int idr_for_each(const struct idr *idr, |
| 687 | int (*fn)(int id, void *p, void *data), void *data) | 98 | int (*fn)(int id, void *p, void *data), void *data) |
| 688 | { | 99 | { |
| 689 | int n, id, max, error = 0; | 100 | struct radix_tree_iter iter; |
| 690 | struct idr_layer *p; | 101 | void __rcu **slot; |
| 691 | struct idr_layer *pa[MAX_IDR_LEVEL + 1]; | ||
| 692 | struct idr_layer **paa = &pa[0]; | ||
| 693 | |||
| 694 | n = idp->layers * IDR_BITS; | ||
| 695 | *paa = rcu_dereference_raw(idp->top); | ||
| 696 | max = idr_max(idp->layers); | ||
| 697 | 102 | ||
| 698 | id = 0; | 103 | radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { |
| 699 | while (id >= 0 && id <= max) { | 104 | int ret = fn(iter.index, rcu_dereference_raw(*slot), data); |
| 700 | p = *paa; | 105 | if (ret) |
| 701 | while (n > 0 && p) { | 106 | return ret; |
| 702 | n -= IDR_BITS; | ||
| 703 | p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); | ||
| 704 | *++paa = p; | ||
| 705 | } | ||
| 706 | |||
| 707 | if (p) { | ||
| 708 | error = fn(id, (void *)p, data); | ||
| 709 | if (error) | ||
| 710 | break; | ||
| 711 | } | ||
| 712 | |||
| 713 | id += 1 << n; | ||
| 714 | while (n < fls(id)) { | ||
| 715 | n += IDR_BITS; | ||
| 716 | --paa; | ||
| 717 | } | ||
| 718 | } | 107 | } |
| 719 | 108 | ||
| 720 | return error; | 109 | return 0; |
| 721 | } | 110 | } |
| 722 | EXPORT_SYMBOL(idr_for_each); | 111 | EXPORT_SYMBOL(idr_for_each); |
| 723 | 112 | ||
| 724 | /** | 113 | /** |
| 725 | * idr_get_next - lookup next object of id to given id. | 114 | * idr_get_next - Find next populated entry |
| 726 | * @idp: idr handle | 115 | * @idr: idr handle |
| 727 | * @nextidp: pointer to lookup key | 116 | * @nextid: Pointer to lowest possible ID to return |
| 728 | * | 117 | * |
| 729 | * Returns pointer to registered object with id, which is next number to | 118 | * Returns the next populated entry in the tree with an ID greater than |
| 730 | * given id. After being looked up, *@nextidp will be updated for the next | 119 | * or equal to the value pointed to by @nextid. On exit, @nextid is updated |
| 731 | * iteration. | 120 | * to the ID of the found value. To use in a loop, the value pointed to by |
| 732 | * | 121 | * nextid must be incremented by the user. |
| 733 | * This function can be called under rcu_read_lock(), given that the leaf | ||
| 734 | * pointers lifetimes are correctly managed. | ||
| 735 | */ | 122 | */ |
| 736 | void *idr_get_next(struct idr *idp, int *nextidp) | 123 | void *idr_get_next(struct idr *idr, int *nextid) |
| 737 | { | 124 | { |
| 738 | struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1]; | 125 | struct radix_tree_iter iter; |
| 739 | struct idr_layer **paa = &pa[0]; | 126 | void __rcu **slot; |
| 740 | int id = *nextidp; | ||
| 741 | int n, max; | ||
| 742 | 127 | ||
| 743 | /* find first ent */ | 128 | slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); |
| 744 | p = *paa = rcu_dereference_raw(idp->top); | 129 | if (!slot) |
| 745 | if (!p) | ||
| 746 | return NULL; | 130 | return NULL; |
| 747 | n = (p->layer + 1) * IDR_BITS; | ||
| 748 | max = idr_max(p->layer + 1); | ||
| 749 | |||
| 750 | while (id >= 0 && id <= max) { | ||
| 751 | p = *paa; | ||
| 752 | while (n > 0 && p) { | ||
| 753 | n -= IDR_BITS; | ||
| 754 | p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); | ||
| 755 | *++paa = p; | ||
| 756 | } | ||
| 757 | |||
| 758 | if (p) { | ||
| 759 | *nextidp = id; | ||
| 760 | return p; | ||
| 761 | } | ||
| 762 | 131 | ||
| 763 | /* | 132 | *nextid = iter.index; |
| 764 | * Proceed to the next layer at the current level. Unlike | 133 | return rcu_dereference_raw(*slot); |
| 765 | * idr_for_each(), @id isn't guaranteed to be aligned to | ||
| 766 | * layer boundary at this point and adding 1 << n may | ||
| 767 | * incorrectly skip IDs. Make sure we jump to the | ||
| 768 | * beginning of the next layer using round_up(). | ||
| 769 | */ | ||
| 770 | id = round_up(id + 1, 1 << n); | ||
| 771 | while (n < fls(id)) { | ||
| 772 | n += IDR_BITS; | ||
| 773 | --paa; | ||
| 774 | } | ||
| 775 | } | ||
| 776 | return NULL; | ||
| 777 | } | 134 | } |
| 778 | EXPORT_SYMBOL(idr_get_next); | 135 | EXPORT_SYMBOL(idr_get_next); |
| 779 | 136 | ||
| 780 | |||
| 781 | /** | 137 | /** |
| 782 | * idr_replace - replace pointer for given id | 138 | * idr_replace - replace pointer for given id |
| 783 | * @idp: idr handle | 139 | * @idr: idr handle |
| 784 | * @ptr: pointer you want associated with the id | 140 | * @ptr: New pointer to associate with the ID |
| 785 | * @id: lookup key | 141 | * @id: Lookup key |
| 786 | * | 142 | * |
| 787 | * Replace the pointer registered with an id and return the old value. | 143 | * Replace the pointer registered with an ID and return the old value. |
| 788 | * A %-ENOENT return indicates that @id was not found. | 144 | * This function can be called under the RCU read lock concurrently with |
| 789 | * A %-EINVAL return indicates that @id was not within valid constraints. | 145 | * idr_alloc() and idr_remove() (as long as the ID being removed is not |
| 146 | * the one being replaced!). | ||
| 790 | * | 147 | * |
| 791 | * The caller must serialize with writers. | 148 | * Returns: 0 on success. %-ENOENT indicates that @id was not found. |
| 149 | * %-EINVAL indicates that @id or @ptr were not valid. | ||
| 792 | */ | 150 | */ |
| 793 | void *idr_replace(struct idr *idp, void *ptr, int id) | 151 | void *idr_replace(struct idr *idr, void *ptr, int id) |
| 794 | { | 152 | { |
| 795 | int n; | 153 | struct radix_tree_node *node; |
| 796 | struct idr_layer *p, *old_p; | 154 | void __rcu **slot = NULL; |
| 155 | void *entry; | ||
| 797 | 156 | ||
| 798 | if (id < 0) | 157 | if (WARN_ON_ONCE(id < 0)) |
| 158 | return ERR_PTR(-EINVAL); | ||
| 159 | if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) | ||
| 799 | return ERR_PTR(-EINVAL); | 160 | return ERR_PTR(-EINVAL); |
| 800 | 161 | ||
| 801 | p = idp->top; | 162 | entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); |
| 802 | if (!p) | 163 | if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) |
| 803 | return ERR_PTR(-ENOENT); | ||
| 804 | |||
| 805 | if (id > idr_max(p->layer + 1)) | ||
| 806 | return ERR_PTR(-ENOENT); | ||
| 807 | |||
| 808 | n = p->layer * IDR_BITS; | ||
| 809 | while ((n > 0) && p) { | ||
| 810 | p = p->ary[(id >> n) & IDR_MASK]; | ||
| 811 | n -= IDR_BITS; | ||
| 812 | } | ||
| 813 | |||
| 814 | n = id & IDR_MASK; | ||
| 815 | if (unlikely(p == NULL || !test_bit(n, p->bitmap))) | ||
| 816 | return ERR_PTR(-ENOENT); | 164 | return ERR_PTR(-ENOENT); |
| 817 | 165 | ||
| 818 | old_p = p->ary[n]; | 166 | __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL); |
| 819 | rcu_assign_pointer(p->ary[n], ptr); | ||
| 820 | 167 | ||
| 821 | return old_p; | 168 | return entry; |
| 822 | } | 169 | } |
| 823 | EXPORT_SYMBOL(idr_replace); | 170 | EXPORT_SYMBOL(idr_replace); |
| 824 | 171 | ||
| 825 | void __init idr_init_cache(void) | ||
| 826 | { | ||
| 827 | idr_layer_cache = kmem_cache_create("idr_layer_cache", | ||
| 828 | sizeof(struct idr_layer), 0, SLAB_PANIC, NULL); | ||
| 829 | } | ||
| 830 | |||
| 831 | /** | ||
| 832 | * idr_init - initialize idr handle | ||
| 833 | * @idp: idr handle | ||
| 834 | * | ||
| 835 | * This function is use to set up the handle (@idp) that you will pass | ||
| 836 | * to the rest of the functions. | ||
| 837 | */ | ||
| 838 | void idr_init(struct idr *idp) | ||
| 839 | { | ||
| 840 | memset(idp, 0, sizeof(struct idr)); | ||
| 841 | spin_lock_init(&idp->lock); | ||
| 842 | } | ||
| 843 | EXPORT_SYMBOL(idr_init); | ||
| 844 | |||
| 845 | static int idr_has_entry(int id, void *p, void *data) | ||
| 846 | { | ||
| 847 | return 1; | ||
| 848 | } | ||
| 849 | |||
| 850 | bool idr_is_empty(struct idr *idp) | ||
| 851 | { | ||
| 852 | return !idr_for_each(idp, idr_has_entry, NULL); | ||
| 853 | } | ||
| 854 | EXPORT_SYMBOL(idr_is_empty); | ||
| 855 | |||
| 856 | /** | 172 | /** |
| 857 | * DOC: IDA description | 173 | * DOC: IDA description |
| 858 | * IDA - IDR based ID allocator | ||
| 859 | * | 174 | * |
| 860 | * This is id allocator without id -> pointer translation. Memory | 175 | * The IDA is an ID allocator which does not provide the ability to |
| 861 | * usage is much lower than full blown idr because each id only | 176 | * associate an ID with a pointer. As such, it only needs to store one |
| 862 | * occupies a bit. ida uses a custom leaf node which contains | 177 | * bit per ID, and so is more space efficient than an IDR. To use an IDA, |
| 863 | * IDA_BITMAP_BITS slots. | 178 | * define it using DEFINE_IDA() (or embed a &struct ida in a data structure, |
| 864 | * | 179 | * then initialise it using ida_init()). To allocate a new ID, call |
| 865 | * 2007-04-25 written by Tejun Heo <htejun@gmail.com> | 180 | * ida_simple_get(). To free an ID, call ida_simple_remove(). |
| 181 | * | ||
| 182 | * If you have more complex locking requirements, use a loop around | ||
| 183 | * ida_pre_get() and ida_get_new() to allocate a new ID. Then use | ||
| 184 | * ida_remove() to free an ID. You must make sure that ida_get_new() and | ||
| 185 | * ida_remove() cannot be called at the same time as each other for the | ||
| 186 | * same IDA. | ||
| 187 | * | ||
| 188 | * You can also use ida_get_new_above() if you need an ID to be allocated | ||
| 189 | * above a particular number. ida_destroy() can be used to dispose of an | ||
| 190 | * IDA without needing to free the individual IDs in it. You can use | ||
| 191 | * ida_is_empty() to find out whether the IDA has any IDs currently allocated. | ||
| 192 | * | ||
| 193 | * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward | ||
| 194 | * limitation, it should be quite straightforward to raise the maximum. | ||
| 866 | */ | 195 | */ |
| 867 | 196 | ||
| 868 | static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap) | 197 | /* |
| 869 | { | 198 | * Developer's notes: |
| 870 | unsigned long flags; | 199 | * |
| 871 | 200 | * The IDA uses the functionality provided by the IDR & radix tree to store | |
| 872 | if (!ida->free_bitmap) { | 201 | * bitmaps in each entry. The IDR_FREE tag means there is at least one bit |
| 873 | spin_lock_irqsave(&ida->idr.lock, flags); | 202 | * free, unlike the IDR where it means at least one entry is free. |
| 874 | if (!ida->free_bitmap) { | 203 | * |
| 875 | ida->free_bitmap = bitmap; | 204 | * I considered telling the radix tree that each slot is an order-10 node |
| 876 | bitmap = NULL; | 205 | * and storing the bit numbers in the radix tree, but the radix tree can't |
| 877 | } | 206 | * allow a single multiorder entry at index 0, which would significantly |
| 878 | spin_unlock_irqrestore(&ida->idr.lock, flags); | 207 | * increase memory consumption for the IDA. So instead we divide the index |
| 879 | } | 208 | * by the number of bits in the leaf bitmap before doing a radix tree lookup. |
| 880 | 209 | * | |
| 881 | kfree(bitmap); | 210 | * As an optimisation, if there are only a few low bits set in any given |
| 882 | } | 211 | * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional |
| 883 | 212 | * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits | |
| 884 | /** | 213 | * directly in the entry. By being really tricksy, we could store |
| 885 | * ida_pre_get - reserve resources for ida allocation | 214 | * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising |
| 886 | * @ida: ida handle | 215 | * for 0-3 allocated IDs. |
| 887 | * @gfp_mask: memory allocation flag | 216 | * |
| 888 | * | 217 | * We allow the radix tree 'exceptional' count to get out of date. Nothing |
| 889 | * This function should be called prior to locking and calling the | 218 | * in the IDA nor the radix tree code checks it. If it becomes important |
| 890 | * following function. It preallocates enough memory to satisfy the | 219 | * to maintain an accurate exceptional count, switch the rcu_assign_pointer() |
| 891 | * worst possible allocation. | 220 | * calls to radix_tree_iter_replace() which will correct the exceptional |
| 892 | * | 221 | * count. |
| 893 | * If the system is REALLY out of memory this function returns %0, | 222 | * |
| 894 | * otherwise %1. | 223 | * The IDA always requires a lock to alloc/free. If we add a 'test_bit' |
| 224 | * equivalent, it will still need locking. Going to RCU lookup would require | ||
| 225 | * using RCU to free bitmaps, and that's not trivial without embedding an | ||
| 226 | * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte | ||
| 227 | * bitmap, which is excessive. | ||
| 895 | */ | 228 | */ |
| 896 | int ida_pre_get(struct ida *ida, gfp_t gfp_mask) | ||
| 897 | { | ||
| 898 | /* allocate idr_layers */ | ||
| 899 | if (!__idr_pre_get(&ida->idr, gfp_mask)) | ||
| 900 | return 0; | ||
| 901 | 229 | ||
| 902 | /* allocate free_bitmap */ | 230 | #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) |
| 903 | if (!ida->free_bitmap) { | ||
| 904 | struct ida_bitmap *bitmap; | ||
| 905 | |||
| 906 | bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask); | ||
| 907 | if (!bitmap) | ||
| 908 | return 0; | ||
| 909 | |||
| 910 | free_bitmap(ida, bitmap); | ||
| 911 | } | ||
| 912 | |||
| 913 | return 1; | ||
| 914 | } | ||
| 915 | EXPORT_SYMBOL(ida_pre_get); | ||
| 916 | 231 | ||
| 917 | /** | 232 | /** |
| 918 | * ida_get_new_above - allocate new ID above or equal to a start id | 233 | * ida_get_new_above - allocate new ID above or equal to a start id |
| 919 | * @ida: ida handle | 234 | * @ida: ida handle |
| 920 | * @starting_id: id to start search at | 235 | * @start: id to start search at |
| 921 | * @p_id: pointer to the allocated handle | 236 | * @id: pointer to the allocated handle |
| 922 | * | 237 | * |
| 923 | * Allocate new ID above or equal to @starting_id. It should be called | 238 | * Allocate new ID above or equal to @start. It should be called |
| 924 | * with any required locks. | 239 | * with any required locks to ensure that concurrent calls to |
| 240 | * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed. | ||
| 241 | * Consider using ida_simple_get() if you do not have complex locking | ||
| 242 | * requirements. | ||
| 925 | * | 243 | * |
| 926 | * If memory is required, it will return %-EAGAIN, you should unlock | 244 | * If memory is required, it will return %-EAGAIN, you should unlock |
| 927 | * and go back to the ida_pre_get() call. If the ida is full, it will | 245 | * and go back to the ida_pre_get() call. If the ida is full, it will |
| 928 | * return %-ENOSPC. | 246 | * return %-ENOSPC. On success, it will return 0. |
| 929 | * | ||
| 930 | * Note that callers must ensure that concurrent access to @ida is not possible. | ||
| 931 | * See ida_simple_get() for a varaint which takes care of locking. | ||
| 932 | * | 247 | * |
| 933 | * @p_id returns a value in the range @starting_id ... %0x7fffffff. | 248 | * @id returns a value in the range @start ... %0x7fffffff. |
| 934 | */ | 249 | */ |
| 935 | int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) | 250 | int ida_get_new_above(struct ida *ida, int start, int *id) |
| 936 | { | 251 | { |
| 937 | struct idr_layer *pa[MAX_IDR_LEVEL + 1]; | 252 | struct radix_tree_root *root = &ida->ida_rt; |
| 253 | void __rcu **slot; | ||
| 254 | struct radix_tree_iter iter; | ||
| 938 | struct ida_bitmap *bitmap; | 255 | struct ida_bitmap *bitmap; |
| 939 | unsigned long flags; | 256 | unsigned long index; |
| 940 | int idr_id = starting_id / IDA_BITMAP_BITS; | 257 | unsigned bit, ebit; |
| 941 | int offset = starting_id % IDA_BITMAP_BITS; | 258 | int new; |
| 942 | int t, id; | 259 | |
| 943 | 260 | index = start / IDA_BITMAP_BITS; | |
| 944 | restart: | 261 | bit = start % IDA_BITMAP_BITS; |
| 945 | /* get vacant slot */ | 262 | ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; |
| 946 | t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr); | 263 | |
| 947 | if (t < 0) | 264 | slot = radix_tree_iter_init(&iter, index); |
| 948 | return t == -ENOMEM ? -EAGAIN : t; | 265 | for (;;) { |
| 949 | 266 | if (slot) | |
| 950 | if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT) | 267 | slot = radix_tree_next_slot(slot, &iter, |
| 951 | return -ENOSPC; | 268 | RADIX_TREE_ITER_TAGGED); |
| 952 | 269 | if (!slot) { | |
| 953 | if (t != idr_id) | 270 | slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX); |
| 954 | offset = 0; | 271 | if (IS_ERR(slot)) { |
| 955 | idr_id = t; | 272 | if (slot == ERR_PTR(-ENOMEM)) |
| 956 | 273 | return -EAGAIN; | |
| 957 | /* if bitmap isn't there, create a new one */ | 274 | return PTR_ERR(slot); |
| 958 | bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK]; | 275 | } |
| 959 | if (!bitmap) { | 276 | } |
| 960 | spin_lock_irqsave(&ida->idr.lock, flags); | 277 | if (iter.index > index) { |
| 961 | bitmap = ida->free_bitmap; | 278 | bit = 0; |
| 962 | ida->free_bitmap = NULL; | 279 | ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; |
| 963 | spin_unlock_irqrestore(&ida->idr.lock, flags); | 280 | } |
| 964 | 281 | new = iter.index * IDA_BITMAP_BITS; | |
| 965 | if (!bitmap) | 282 | bitmap = rcu_dereference_raw(*slot); |
| 966 | return -EAGAIN; | 283 | if (radix_tree_exception(bitmap)) { |
| 967 | 284 | unsigned long tmp = (unsigned long)bitmap; | |
| 968 | memset(bitmap, 0, sizeof(struct ida_bitmap)); | 285 | ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); |
| 969 | rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK], | 286 | if (ebit < BITS_PER_LONG) { |
| 970 | (void *)bitmap); | 287 | tmp |= 1UL << ebit; |
| 971 | pa[0]->count++; | 288 | rcu_assign_pointer(*slot, (void *)tmp); |
| 972 | } | 289 | *id = new + ebit - RADIX_TREE_EXCEPTIONAL_SHIFT; |
| 973 | 290 | return 0; | |
| 974 | /* lookup for empty slot */ | 291 | } |
| 975 | t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset); | 292 | bitmap = this_cpu_xchg(ida_bitmap, NULL); |
| 976 | if (t == IDA_BITMAP_BITS) { | 293 | if (!bitmap) |
| 977 | /* no empty slot after offset, continue to the next chunk */ | 294 | return -EAGAIN; |
| 978 | idr_id++; | 295 | memset(bitmap, 0, sizeof(*bitmap)); |
| 979 | offset = 0; | 296 | bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; |
| 980 | goto restart; | 297 | rcu_assign_pointer(*slot, bitmap); |
| 981 | } | 298 | } |
| 982 | |||
| 983 | id = idr_id * IDA_BITMAP_BITS + t; | ||
| 984 | if (id >= MAX_IDR_BIT) | ||
| 985 | return -ENOSPC; | ||
| 986 | 299 | ||
| 987 | __set_bit(t, bitmap->bitmap); | 300 | if (bitmap) { |
| 988 | if (++bitmap->nr_busy == IDA_BITMAP_BITS) | 301 | bit = find_next_zero_bit(bitmap->bitmap, |
| 989 | idr_mark_full(pa, idr_id); | 302 | IDA_BITMAP_BITS, bit); |
| 303 | new += bit; | ||
| 304 | if (new < 0) | ||
| 305 | return -ENOSPC; | ||
| 306 | if (bit == IDA_BITMAP_BITS) | ||
| 307 | continue; | ||
| 990 | 308 | ||
| 991 | *p_id = id; | 309 | __set_bit(bit, bitmap->bitmap); |
| 310 | if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) | ||
| 311 | radix_tree_iter_tag_clear(root, &iter, | ||
| 312 | IDR_FREE); | ||
| 313 | } else { | ||
| 314 | new += bit; | ||
| 315 | if (new < 0) | ||
| 316 | return -ENOSPC; | ||
| 317 | if (ebit < BITS_PER_LONG) { | ||
| 318 | bitmap = (void *)((1UL << ebit) | | ||
| 319 | RADIX_TREE_EXCEPTIONAL_ENTRY); | ||
| 320 | radix_tree_iter_replace(root, &iter, slot, | ||
| 321 | bitmap); | ||
| 322 | *id = new; | ||
| 323 | return 0; | ||
| 324 | } | ||
| 325 | bitmap = this_cpu_xchg(ida_bitmap, NULL); | ||
| 326 | if (!bitmap) | ||
| 327 | return -EAGAIN; | ||
| 328 | memset(bitmap, 0, sizeof(*bitmap)); | ||
| 329 | __set_bit(bit, bitmap->bitmap); | ||
| 330 | radix_tree_iter_replace(root, &iter, slot, bitmap); | ||
| 331 | } | ||
| 992 | 332 | ||
| 993 | /* Each leaf node can handle nearly a thousand slots and the | 333 | *id = new; |
| 994 | * whole idea of ida is to have small memory foot print. | 334 | return 0; |
| 995 | * Throw away extra resources one by one after each successful | ||
| 996 | * allocation. | ||
| 997 | */ | ||
| 998 | if (ida->idr.id_free_cnt || ida->free_bitmap) { | ||
| 999 | struct idr_layer *p = get_from_free_list(&ida->idr); | ||
| 1000 | if (p) | ||
| 1001 | kmem_cache_free(idr_layer_cache, p); | ||
| 1002 | } | 335 | } |
| 1003 | |||
| 1004 | return 0; | ||
| 1005 | } | 336 | } |
| 1006 | EXPORT_SYMBOL(ida_get_new_above); | 337 | EXPORT_SYMBOL(ida_get_new_above); |
| 1007 | 338 | ||
| 1008 | /** | 339 | /** |
| 1009 | * ida_remove - remove the given ID | 340 | * ida_remove - Free the given ID |
| 1010 | * @ida: ida handle | 341 | * @ida: ida handle |
| 1011 | * @id: ID to free | 342 | * @id: ID to free |
| 343 | * | ||
| 344 | * This function should not be called at the same time as ida_get_new_above(). | ||
| 1012 | */ | 345 | */ |
| 1013 | void ida_remove(struct ida *ida, int id) | 346 | void ida_remove(struct ida *ida, int id) |
| 1014 | { | 347 | { |
| 1015 | struct idr_layer *p = ida->idr.top; | 348 | unsigned long index = id / IDA_BITMAP_BITS; |
| 1016 | int shift = (ida->idr.layers - 1) * IDR_BITS; | 349 | unsigned offset = id % IDA_BITMAP_BITS; |
| 1017 | int idr_id = id / IDA_BITMAP_BITS; | ||
| 1018 | int offset = id % IDA_BITMAP_BITS; | ||
| 1019 | int n; | ||
| 1020 | struct ida_bitmap *bitmap; | 350 | struct ida_bitmap *bitmap; |
| 351 | unsigned long *btmp; | ||
| 352 | struct radix_tree_iter iter; | ||
| 353 | void __rcu **slot; | ||
| 1021 | 354 | ||
| 1022 | if (idr_id > idr_max(ida->idr.layers)) | 355 | slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index); |
| 356 | if (!slot) | ||
| 1023 | goto err; | 357 | goto err; |
| 1024 | 358 | ||
| 1025 | /* clear full bits while looking up the leaf idr_layer */ | 359 | bitmap = rcu_dereference_raw(*slot); |
| 1026 | while ((shift > 0) && p) { | 360 | if (radix_tree_exception(bitmap)) { |
| 1027 | n = (idr_id >> shift) & IDR_MASK; | 361 | btmp = (unsigned long *)slot; |
| 1028 | __clear_bit(n, p->bitmap); | 362 | offset += RADIX_TREE_EXCEPTIONAL_SHIFT; |
| 1029 | p = p->ary[n]; | 363 | if (offset >= BITS_PER_LONG) |
| 1030 | shift -= IDR_BITS; | 364 | goto err; |
| 365 | } else { | ||
| 366 | btmp = bitmap->bitmap; | ||
| 1031 | } | 367 | } |
| 1032 | 368 | if (!test_bit(offset, btmp)) | |
| 1033 | if (p == NULL) | ||
| 1034 | goto err; | ||
| 1035 | |||
| 1036 | n = idr_id & IDR_MASK; | ||
| 1037 | __clear_bit(n, p->bitmap); | ||
| 1038 | |||
| 1039 | bitmap = (void *)p->ary[n]; | ||
| 1040 | if (!bitmap || !test_bit(offset, bitmap->bitmap)) | ||
| 1041 | goto err; | 369 | goto err; |
| 1042 | 370 | ||
| 1043 | /* update bitmap and remove it if empty */ | 371 | __clear_bit(offset, btmp); |
| 1044 | __clear_bit(offset, bitmap->bitmap); | 372 | radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); |
| 1045 | if (--bitmap->nr_busy == 0) { | 373 | if (radix_tree_exception(bitmap)) { |
| 1046 | __set_bit(n, p->bitmap); /* to please idr_remove() */ | 374 | if (rcu_dereference_raw(*slot) == |
| 1047 | idr_remove(&ida->idr, idr_id); | 375 | (void *)RADIX_TREE_EXCEPTIONAL_ENTRY) |
| 1048 | free_bitmap(ida, bitmap); | 376 | radix_tree_iter_delete(&ida->ida_rt, &iter, slot); |
| 377 | } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { | ||
| 378 | kfree(bitmap); | ||
| 379 | radix_tree_iter_delete(&ida->ida_rt, &iter, slot); | ||
| 1049 | } | 380 | } |
| 1050 | |||
| 1051 | return; | 381 | return; |
| 1052 | |||
| 1053 | err: | 382 | err: |
| 1054 | WARN(1, "ida_remove called for id=%d which is not allocated.\n", id); | 383 | WARN(1, "ida_remove called for id=%d which is not allocated.\n", id); |
| 1055 | } | 384 | } |
| 1056 | EXPORT_SYMBOL(ida_remove); | 385 | EXPORT_SYMBOL(ida_remove); |
| 1057 | 386 | ||
| 1058 | /** | 387 | /** |
| 1059 | * ida_destroy - release all cached layers within an ida tree | 388 | * ida_destroy - Free the contents of an ida |
| 1060 | * @ida: ida handle | 389 | * @ida: ida handle |
| 390 | * | ||
| 391 | * Calling this function releases all resources associated with an IDA. When | ||
| 392 | * this call returns, the IDA is empty and can be reused or freed. The caller | ||
| 393 | * should not allow ida_remove() or ida_get_new_above() to be called at the | ||
| 394 | * same time. | ||
| 1061 | */ | 395 | */ |
| 1062 | void ida_destroy(struct ida *ida) | 396 | void ida_destroy(struct ida *ida) |
| 1063 | { | 397 | { |
| 1064 | idr_destroy(&ida->idr); | 398 | struct radix_tree_iter iter; |
| 1065 | kfree(ida->free_bitmap); | 399 | void __rcu **slot; |
| 400 | |||
| 401 | radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { | ||
| 402 | struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); | ||
| 403 | if (!radix_tree_exception(bitmap)) | ||
| 404 | kfree(bitmap); | ||
| 405 | radix_tree_iter_delete(&ida->ida_rt, &iter, slot); | ||
| 406 | } | ||
| 1066 | } | 407 | } |
| 1067 | EXPORT_SYMBOL(ida_destroy); | 408 | EXPORT_SYMBOL(ida_destroy); |
| 1068 | 409 | ||
| @@ -1141,18 +482,3 @@ void ida_simple_remove(struct ida *ida, unsigned int id) | |||
| 1141 | spin_unlock_irqrestore(&simple_ida_lock, flags); | 482 | spin_unlock_irqrestore(&simple_ida_lock, flags); |
| 1142 | } | 483 | } |
| 1143 | EXPORT_SYMBOL(ida_simple_remove); | 484 | EXPORT_SYMBOL(ida_simple_remove); |
| 1144 | |||
| 1145 | /** | ||
| 1146 | * ida_init - initialize ida handle | ||
| 1147 | * @ida: ida handle | ||
| 1148 | * | ||
| 1149 | * This function is use to set up the handle (@ida) that you will pass | ||
| 1150 | * to the rest of the functions. | ||
| 1151 | */ | ||
| 1152 | void ida_init(struct ida *ida) | ||
| 1153 | { | ||
| 1154 | memset(ida, 0, sizeof(struct ida)); | ||
| 1155 | idr_init(&ida->idr); | ||
| 1156 | |||
| 1157 | } | ||
| 1158 | EXPORT_SYMBOL(ida_init); | ||
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 72fab4999c00..5ed506d648c4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
| @@ -22,20 +22,21 @@ | |||
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include <linux/bitmap.h> | ||
| 26 | #include <linux/bitops.h> | ||
| 25 | #include <linux/cpu.h> | 27 | #include <linux/cpu.h> |
| 26 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
| 29 | #include <linux/export.h> | ||
| 30 | #include <linux/idr.h> | ||
| 27 | #include <linux/init.h> | 31 | #include <linux/init.h> |
| 28 | #include <linux/kernel.h> | 32 | #include <linux/kernel.h> |
| 29 | #include <linux/export.h> | 33 | #include <linux/kmemleak.h> |
| 30 | #include <linux/radix-tree.h> | ||
| 31 | #include <linux/percpu.h> | 34 | #include <linux/percpu.h> |
| 35 | #include <linux/preempt.h> /* in_interrupt() */ | ||
| 36 | #include <linux/radix-tree.h> | ||
| 37 | #include <linux/rcupdate.h> | ||
| 32 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
| 33 | #include <linux/kmemleak.h> | ||
| 34 | #include <linux/cpu.h> | ||
| 35 | #include <linux/string.h> | 39 | #include <linux/string.h> |
| 36 | #include <linux/bitops.h> | ||
| 37 | #include <linux/rcupdate.h> | ||
| 38 | #include <linux/preempt.h> /* in_interrupt() */ | ||
| 39 | 40 | ||
| 40 | 41 | ||
| 41 | /* Number of nodes in fully populated tree of given height */ | 42 | /* Number of nodes in fully populated tree of given height */ |
| @@ -60,11 +61,28 @@ static struct kmem_cache *radix_tree_node_cachep; | |||
| 60 | #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) | 61 | #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) |
| 61 | 62 | ||
| 62 | /* | 63 | /* |
| 64 | * The IDR does not have to be as high as the radix tree since it uses | ||
| 65 | * signed integers, not unsigned longs. | ||
| 66 | */ | ||
| 67 | #define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) | ||
| 68 | #define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ | ||
| 69 | RADIX_TREE_MAP_SHIFT)) | ||
| 70 | #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) | ||
| 71 | |||
| 72 | /* | ||
| 73 | * The IDA is even shorter since it uses a bitmap at the last level. | ||
| 74 | */ | ||
| 75 | #define IDA_INDEX_BITS (8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS)) | ||
| 76 | #define IDA_MAX_PATH (DIV_ROUND_UP(IDA_INDEX_BITS, \ | ||
| 77 | RADIX_TREE_MAP_SHIFT)) | ||
| 78 | #define IDA_PRELOAD_SIZE (IDA_MAX_PATH * 2 - 1) | ||
| 79 | |||
| 80 | /* | ||
| 63 | * Per-cpu pool of preloaded nodes | 81 | * Per-cpu pool of preloaded nodes |
| 64 | */ | 82 | */ |
| 65 | struct radix_tree_preload { | 83 | struct radix_tree_preload { |
| 66 | unsigned nr; | 84 | unsigned nr; |
| 67 | /* nodes->private_data points to next preallocated node */ | 85 | /* nodes->parent points to next preallocated node */ |
| 68 | struct radix_tree_node *nodes; | 86 | struct radix_tree_node *nodes; |
| 69 | }; | 87 | }; |
| 70 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; | 88 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; |
| @@ -83,35 +101,38 @@ static inline void *node_to_entry(void *ptr) | |||
| 83 | 101 | ||
| 84 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 102 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
| 85 | /* Sibling slots point directly to another slot in the same node */ | 103 | /* Sibling slots point directly to another slot in the same node */ |
| 86 | static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) | 104 | static inline |
| 105 | bool is_sibling_entry(const struct radix_tree_node *parent, void *node) | ||
| 87 | { | 106 | { |
| 88 | void **ptr = node; | 107 | void __rcu **ptr = node; |
| 89 | return (parent->slots <= ptr) && | 108 | return (parent->slots <= ptr) && |
| 90 | (ptr < parent->slots + RADIX_TREE_MAP_SIZE); | 109 | (ptr < parent->slots + RADIX_TREE_MAP_SIZE); |
| 91 | } | 110 | } |
| 92 | #else | 111 | #else |
| 93 | static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) | 112 | static inline |
| 113 | bool is_sibling_entry(const struct radix_tree_node *parent, void *node) | ||
| 94 | { | 114 | { |
| 95 | return false; | 115 | return false; |
| 96 | } | 116 | } |
| 97 | #endif | 117 | #endif |
| 98 | 118 | ||
| 99 | static inline unsigned long get_slot_offset(struct radix_tree_node *parent, | 119 | static inline unsigned long |
| 100 | void **slot) | 120 | get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) |
| 101 | { | 121 | { |
| 102 | return slot - parent->slots; | 122 | return slot - parent->slots; |
| 103 | } | 123 | } |
| 104 | 124 | ||
| 105 | static unsigned int radix_tree_descend(struct radix_tree_node *parent, | 125 | static unsigned int radix_tree_descend(const struct radix_tree_node *parent, |
| 106 | struct radix_tree_node **nodep, unsigned long index) | 126 | struct radix_tree_node **nodep, unsigned long index) |
| 107 | { | 127 | { |
| 108 | unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; | 128 | unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; |
| 109 | void **entry = rcu_dereference_raw(parent->slots[offset]); | 129 | void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); |
| 110 | 130 | ||
| 111 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 131 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
| 112 | if (radix_tree_is_internal_node(entry)) { | 132 | if (radix_tree_is_internal_node(entry)) { |
| 113 | if (is_sibling_entry(parent, entry)) { | 133 | if (is_sibling_entry(parent, entry)) { |
| 114 | void **sibentry = (void **) entry_to_node(entry); | 134 | void __rcu **sibentry; |
| 135 | sibentry = (void __rcu **) entry_to_node(entry); | ||
| 115 | offset = get_slot_offset(parent, sibentry); | 136 | offset = get_slot_offset(parent, sibentry); |
| 116 | entry = rcu_dereference_raw(*sibentry); | 137 | entry = rcu_dereference_raw(*sibentry); |
| 117 | } | 138 | } |
| @@ -122,7 +143,7 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent, | |||
| 122 | return offset; | 143 | return offset; |
| 123 | } | 144 | } |
| 124 | 145 | ||
| 125 | static inline gfp_t root_gfp_mask(struct radix_tree_root *root) | 146 | static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) |
| 126 | { | 147 | { |
| 127 | return root->gfp_mask & __GFP_BITS_MASK; | 148 | return root->gfp_mask & __GFP_BITS_MASK; |
| 128 | } | 149 | } |
| @@ -139,42 +160,48 @@ static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, | |||
| 139 | __clear_bit(offset, node->tags[tag]); | 160 | __clear_bit(offset, node->tags[tag]); |
| 140 | } | 161 | } |
| 141 | 162 | ||
| 142 | static inline int tag_get(struct radix_tree_node *node, unsigned int tag, | 163 | static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, |
| 143 | int offset) | 164 | int offset) |
| 144 | { | 165 | { |
| 145 | return test_bit(offset, node->tags[tag]); | 166 | return test_bit(offset, node->tags[tag]); |
| 146 | } | 167 | } |
| 147 | 168 | ||
| 148 | static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) | 169 | static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) |
| 149 | { | 170 | { |
| 150 | root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); | 171 | root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); |
| 151 | } | 172 | } |
| 152 | 173 | ||
| 153 | static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) | 174 | static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) |
| 154 | { | 175 | { |
| 155 | root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); | 176 | root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); |
| 156 | } | 177 | } |
| 157 | 178 | ||
| 158 | static inline void root_tag_clear_all(struct radix_tree_root *root) | 179 | static inline void root_tag_clear_all(struct radix_tree_root *root) |
| 159 | { | 180 | { |
| 160 | root->gfp_mask &= __GFP_BITS_MASK; | 181 | root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1; |
| 182 | } | ||
| 183 | |||
| 184 | static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) | ||
| 185 | { | ||
| 186 | return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT)); | ||
| 161 | } | 187 | } |
| 162 | 188 | ||
| 163 | static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) | 189 | static inline unsigned root_tags_get(const struct radix_tree_root *root) |
| 164 | { | 190 | { |
| 165 | return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); | 191 | return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT; |
| 166 | } | 192 | } |
| 167 | 193 | ||
| 168 | static inline unsigned root_tags_get(struct radix_tree_root *root) | 194 | static inline bool is_idr(const struct radix_tree_root *root) |
| 169 | { | 195 | { |
| 170 | return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT; | 196 | return !!(root->gfp_mask & ROOT_IS_IDR); |
| 171 | } | 197 | } |
| 172 | 198 | ||
| 173 | /* | 199 | /* |
| 174 | * Returns 1 if any slot in the node has this tag set. | 200 | * Returns 1 if any slot in the node has this tag set. |
| 175 | * Otherwise returns 0. | 201 | * Otherwise returns 0. |
| 176 | */ | 202 | */ |
| 177 | static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) | 203 | static inline int any_tag_set(const struct radix_tree_node *node, |
| 204 | unsigned int tag) | ||
| 178 | { | 205 | { |
| 179 | unsigned idx; | 206 | unsigned idx; |
| 180 | for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { | 207 | for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { |
| @@ -184,6 +211,11 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) | |||
| 184 | return 0; | 211 | return 0; |
| 185 | } | 212 | } |
| 186 | 213 | ||
| 214 | static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) | ||
| 215 | { | ||
| 216 | bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); | ||
| 217 | } | ||
| 218 | |||
| 187 | /** | 219 | /** |
| 188 | * radix_tree_find_next_bit - find the next set bit in a memory region | 220 | * radix_tree_find_next_bit - find the next set bit in a memory region |
| 189 | * | 221 | * |
| @@ -232,11 +264,18 @@ static inline unsigned long shift_maxindex(unsigned int shift) | |||
| 232 | return (RADIX_TREE_MAP_SIZE << shift) - 1; | 264 | return (RADIX_TREE_MAP_SIZE << shift) - 1; |
| 233 | } | 265 | } |
| 234 | 266 | ||
| 235 | static inline unsigned long node_maxindex(struct radix_tree_node *node) | 267 | static inline unsigned long node_maxindex(const struct radix_tree_node *node) |
| 236 | { | 268 | { |
| 237 | return shift_maxindex(node->shift); | 269 | return shift_maxindex(node->shift); |
| 238 | } | 270 | } |
| 239 | 271 | ||
| 272 | static unsigned long next_index(unsigned long index, | ||
| 273 | const struct radix_tree_node *node, | ||
| 274 | unsigned long offset) | ||
| 275 | { | ||
| 276 | return (index & ~node_maxindex(node)) + (offset << node->shift); | ||
| 277 | } | ||
| 278 | |||
| 240 | #ifndef __KERNEL__ | 279 | #ifndef __KERNEL__ |
| 241 | static void dump_node(struct radix_tree_node *node, unsigned long index) | 280 | static void dump_node(struct radix_tree_node *node, unsigned long index) |
| 242 | { | 281 | { |
| @@ -275,11 +314,59 @@ static void radix_tree_dump(struct radix_tree_root *root) | |||
| 275 | { | 314 | { |
| 276 | pr_debug("radix root: %p rnode %p tags %x\n", | 315 | pr_debug("radix root: %p rnode %p tags %x\n", |
| 277 | root, root->rnode, | 316 | root, root->rnode, |
| 278 | root->gfp_mask >> __GFP_BITS_SHIFT); | 317 | root->gfp_mask >> ROOT_TAG_SHIFT); |
| 279 | if (!radix_tree_is_internal_node(root->rnode)) | 318 | if (!radix_tree_is_internal_node(root->rnode)) |
| 280 | return; | 319 | return; |
| 281 | dump_node(entry_to_node(root->rnode), 0); | 320 | dump_node(entry_to_node(root->rnode), 0); |
| 282 | } | 321 | } |
| 322 | |||
| 323 | static void dump_ida_node(void *entry, unsigned long index) | ||
| 324 | { | ||
| 325 | unsigned long i; | ||
| 326 | |||
| 327 | if (!entry) | ||
| 328 | return; | ||
| 329 | |||
| 330 | if (radix_tree_is_internal_node(entry)) { | ||
| 331 | struct radix_tree_node *node = entry_to_node(entry); | ||
| 332 | |||
| 333 | pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n", | ||
| 334 | node, node->offset, index * IDA_BITMAP_BITS, | ||
| 335 | ((index | node_maxindex(node)) + 1) * | ||
| 336 | IDA_BITMAP_BITS - 1, | ||
| 337 | node->parent, node->tags[0][0], node->shift, | ||
| 338 | node->count); | ||
| 339 | for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) | ||
| 340 | dump_ida_node(node->slots[i], | ||
| 341 | index | (i << node->shift)); | ||
| 342 | } else if (radix_tree_exceptional_entry(entry)) { | ||
| 343 | pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", | ||
| 344 | entry, (int)(index & RADIX_TREE_MAP_MASK), | ||
| 345 | index * IDA_BITMAP_BITS, | ||
| 346 | index * IDA_BITMAP_BITS + BITS_PER_LONG - | ||
| 347 | RADIX_TREE_EXCEPTIONAL_SHIFT, | ||
| 348 | (unsigned long)entry >> | ||
| 349 | RADIX_TREE_EXCEPTIONAL_SHIFT); | ||
| 350 | } else { | ||
| 351 | struct ida_bitmap *bitmap = entry; | ||
| 352 | |||
| 353 | pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap, | ||
| 354 | (int)(index & RADIX_TREE_MAP_MASK), | ||
| 355 | index * IDA_BITMAP_BITS, | ||
| 356 | (index + 1) * IDA_BITMAP_BITS - 1); | ||
| 357 | for (i = 0; i < IDA_BITMAP_LONGS; i++) | ||
| 358 | pr_cont(" %lx", bitmap->bitmap[i]); | ||
| 359 | pr_cont("\n"); | ||
| 360 | } | ||
| 361 | } | ||
| 362 | |||
| 363 | static void ida_dump(struct ida *ida) | ||
| 364 | { | ||
| 365 | struct radix_tree_root *root = &ida->ida_rt; | ||
| 366 | pr_debug("ida: %p node %p free %d\n", ida, root->rnode, | ||
| 367 | root->gfp_mask >> ROOT_TAG_SHIFT); | ||
| 368 | dump_ida_node(root->rnode, 0); | ||
| 369 | } | ||
| 283 | #endif | 370 | #endif |
| 284 | 371 | ||
| 285 | /* | 372 | /* |
| @@ -287,13 +374,12 @@ static void radix_tree_dump(struct radix_tree_root *root) | |||
| 287 | * that the caller has pinned this thread of control to the current CPU. | 374 | * that the caller has pinned this thread of control to the current CPU. |
| 288 | */ | 375 | */ |
| 289 | static struct radix_tree_node * | 376 | static struct radix_tree_node * |
| 290 | radix_tree_node_alloc(struct radix_tree_root *root, | 377 | radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, |
| 291 | struct radix_tree_node *parent, | 378 | struct radix_tree_root *root, |
| 292 | unsigned int shift, unsigned int offset, | 379 | unsigned int shift, unsigned int offset, |
| 293 | unsigned int count, unsigned int exceptional) | 380 | unsigned int count, unsigned int exceptional) |
| 294 | { | 381 | { |
| 295 | struct radix_tree_node *ret = NULL; | 382 | struct radix_tree_node *ret = NULL; |
| 296 | gfp_t gfp_mask = root_gfp_mask(root); | ||
| 297 | 383 | ||
| 298 | /* | 384 | /* |
| 299 | * Preload code isn't irq safe and it doesn't make sense to use | 385 | * Preload code isn't irq safe and it doesn't make sense to use |
| @@ -321,8 +407,7 @@ radix_tree_node_alloc(struct radix_tree_root *root, | |||
| 321 | rtp = this_cpu_ptr(&radix_tree_preloads); | 407 | rtp = this_cpu_ptr(&radix_tree_preloads); |
| 322 | if (rtp->nr) { | 408 | if (rtp->nr) { |
| 323 | ret = rtp->nodes; | 409 | ret = rtp->nodes; |
| 324 | rtp->nodes = ret->private_data; | 410 | rtp->nodes = ret->parent; |
| 325 | ret->private_data = NULL; | ||
| 326 | rtp->nr--; | 411 | rtp->nr--; |
| 327 | } | 412 | } |
| 328 | /* | 413 | /* |
| @@ -336,11 +421,12 @@ radix_tree_node_alloc(struct radix_tree_root *root, | |||
| 336 | out: | 421 | out: |
| 337 | BUG_ON(radix_tree_is_internal_node(ret)); | 422 | BUG_ON(radix_tree_is_internal_node(ret)); |
| 338 | if (ret) { | 423 | if (ret) { |
| 339 | ret->parent = parent; | ||
| 340 | ret->shift = shift; | 424 | ret->shift = shift; |
| 341 | ret->offset = offset; | 425 | ret->offset = offset; |
| 342 | ret->count = count; | 426 | ret->count = count; |
| 343 | ret->exceptional = exceptional; | 427 | ret->exceptional = exceptional; |
| 428 | ret->parent = parent; | ||
| 429 | ret->root = root; | ||
| 344 | } | 430 | } |
| 345 | return ret; | 431 | return ret; |
| 346 | } | 432 | } |
| @@ -399,7 +485,7 @@ static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) | |||
| 399 | preempt_disable(); | 485 | preempt_disable(); |
| 400 | rtp = this_cpu_ptr(&radix_tree_preloads); | 486 | rtp = this_cpu_ptr(&radix_tree_preloads); |
| 401 | if (rtp->nr < nr) { | 487 | if (rtp->nr < nr) { |
| 402 | node->private_data = rtp->nodes; | 488 | node->parent = rtp->nodes; |
| 403 | rtp->nodes = node; | 489 | rtp->nodes = node; |
| 404 | rtp->nr++; | 490 | rtp->nr++; |
| 405 | } else { | 491 | } else { |
| @@ -510,7 +596,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |||
| 510 | return __radix_tree_preload(gfp_mask, nr_nodes); | 596 | return __radix_tree_preload(gfp_mask, nr_nodes); |
| 511 | } | 597 | } |
| 512 | 598 | ||
| 513 | static unsigned radix_tree_load_root(struct radix_tree_root *root, | 599 | static unsigned radix_tree_load_root(const struct radix_tree_root *root, |
| 514 | struct radix_tree_node **nodep, unsigned long *maxindex) | 600 | struct radix_tree_node **nodep, unsigned long *maxindex) |
| 515 | { | 601 | { |
| 516 | struct radix_tree_node *node = rcu_dereference_raw(root->rnode); | 602 | struct radix_tree_node *node = rcu_dereference_raw(root->rnode); |
| @@ -530,10 +616,10 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, | |||
| 530 | /* | 616 | /* |
| 531 | * Extend a radix tree so it can store key @index. | 617 | * Extend a radix tree so it can store key @index. |
| 532 | */ | 618 | */ |
| 533 | static int radix_tree_extend(struct radix_tree_root *root, | 619 | static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, |
| 534 | unsigned long index, unsigned int shift) | 620 | unsigned long index, unsigned int shift) |
| 535 | { | 621 | { |
| 536 | struct radix_tree_node *slot; | 622 | void *entry; |
| 537 | unsigned int maxshift; | 623 | unsigned int maxshift; |
| 538 | int tag; | 624 | int tag; |
| 539 | 625 | ||
| @@ -542,32 +628,44 @@ static int radix_tree_extend(struct radix_tree_root *root, | |||
| 542 | while (index > shift_maxindex(maxshift)) | 628 | while (index > shift_maxindex(maxshift)) |
| 543 | maxshift += RADIX_TREE_MAP_SHIFT; | 629 | maxshift += RADIX_TREE_MAP_SHIFT; |
| 544 | 630 | ||
| 545 | slot = root->rnode; | 631 | entry = rcu_dereference_raw(root->rnode); |
| 546 | if (!slot) | 632 | if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) |
| 547 | goto out; | 633 | goto out; |
| 548 | 634 | ||
| 549 | do { | 635 | do { |
| 550 | struct radix_tree_node *node = radix_tree_node_alloc(root, | 636 | struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, |
| 551 | NULL, shift, 0, 1, 0); | 637 | root, shift, 0, 1, 0); |
| 552 | if (!node) | 638 | if (!node) |
| 553 | return -ENOMEM; | 639 | return -ENOMEM; |
| 554 | 640 | ||
| 555 | /* Propagate the aggregated tag info into the new root */ | 641 | if (is_idr(root)) { |
| 556 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { | 642 | all_tag_set(node, IDR_FREE); |
| 557 | if (root_tag_get(root, tag)) | 643 | if (!root_tag_get(root, IDR_FREE)) { |
| 558 | tag_set(node, tag, 0); | 644 | tag_clear(node, IDR_FREE, 0); |
| 645 | root_tag_set(root, IDR_FREE); | ||
| 646 | } | ||
| 647 | } else { | ||
| 648 | /* Propagate the aggregated tag info to the new child */ | ||
| 649 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { | ||
| 650 | if (root_tag_get(root, tag)) | ||
| 651 | tag_set(node, tag, 0); | ||
| 652 | } | ||
| 559 | } | 653 | } |
| 560 | 654 | ||
| 561 | BUG_ON(shift > BITS_PER_LONG); | 655 | BUG_ON(shift > BITS_PER_LONG); |
| 562 | if (radix_tree_is_internal_node(slot)) { | 656 | if (radix_tree_is_internal_node(entry)) { |
| 563 | entry_to_node(slot)->parent = node; | 657 | entry_to_node(entry)->parent = node; |
| 564 | } else if (radix_tree_exceptional_entry(slot)) { | 658 | } else if (radix_tree_exceptional_entry(entry)) { |
| 565 | /* Moving an exceptional root->rnode to a node */ | 659 | /* Moving an exceptional root->rnode to a node */ |
| 566 | node->exceptional = 1; | 660 | node->exceptional = 1; |
| 567 | } | 661 | } |
| 568 | node->slots[0] = slot; | 662 | /* |
| 569 | slot = node_to_entry(node); | 663 | * entry was already in the radix tree, so we do not need |
| 570 | rcu_assign_pointer(root->rnode, slot); | 664 | * rcu_assign_pointer here |
| 665 | */ | ||
| 666 | node->slots[0] = (void __rcu *)entry; | ||
| 667 | entry = node_to_entry(node); | ||
| 668 | rcu_assign_pointer(root->rnode, entry); | ||
| 571 | shift += RADIX_TREE_MAP_SHIFT; | 669 | shift += RADIX_TREE_MAP_SHIFT; |
| 572 | } while (shift <= maxshift); | 670 | } while (shift <= maxshift); |
| 573 | out: | 671 | out: |
| @@ -578,12 +676,14 @@ out: | |||
| 578 | * radix_tree_shrink - shrink radix tree to minimum height | 676 | * radix_tree_shrink - shrink radix tree to minimum height |
| 579 | * @root radix tree root | 677 | * @root radix tree root |
| 580 | */ | 678 | */ |
| 581 | static inline void radix_tree_shrink(struct radix_tree_root *root, | 679 | static inline bool radix_tree_shrink(struct radix_tree_root *root, |
| 582 | radix_tree_update_node_t update_node, | 680 | radix_tree_update_node_t update_node, |
| 583 | void *private) | 681 | void *private) |
| 584 | { | 682 | { |
| 683 | bool shrunk = false; | ||
| 684 | |||
| 585 | for (;;) { | 685 | for (;;) { |
| 586 | struct radix_tree_node *node = root->rnode; | 686 | struct radix_tree_node *node = rcu_dereference_raw(root->rnode); |
| 587 | struct radix_tree_node *child; | 687 | struct radix_tree_node *child; |
| 588 | 688 | ||
| 589 | if (!radix_tree_is_internal_node(node)) | 689 | if (!radix_tree_is_internal_node(node)) |
| @@ -597,7 +697,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root, | |||
| 597 | */ | 697 | */ |
| 598 | if (node->count != 1) | 698 | if (node->count != 1) |
| 599 | break; | 699 | break; |
| 600 | child = node->slots[0]; | 700 | child = rcu_dereference_raw(node->slots[0]); |
| 601 | if (!child) | 701 | if (!child) |
| 602 | break; | 702 | break; |
| 603 | if (!radix_tree_is_internal_node(child) && node->shift) | 703 | if (!radix_tree_is_internal_node(child) && node->shift) |
| @@ -613,7 +713,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root, | |||
| 613 | * (node->slots[0]), it will be safe to dereference the new | 713 | * (node->slots[0]), it will be safe to dereference the new |
| 614 | * one (root->rnode) as far as dependent read barriers go. | 714 | * one (root->rnode) as far as dependent read barriers go. |
| 615 | */ | 715 | */ |
| 616 | root->rnode = child; | 716 | root->rnode = (void __rcu *)child; |
| 717 | if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) | ||
| 718 | root_tag_clear(root, IDR_FREE); | ||
| 617 | 719 | ||
| 618 | /* | 720 | /* |
| 619 | * We have a dilemma here. The node's slot[0] must not be | 721 | * We have a dilemma here. The node's slot[0] must not be |
| @@ -635,27 +737,34 @@ static inline void radix_tree_shrink(struct radix_tree_root *root, | |||
| 635 | */ | 737 | */ |
| 636 | node->count = 0; | 738 | node->count = 0; |
| 637 | if (!radix_tree_is_internal_node(child)) { | 739 | if (!radix_tree_is_internal_node(child)) { |
| 638 | node->slots[0] = RADIX_TREE_RETRY; | 740 | node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; |
| 639 | if (update_node) | 741 | if (update_node) |
| 640 | update_node(node, private); | 742 | update_node(node, private); |
| 641 | } | 743 | } |
| 642 | 744 | ||
| 643 | WARN_ON_ONCE(!list_empty(&node->private_list)); | 745 | WARN_ON_ONCE(!list_empty(&node->private_list)); |
| 644 | radix_tree_node_free(node); | 746 | radix_tree_node_free(node); |
| 747 | shrunk = true; | ||
| 645 | } | 748 | } |
| 749 | |||
| 750 | return shrunk; | ||
| 646 | } | 751 | } |
| 647 | 752 | ||
| 648 | static void delete_node(struct radix_tree_root *root, | 753 | static bool delete_node(struct radix_tree_root *root, |
| 649 | struct radix_tree_node *node, | 754 | struct radix_tree_node *node, |
| 650 | radix_tree_update_node_t update_node, void *private) | 755 | radix_tree_update_node_t update_node, void *private) |
| 651 | { | 756 | { |
| 757 | bool deleted = false; | ||
| 758 | |||
| 652 | do { | 759 | do { |
| 653 | struct radix_tree_node *parent; | 760 | struct radix_tree_node *parent; |
| 654 | 761 | ||
| 655 | if (node->count) { | 762 | if (node->count) { |
| 656 | if (node == entry_to_node(root->rnode)) | 763 | if (node_to_entry(node) == |
| 657 | radix_tree_shrink(root, update_node, private); | 764 | rcu_dereference_raw(root->rnode)) |
| 658 | return; | 765 | deleted |= radix_tree_shrink(root, update_node, |
| 766 | private); | ||
| 767 | return deleted; | ||
| 659 | } | 768 | } |
| 660 | 769 | ||
| 661 | parent = node->parent; | 770 | parent = node->parent; |
| @@ -663,15 +772,23 @@ static void delete_node(struct radix_tree_root *root, | |||
| 663 | parent->slots[node->offset] = NULL; | 772 | parent->slots[node->offset] = NULL; |
| 664 | parent->count--; | 773 | parent->count--; |
| 665 | } else { | 774 | } else { |
| 666 | root_tag_clear_all(root); | 775 | /* |
| 776 | * Shouldn't the tags already have all been cleared | ||
| 777 | * by the caller? | ||
| 778 | */ | ||
| 779 | if (!is_idr(root)) | ||
| 780 | root_tag_clear_all(root); | ||
| 667 | root->rnode = NULL; | 781 | root->rnode = NULL; |
| 668 | } | 782 | } |
| 669 | 783 | ||
| 670 | WARN_ON_ONCE(!list_empty(&node->private_list)); | 784 | WARN_ON_ONCE(!list_empty(&node->private_list)); |
| 671 | radix_tree_node_free(node); | 785 | radix_tree_node_free(node); |
| 786 | deleted = true; | ||
| 672 | 787 | ||
| 673 | node = parent; | 788 | node = parent; |
| 674 | } while (node); | 789 | } while (node); |
| 790 | |||
| 791 | return deleted; | ||
| 675 | } | 792 | } |
| 676 | 793 | ||
| 677 | /** | 794 | /** |
| @@ -693,13 +810,14 @@ static void delete_node(struct radix_tree_root *root, | |||
| 693 | */ | 810 | */ |
| 694 | int __radix_tree_create(struct radix_tree_root *root, unsigned long index, | 811 | int __radix_tree_create(struct radix_tree_root *root, unsigned long index, |
| 695 | unsigned order, struct radix_tree_node **nodep, | 812 | unsigned order, struct radix_tree_node **nodep, |
| 696 | void ***slotp) | 813 | void __rcu ***slotp) |
| 697 | { | 814 | { |
| 698 | struct radix_tree_node *node = NULL, *child; | 815 | struct radix_tree_node *node = NULL, *child; |
| 699 | void **slot = (void **)&root->rnode; | 816 | void __rcu **slot = (void __rcu **)&root->rnode; |
| 700 | unsigned long maxindex; | 817 | unsigned long maxindex; |
| 701 | unsigned int shift, offset = 0; | 818 | unsigned int shift, offset = 0; |
| 702 | unsigned long max = index | ((1UL << order) - 1); | 819 | unsigned long max = index | ((1UL << order) - 1); |
| 820 | gfp_t gfp = root_gfp_mask(root); | ||
| 703 | 821 | ||
| 704 | shift = radix_tree_load_root(root, &child, &maxindex); | 822 | shift = radix_tree_load_root(root, &child, &maxindex); |
| 705 | 823 | ||
| @@ -707,18 +825,18 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, | |||
| 707 | if (order > 0 && max == ((1UL << order) - 1)) | 825 | if (order > 0 && max == ((1UL << order) - 1)) |
| 708 | max++; | 826 | max++; |
| 709 | if (max > maxindex) { | 827 | if (max > maxindex) { |
| 710 | int error = radix_tree_extend(root, max, shift); | 828 | int error = radix_tree_extend(root, gfp, max, shift); |
| 711 | if (error < 0) | 829 | if (error < 0) |
| 712 | return error; | 830 | return error; |
| 713 | shift = error; | 831 | shift = error; |
| 714 | child = root->rnode; | 832 | child = rcu_dereference_raw(root->rnode); |
| 715 | } | 833 | } |
| 716 | 834 | ||
| 717 | while (shift > order) { | 835 | while (shift > order) { |
| 718 | shift -= RADIX_TREE_MAP_SHIFT; | 836 | shift -= RADIX_TREE_MAP_SHIFT; |
| 719 | if (child == NULL) { | 837 | if (child == NULL) { |
| 720 | /* Have to add a child node. */ | 838 | /* Have to add a child node. */ |
| 721 | child = radix_tree_node_alloc(root, node, shift, | 839 | child = radix_tree_node_alloc(gfp, node, root, shift, |
| 722 | offset, 0, 0); | 840 | offset, 0, 0); |
| 723 | if (!child) | 841 | if (!child) |
| 724 | return -ENOMEM; | 842 | return -ENOMEM; |
| @@ -741,7 +859,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, | |||
| 741 | return 0; | 859 | return 0; |
| 742 | } | 860 | } |
| 743 | 861 | ||
| 744 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | ||
| 745 | /* | 862 | /* |
| 746 | * Free any nodes below this node. The tree is presumed to not need | 863 | * Free any nodes below this node. The tree is presumed to not need |
| 747 | * shrinking, and any user data in the tree is presumed to not need a | 864 | * shrinking, and any user data in the tree is presumed to not need a |
| @@ -757,7 +874,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) | |||
| 757 | struct radix_tree_node *child = entry_to_node(node); | 874 | struct radix_tree_node *child = entry_to_node(node); |
| 758 | 875 | ||
| 759 | for (;;) { | 876 | for (;;) { |
| 760 | void *entry = child->slots[offset]; | 877 | void *entry = rcu_dereference_raw(child->slots[offset]); |
| 761 | if (radix_tree_is_internal_node(entry) && | 878 | if (radix_tree_is_internal_node(entry) && |
| 762 | !is_sibling_entry(child, entry)) { | 879 | !is_sibling_entry(child, entry)) { |
| 763 | child = entry_to_node(entry); | 880 | child = entry_to_node(entry); |
| @@ -777,8 +894,9 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) | |||
| 777 | } | 894 | } |
| 778 | } | 895 | } |
| 779 | 896 | ||
| 780 | static inline int insert_entries(struct radix_tree_node *node, void **slot, | 897 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
| 781 | void *item, unsigned order, bool replace) | 898 | static inline int insert_entries(struct radix_tree_node *node, |
| 899 | void __rcu **slot, void *item, unsigned order, bool replace) | ||
| 782 | { | 900 | { |
| 783 | struct radix_tree_node *child; | 901 | struct radix_tree_node *child; |
| 784 | unsigned i, n, tag, offset, tags = 0; | 902 | unsigned i, n, tag, offset, tags = 0; |
| @@ -813,7 +931,7 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot, | |||
| 813 | } | 931 | } |
| 814 | 932 | ||
| 815 | for (i = 0; i < n; i++) { | 933 | for (i = 0; i < n; i++) { |
| 816 | struct radix_tree_node *old = slot[i]; | 934 | struct radix_tree_node *old = rcu_dereference_raw(slot[i]); |
| 817 | if (i) { | 935 | if (i) { |
| 818 | rcu_assign_pointer(slot[i], child); | 936 | rcu_assign_pointer(slot[i], child); |
| 819 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | 937 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
| @@ -840,8 +958,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot, | |||
| 840 | return n; | 958 | return n; |
| 841 | } | 959 | } |
| 842 | #else | 960 | #else |
| 843 | static inline int insert_entries(struct radix_tree_node *node, void **slot, | 961 | static inline int insert_entries(struct radix_tree_node *node, |
| 844 | void *item, unsigned order, bool replace) | 962 | void __rcu **slot, void *item, unsigned order, bool replace) |
| 845 | { | 963 | { |
| 846 | if (*slot) | 964 | if (*slot) |
| 847 | return -EEXIST; | 965 | return -EEXIST; |
| @@ -868,7 +986,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, | |||
| 868 | unsigned order, void *item) | 986 | unsigned order, void *item) |
| 869 | { | 987 | { |
| 870 | struct radix_tree_node *node; | 988 | struct radix_tree_node *node; |
| 871 | void **slot; | 989 | void __rcu **slot; |
| 872 | int error; | 990 | int error; |
| 873 | 991 | ||
| 874 | BUG_ON(radix_tree_is_internal_node(item)); | 992 | BUG_ON(radix_tree_is_internal_node(item)); |
| @@ -908,16 +1026,17 @@ EXPORT_SYMBOL(__radix_tree_insert); | |||
| 908 | * allocated and @root->rnode is used as a direct slot instead of | 1026 | * allocated and @root->rnode is used as a direct slot instead of |
| 909 | * pointing to a node, in which case *@nodep will be NULL. | 1027 | * pointing to a node, in which case *@nodep will be NULL. |
| 910 | */ | 1028 | */ |
| 911 | void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, | 1029 | void *__radix_tree_lookup(const struct radix_tree_root *root, |
| 912 | struct radix_tree_node **nodep, void ***slotp) | 1030 | unsigned long index, struct radix_tree_node **nodep, |
| 1031 | void __rcu ***slotp) | ||
| 913 | { | 1032 | { |
| 914 | struct radix_tree_node *node, *parent; | 1033 | struct radix_tree_node *node, *parent; |
| 915 | unsigned long maxindex; | 1034 | unsigned long maxindex; |
| 916 | void **slot; | 1035 | void __rcu **slot; |
| 917 | 1036 | ||
| 918 | restart: | 1037 | restart: |
| 919 | parent = NULL; | 1038 | parent = NULL; |
| 920 | slot = (void **)&root->rnode; | 1039 | slot = (void __rcu **)&root->rnode; |
| 921 | radix_tree_load_root(root, &node, &maxindex); | 1040 | radix_tree_load_root(root, &node, &maxindex); |
| 922 | if (index > maxindex) | 1041 | if (index > maxindex) |
| 923 | return NULL; | 1042 | return NULL; |
| @@ -952,9 +1071,10 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, | |||
| 952 | * exclusive from other writers. Any dereference of the slot must be done | 1071 | * exclusive from other writers. Any dereference of the slot must be done |
| 953 | * using radix_tree_deref_slot. | 1072 | * using radix_tree_deref_slot. |
| 954 | */ | 1073 | */ |
| 955 | void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) | 1074 | void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, |
| 1075 | unsigned long index) | ||
| 956 | { | 1076 | { |
| 957 | void **slot; | 1077 | void __rcu **slot; |
| 958 | 1078 | ||
| 959 | if (!__radix_tree_lookup(root, index, NULL, &slot)) | 1079 | if (!__radix_tree_lookup(root, index, NULL, &slot)) |
| 960 | return NULL; | 1080 | return NULL; |
| @@ -974,75 +1094,76 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); | |||
| 974 | * them safely). No RCU barriers are required to access or modify the | 1094 | * them safely). No RCU barriers are required to access or modify the |
| 975 | * returned item, however. | 1095 | * returned item, however. |
| 976 | */ | 1096 | */ |
| 977 | void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) | 1097 | void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) |
| 978 | { | 1098 | { |
| 979 | return __radix_tree_lookup(root, index, NULL, NULL); | 1099 | return __radix_tree_lookup(root, index, NULL, NULL); |
| 980 | } | 1100 | } |
| 981 | EXPORT_SYMBOL(radix_tree_lookup); | 1101 | EXPORT_SYMBOL(radix_tree_lookup); |
| 982 | 1102 | ||
| 983 | static inline int slot_count(struct radix_tree_node *node, | 1103 | static inline void replace_sibling_entries(struct radix_tree_node *node, |
| 984 | void **slot) | 1104 | void __rcu **slot, int count, int exceptional) |
| 985 | { | 1105 | { |
| 986 | int n = 1; | ||
| 987 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 1106 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
| 988 | void *ptr = node_to_entry(slot); | 1107 | void *ptr = node_to_entry(slot); |
| 989 | unsigned offset = get_slot_offset(node, slot); | 1108 | unsigned offset = get_slot_offset(node, slot) + 1; |
| 990 | int i; | ||
| 991 | 1109 | ||
| 992 | for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { | 1110 | while (offset < RADIX_TREE_MAP_SIZE) { |
| 993 | if (node->slots[offset + i] != ptr) | 1111 | if (rcu_dereference_raw(node->slots[offset]) != ptr) |
| 994 | break; | 1112 | break; |
| 995 | n++; | 1113 | if (count < 0) { |
| 1114 | node->slots[offset] = NULL; | ||
| 1115 | node->count--; | ||
| 1116 | } | ||
| 1117 | node->exceptional += exceptional; | ||
| 1118 | offset++; | ||
| 996 | } | 1119 | } |
| 997 | #endif | 1120 | #endif |
| 998 | return n; | ||
| 999 | } | 1121 | } |
| 1000 | 1122 | ||
| 1001 | static void replace_slot(struct radix_tree_root *root, | 1123 | static void replace_slot(void __rcu **slot, void *item, |
| 1002 | struct radix_tree_node *node, | 1124 | struct radix_tree_node *node, int count, int exceptional) |
| 1003 | void **slot, void *item, | ||
| 1004 | bool warn_typeswitch) | ||
| 1005 | { | 1125 | { |
| 1006 | void *old = rcu_dereference_raw(*slot); | 1126 | if (WARN_ON_ONCE(radix_tree_is_internal_node(item))) |
| 1007 | int count, exceptional; | 1127 | return; |
| 1008 | |||
| 1009 | WARN_ON_ONCE(radix_tree_is_internal_node(item)); | ||
| 1010 | |||
| 1011 | count = !!item - !!old; | ||
| 1012 | exceptional = !!radix_tree_exceptional_entry(item) - | ||
| 1013 | !!radix_tree_exceptional_entry(old); | ||
| 1014 | |||
| 1015 | WARN_ON_ONCE(warn_typeswitch && (count || exceptional)); | ||
| 1016 | 1128 | ||
| 1017 | if (node) { | 1129 | if (node && (count || exceptional)) { |
| 1018 | node->count += count; | 1130 | node->count += count; |
| 1019 | if (exceptional) { | 1131 | node->exceptional += exceptional; |
| 1020 | exceptional *= slot_count(node, slot); | 1132 | replace_sibling_entries(node, slot, count, exceptional); |
| 1021 | node->exceptional += exceptional; | ||
| 1022 | } | ||
| 1023 | } | 1133 | } |
| 1024 | 1134 | ||
| 1025 | rcu_assign_pointer(*slot, item); | 1135 | rcu_assign_pointer(*slot, item); |
| 1026 | } | 1136 | } |
| 1027 | 1137 | ||
| 1028 | static inline void delete_sibling_entries(struct radix_tree_node *node, | 1138 | static bool node_tag_get(const struct radix_tree_root *root, |
| 1029 | void **slot) | 1139 | const struct radix_tree_node *node, |
| 1140 | unsigned int tag, unsigned int offset) | ||
| 1030 | { | 1141 | { |
| 1031 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 1142 | if (node) |
| 1032 | bool exceptional = radix_tree_exceptional_entry(*slot); | 1143 | return tag_get(node, tag, offset); |
| 1033 | void *ptr = node_to_entry(slot); | 1144 | return root_tag_get(root, tag); |
| 1034 | unsigned offset = get_slot_offset(node, slot); | 1145 | } |
| 1035 | int i; | ||
| 1036 | 1146 | ||
| 1037 | for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { | 1147 | /* |
| 1038 | if (node->slots[offset + i] != ptr) | 1148 | * IDR users want to be able to store NULL in the tree, so if the slot isn't |
| 1039 | break; | 1149 | * free, don't adjust the count, even if it's transitioning between NULL and |
| 1040 | node->slots[offset + i] = NULL; | 1150 | * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still |
| 1041 | node->count--; | 1151 | * have empty bits, but it only stores NULL in slots when they're being |
| 1042 | if (exceptional) | 1152 | * deleted. |
| 1043 | node->exceptional--; | 1153 | */ |
| 1154 | static int calculate_count(struct radix_tree_root *root, | ||
| 1155 | struct radix_tree_node *node, void __rcu **slot, | ||
| 1156 | void *item, void *old) | ||
| 1157 | { | ||
| 1158 | if (is_idr(root)) { | ||
| 1159 | unsigned offset = get_slot_offset(node, slot); | ||
| 1160 | bool free = node_tag_get(root, node, IDR_FREE, offset); | ||
| 1161 | if (!free) | ||
| 1162 | return 0; | ||
| 1163 | if (!old) | ||
| 1164 | return 1; | ||
| 1044 | } | 1165 | } |
| 1045 | #endif | 1166 | return !!item - !!old; |
| 1046 | } | 1167 | } |
| 1047 | 1168 | ||
| 1048 | /** | 1169 | /** |
| @@ -1059,18 +1180,22 @@ static inline void delete_sibling_entries(struct radix_tree_node *node, | |||
| 1059 | */ | 1180 | */ |
| 1060 | void __radix_tree_replace(struct radix_tree_root *root, | 1181 | void __radix_tree_replace(struct radix_tree_root *root, |
| 1061 | struct radix_tree_node *node, | 1182 | struct radix_tree_node *node, |
| 1062 | void **slot, void *item, | 1183 | void __rcu **slot, void *item, |
| 1063 | radix_tree_update_node_t update_node, void *private) | 1184 | radix_tree_update_node_t update_node, void *private) |
| 1064 | { | 1185 | { |
| 1065 | if (!item) | 1186 | void *old = rcu_dereference_raw(*slot); |
| 1066 | delete_sibling_entries(node, slot); | 1187 | int exceptional = !!radix_tree_exceptional_entry(item) - |
| 1188 | !!radix_tree_exceptional_entry(old); | ||
| 1189 | int count = calculate_count(root, node, slot, item, old); | ||
| 1190 | |||
| 1067 | /* | 1191 | /* |
| 1068 | * This function supports replacing exceptional entries and | 1192 | * This function supports replacing exceptional entries and |
| 1069 | * deleting entries, but that needs accounting against the | 1193 | * deleting entries, but that needs accounting against the |
| 1070 | * node unless the slot is root->rnode. | 1194 | * node unless the slot is root->rnode. |
| 1071 | */ | 1195 | */ |
| 1072 | replace_slot(root, node, slot, item, | 1196 | WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) && |
| 1073 | !node && slot != (void **)&root->rnode); | 1197 | (count || exceptional)); |
| 1198 | replace_slot(slot, item, node, count, exceptional); | ||
| 1074 | 1199 | ||
| 1075 | if (!node) | 1200 | if (!node) |
| 1076 | return; | 1201 | return; |
| @@ -1098,9 +1223,9 @@ void __radix_tree_replace(struct radix_tree_root *root, | |||
| 1098 | * radix_tree_iter_replace(). | 1223 | * radix_tree_iter_replace(). |
| 1099 | */ | 1224 | */ |
| 1100 | void radix_tree_replace_slot(struct radix_tree_root *root, | 1225 | void radix_tree_replace_slot(struct radix_tree_root *root, |
| 1101 | void **slot, void *item) | 1226 | void __rcu **slot, void *item) |
| 1102 | { | 1227 | { |
| 1103 | replace_slot(root, NULL, slot, item, true); | 1228 | __radix_tree_replace(root, NULL, slot, item, NULL, NULL); |
| 1104 | } | 1229 | } |
| 1105 | EXPORT_SYMBOL(radix_tree_replace_slot); | 1230 | EXPORT_SYMBOL(radix_tree_replace_slot); |
| 1106 | 1231 | ||
| @@ -1114,7 +1239,8 @@ EXPORT_SYMBOL(radix_tree_replace_slot); | |||
| 1114 | * Caller must hold tree write locked across split and replacement. | 1239 | * Caller must hold tree write locked across split and replacement. |
| 1115 | */ | 1240 | */ |
| 1116 | void radix_tree_iter_replace(struct radix_tree_root *root, | 1241 | void radix_tree_iter_replace(struct radix_tree_root *root, |
| 1117 | const struct radix_tree_iter *iter, void **slot, void *item) | 1242 | const struct radix_tree_iter *iter, |
| 1243 | void __rcu **slot, void *item) | ||
| 1118 | { | 1244 | { |
| 1119 | __radix_tree_replace(root, iter->node, slot, item, NULL, NULL); | 1245 | __radix_tree_replace(root, iter->node, slot, item, NULL, NULL); |
| 1120 | } | 1246 | } |
| @@ -1138,7 +1264,7 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index, | |||
| 1138 | unsigned order, void *item) | 1264 | unsigned order, void *item) |
| 1139 | { | 1265 | { |
| 1140 | struct radix_tree_node *node; | 1266 | struct radix_tree_node *node; |
| 1141 | void **slot; | 1267 | void __rcu **slot; |
| 1142 | int error; | 1268 | int error; |
| 1143 | 1269 | ||
| 1144 | BUG_ON(radix_tree_is_internal_node(item)); | 1270 | BUG_ON(radix_tree_is_internal_node(item)); |
| @@ -1173,9 +1299,10 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, | |||
| 1173 | unsigned order) | 1299 | unsigned order) |
| 1174 | { | 1300 | { |
| 1175 | struct radix_tree_node *parent, *node, *child; | 1301 | struct radix_tree_node *parent, *node, *child; |
| 1176 | void **slot; | 1302 | void __rcu **slot; |
| 1177 | unsigned int offset, end; | 1303 | unsigned int offset, end; |
| 1178 | unsigned n, tag, tags = 0; | 1304 | unsigned n, tag, tags = 0; |
| 1305 | gfp_t gfp = root_gfp_mask(root); | ||
| 1179 | 1306 | ||
| 1180 | if (!__radix_tree_lookup(root, index, &parent, &slot)) | 1307 | if (!__radix_tree_lookup(root, index, &parent, &slot)) |
| 1181 | return -ENOENT; | 1308 | return -ENOENT; |
| @@ -1189,7 +1316,8 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, | |||
| 1189 | tags |= 1 << tag; | 1316 | tags |= 1 << tag; |
| 1190 | 1317 | ||
| 1191 | for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { | 1318 | for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { |
| 1192 | if (!is_sibling_entry(parent, parent->slots[end])) | 1319 | if (!is_sibling_entry(parent, |
| 1320 | rcu_dereference_raw(parent->slots[end]))) | ||
| 1193 | break; | 1321 | break; |
| 1194 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | 1322 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
| 1195 | if (tags & (1 << tag)) | 1323 | if (tags & (1 << tag)) |
| @@ -1213,14 +1341,15 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, | |||
| 1213 | 1341 | ||
| 1214 | for (;;) { | 1342 | for (;;) { |
| 1215 | if (node->shift > order) { | 1343 | if (node->shift > order) { |
| 1216 | child = radix_tree_node_alloc(root, node, | 1344 | child = radix_tree_node_alloc(gfp, node, root, |
| 1217 | node->shift - RADIX_TREE_MAP_SHIFT, | 1345 | node->shift - RADIX_TREE_MAP_SHIFT, |
| 1218 | offset, 0, 0); | 1346 | offset, 0, 0); |
| 1219 | if (!child) | 1347 | if (!child) |
| 1220 | goto nomem; | 1348 | goto nomem; |
| 1221 | if (node != parent) { | 1349 | if (node != parent) { |
| 1222 | node->count++; | 1350 | node->count++; |
| 1223 | node->slots[offset] = node_to_entry(child); | 1351 | rcu_assign_pointer(node->slots[offset], |
| 1352 | node_to_entry(child)); | ||
| 1224 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | 1353 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
| 1225 | if (tags & (1 << tag)) | 1354 | if (tags & (1 << tag)) |
| 1226 | tag_set(node, tag, offset); | 1355 | tag_set(node, tag, offset); |
| @@ -1262,6 +1391,22 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, | |||
| 1262 | } | 1391 | } |
| 1263 | #endif | 1392 | #endif |
| 1264 | 1393 | ||
| 1394 | static void node_tag_set(struct radix_tree_root *root, | ||
| 1395 | struct radix_tree_node *node, | ||
| 1396 | unsigned int tag, unsigned int offset) | ||
| 1397 | { | ||
| 1398 | while (node) { | ||
| 1399 | if (tag_get(node, tag, offset)) | ||
| 1400 | return; | ||
| 1401 | tag_set(node, tag, offset); | ||
| 1402 | offset = node->offset; | ||
| 1403 | node = node->parent; | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | if (!root_tag_get(root, tag)) | ||
| 1407 | root_tag_set(root, tag); | ||
| 1408 | } | ||
| 1409 | |||
| 1265 | /** | 1410 | /** |
| 1266 | * radix_tree_tag_set - set a tag on a radix tree node | 1411 | * radix_tree_tag_set - set a tag on a radix tree node |
| 1267 | * @root: radix tree root | 1412 | * @root: radix tree root |
| @@ -1303,6 +1448,18 @@ void *radix_tree_tag_set(struct radix_tree_root *root, | |||
| 1303 | } | 1448 | } |
| 1304 | EXPORT_SYMBOL(radix_tree_tag_set); | 1449 | EXPORT_SYMBOL(radix_tree_tag_set); |
| 1305 | 1450 | ||
| 1451 | /** | ||
| 1452 | * radix_tree_iter_tag_set - set a tag on the current iterator entry | ||
| 1453 | * @root: radix tree root | ||
| 1454 | * @iter: iterator state | ||
| 1455 | * @tag: tag to set | ||
| 1456 | */ | ||
| 1457 | void radix_tree_iter_tag_set(struct radix_tree_root *root, | ||
| 1458 | const struct radix_tree_iter *iter, unsigned int tag) | ||
| 1459 | { | ||
| 1460 | node_tag_set(root, iter->node, tag, iter_offset(iter)); | ||
| 1461 | } | ||
| 1462 | |||
| 1306 | static void node_tag_clear(struct radix_tree_root *root, | 1463 | static void node_tag_clear(struct radix_tree_root *root, |
| 1307 | struct radix_tree_node *node, | 1464 | struct radix_tree_node *node, |
| 1308 | unsigned int tag, unsigned int offset) | 1465 | unsigned int tag, unsigned int offset) |
| @@ -1323,34 +1480,6 @@ static void node_tag_clear(struct radix_tree_root *root, | |||
| 1323 | root_tag_clear(root, tag); | 1480 | root_tag_clear(root, tag); |
| 1324 | } | 1481 | } |
| 1325 | 1482 | ||
| 1326 | static void node_tag_set(struct radix_tree_root *root, | ||
| 1327 | struct radix_tree_node *node, | ||
| 1328 | unsigned int tag, unsigned int offset) | ||
| 1329 | { | ||
| 1330 | while (node) { | ||
| 1331 | if (tag_get(node, tag, offset)) | ||
| 1332 | return; | ||
| 1333 | tag_set(node, tag, offset); | ||
| 1334 | offset = node->offset; | ||
| 1335 | node = node->parent; | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | if (!root_tag_get(root, tag)) | ||
| 1339 | root_tag_set(root, tag); | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | /** | ||
| 1343 | * radix_tree_iter_tag_set - set a tag on the current iterator entry | ||
| 1344 | * @root: radix tree root | ||
| 1345 | * @iter: iterator state | ||
| 1346 | * @tag: tag to set | ||
| 1347 | */ | ||
| 1348 | void radix_tree_iter_tag_set(struct radix_tree_root *root, | ||
| 1349 | const struct radix_tree_iter *iter, unsigned int tag) | ||
| 1350 | { | ||
| 1351 | node_tag_set(root, iter->node, tag, iter_offset(iter)); | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | /** | 1483 | /** |
| 1355 | * radix_tree_tag_clear - clear a tag on a radix tree node | 1484 | * radix_tree_tag_clear - clear a tag on a radix tree node |
| 1356 | * @root: radix tree root | 1485 | * @root: radix tree root |
| @@ -1391,6 +1520,18 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, | |||
| 1391 | EXPORT_SYMBOL(radix_tree_tag_clear); | 1520 | EXPORT_SYMBOL(radix_tree_tag_clear); |
| 1392 | 1521 | ||
| 1393 | /** | 1522 | /** |
| 1523 | * radix_tree_iter_tag_clear - clear a tag on the current iterator entry | ||
| 1524 | * @root: radix tree root | ||
| 1525 | * @iter: iterator state | ||
| 1526 | * @tag: tag to clear | ||
| 1527 | */ | ||
| 1528 | void radix_tree_iter_tag_clear(struct radix_tree_root *root, | ||
| 1529 | const struct radix_tree_iter *iter, unsigned int tag) | ||
| 1530 | { | ||
| 1531 | node_tag_clear(root, iter->node, tag, iter_offset(iter)); | ||
| 1532 | } | ||
| 1533 | |||
| 1534 | /** | ||
| 1394 | * radix_tree_tag_get - get a tag on a radix tree node | 1535 | * radix_tree_tag_get - get a tag on a radix tree node |
| 1395 | * @root: radix tree root | 1536 | * @root: radix tree root |
| 1396 | * @index: index key | 1537 | * @index: index key |
| @@ -1405,7 +1546,7 @@ EXPORT_SYMBOL(radix_tree_tag_clear); | |||
| 1405 | * the RCU lock is held, unless tag modification and node deletion are excluded | 1546 | * the RCU lock is held, unless tag modification and node deletion are excluded |
| 1406 | * from concurrency. | 1547 | * from concurrency. |
| 1407 | */ | 1548 | */ |
| 1408 | int radix_tree_tag_get(struct radix_tree_root *root, | 1549 | int radix_tree_tag_get(const struct radix_tree_root *root, |
| 1409 | unsigned long index, unsigned int tag) | 1550 | unsigned long index, unsigned int tag) |
| 1410 | { | 1551 | { |
| 1411 | struct radix_tree_node *node, *parent; | 1552 | struct radix_tree_node *node, *parent; |
| @@ -1417,8 +1558,6 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
| 1417 | radix_tree_load_root(root, &node, &maxindex); | 1558 | radix_tree_load_root(root, &node, &maxindex); |
| 1418 | if (index > maxindex) | 1559 | if (index > maxindex) |
| 1419 | return 0; | 1560 | return 0; |
| 1420 | if (node == NULL) | ||
| 1421 | return 0; | ||
| 1422 | 1561 | ||
| 1423 | while (radix_tree_is_internal_node(node)) { | 1562 | while (radix_tree_is_internal_node(node)) { |
| 1424 | unsigned offset; | 1563 | unsigned offset; |
| @@ -1426,8 +1565,6 @@ int radix_tree_tag_get(struct radix_tree_root *root, | |||
| 1426 | parent = entry_to_node(node); | 1565 | parent = entry_to_node(node); |
| 1427 | offset = radix_tree_descend(parent, &node, index); | 1566 | offset = radix_tree_descend(parent, &node, index); |
| 1428 | 1567 | ||
| 1429 | if (!node) | ||
| 1430 | return 0; | ||
| 1431 | if (!tag_get(parent, tag, offset)) | 1568 | if (!tag_get(parent, tag, offset)) |
| 1432 | return 0; | 1569 | return 0; |
| 1433 | if (node == RADIX_TREE_RETRY) | 1570 | if (node == RADIX_TREE_RETRY) |
| @@ -1454,6 +1591,11 @@ static void set_iter_tags(struct radix_tree_iter *iter, | |||
| 1454 | unsigned tag_long = offset / BITS_PER_LONG; | 1591 | unsigned tag_long = offset / BITS_PER_LONG; |
| 1455 | unsigned tag_bit = offset % BITS_PER_LONG; | 1592 | unsigned tag_bit = offset % BITS_PER_LONG; |
| 1456 | 1593 | ||
| 1594 | if (!node) { | ||
| 1595 | iter->tags = 1; | ||
| 1596 | return; | ||
| 1597 | } | ||
| 1598 | |||
| 1457 | iter->tags = node->tags[tag][tag_long] >> tag_bit; | 1599 | iter->tags = node->tags[tag][tag_long] >> tag_bit; |
| 1458 | 1600 | ||
| 1459 | /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ | 1601 | /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ |
| @@ -1468,8 +1610,8 @@ static void set_iter_tags(struct radix_tree_iter *iter, | |||
| 1468 | } | 1610 | } |
| 1469 | 1611 | ||
| 1470 | #ifdef CONFIG_RADIX_TREE_MULTIORDER | 1612 | #ifdef CONFIG_RADIX_TREE_MULTIORDER |
| 1471 | static void **skip_siblings(struct radix_tree_node **nodep, | 1613 | static void __rcu **skip_siblings(struct radix_tree_node **nodep, |
| 1472 | void **slot, struct radix_tree_iter *iter) | 1614 | void __rcu **slot, struct radix_tree_iter *iter) |
| 1473 | { | 1615 | { |
| 1474 | void *sib = node_to_entry(slot - 1); | 1616 | void *sib = node_to_entry(slot - 1); |
| 1475 | 1617 | ||
| @@ -1486,8 +1628,8 @@ static void **skip_siblings(struct radix_tree_node **nodep, | |||
| 1486 | return NULL; | 1628 | return NULL; |
| 1487 | } | 1629 | } |
| 1488 | 1630 | ||
| 1489 | void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, | 1631 | void __rcu **__radix_tree_next_slot(void __rcu **slot, |
| 1490 | unsigned flags) | 1632 | struct radix_tree_iter *iter, unsigned flags) |
| 1491 | { | 1633 | { |
| 1492 | unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; | 1634 | unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; |
| 1493 | struct radix_tree_node *node = rcu_dereference_raw(*slot); | 1635 | struct radix_tree_node *node = rcu_dereference_raw(*slot); |
| @@ -1540,20 +1682,20 @@ void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, | |||
| 1540 | } | 1682 | } |
| 1541 | EXPORT_SYMBOL(__radix_tree_next_slot); | 1683 | EXPORT_SYMBOL(__radix_tree_next_slot); |
| 1542 | #else | 1684 | #else |
| 1543 | static void **skip_siblings(struct radix_tree_node **nodep, | 1685 | static void __rcu **skip_siblings(struct radix_tree_node **nodep, |
| 1544 | void **slot, struct radix_tree_iter *iter) | 1686 | void __rcu **slot, struct radix_tree_iter *iter) |
| 1545 | { | 1687 | { |
| 1546 | return slot; | 1688 | return slot; |
| 1547 | } | 1689 | } |
| 1548 | #endif | 1690 | #endif |
| 1549 | 1691 | ||
| 1550 | void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter) | 1692 | void __rcu **radix_tree_iter_resume(void __rcu **slot, |
| 1693 | struct radix_tree_iter *iter) | ||
| 1551 | { | 1694 | { |
| 1552 | struct radix_tree_node *node; | 1695 | struct radix_tree_node *node; |
| 1553 | 1696 | ||
| 1554 | slot++; | 1697 | slot++; |
| 1555 | iter->index = __radix_tree_iter_add(iter, 1); | 1698 | iter->index = __radix_tree_iter_add(iter, 1); |
| 1556 | node = rcu_dereference_raw(*slot); | ||
| 1557 | skip_siblings(&node, slot, iter); | 1699 | skip_siblings(&node, slot, iter); |
| 1558 | iter->next_index = iter->index; | 1700 | iter->next_index = iter->index; |
| 1559 | iter->tags = 0; | 1701 | iter->tags = 0; |
| @@ -1569,7 +1711,7 @@ EXPORT_SYMBOL(radix_tree_iter_resume); | |||
| 1569 | * @flags: RADIX_TREE_ITER_* flags and tag index | 1711 | * @flags: RADIX_TREE_ITER_* flags and tag index |
| 1570 | * Returns: pointer to chunk first slot, or NULL if iteration is over | 1712 | * Returns: pointer to chunk first slot, or NULL if iteration is over |
| 1571 | */ | 1713 | */ |
| 1572 | void **radix_tree_next_chunk(struct radix_tree_root *root, | 1714 | void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, |
| 1573 | struct radix_tree_iter *iter, unsigned flags) | 1715 | struct radix_tree_iter *iter, unsigned flags) |
| 1574 | { | 1716 | { |
| 1575 | unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; | 1717 | unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; |
| @@ -1606,7 +1748,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 1606 | iter->tags = 1; | 1748 | iter->tags = 1; |
| 1607 | iter->node = NULL; | 1749 | iter->node = NULL; |
| 1608 | __set_iter_shift(iter, 0); | 1750 | __set_iter_shift(iter, 0); |
| 1609 | return (void **)&root->rnode; | 1751 | return (void __rcu **)&root->rnode; |
| 1610 | } | 1752 | } |
| 1611 | 1753 | ||
| 1612 | do { | 1754 | do { |
| @@ -1624,7 +1766,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, | |||
| 1624 | offset + 1); | 1766 | offset + 1); |
| 1625 | else | 1767 | else |
| 1626 | while (++offset < RADIX_TREE_MAP_SIZE) { | 1768 | while (++offset < RADIX_TREE_MAP_SIZE) { |
| 1627 | void *slot = node->slots[offset]; | 1769 | void *slot = rcu_dereference_raw( |
| 1770 | node->slots[offset]); | ||
| 1628 | if (is_sibling_entry(node, slot)) | 1771 | if (is_sibling_entry(node, slot)) |
| 1629 | continue; | 1772 | continue; |
| 1630 | if (slot) | 1773 | if (slot) |
| @@ -1680,11 +1823,11 @@ EXPORT_SYMBOL(radix_tree_next_chunk); | |||
| 1680 | * stored in 'results'. | 1823 | * stored in 'results'. |
| 1681 | */ | 1824 | */ |
| 1682 | unsigned int | 1825 | unsigned int |
| 1683 | radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | 1826 | radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, |
| 1684 | unsigned long first_index, unsigned int max_items) | 1827 | unsigned long first_index, unsigned int max_items) |
| 1685 | { | 1828 | { |
| 1686 | struct radix_tree_iter iter; | 1829 | struct radix_tree_iter iter; |
| 1687 | void **slot; | 1830 | void __rcu **slot; |
| 1688 | unsigned int ret = 0; | 1831 | unsigned int ret = 0; |
| 1689 | 1832 | ||
| 1690 | if (unlikely(!max_items)) | 1833 | if (unlikely(!max_items)) |
| @@ -1725,12 +1868,12 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); | |||
| 1725 | * protection, radix_tree_deref_slot may fail requiring a retry. | 1868 | * protection, radix_tree_deref_slot may fail requiring a retry. |
| 1726 | */ | 1869 | */ |
| 1727 | unsigned int | 1870 | unsigned int |
| 1728 | radix_tree_gang_lookup_slot(struct radix_tree_root *root, | 1871 | radix_tree_gang_lookup_slot(const struct radix_tree_root *root, |
| 1729 | void ***results, unsigned long *indices, | 1872 | void __rcu ***results, unsigned long *indices, |
| 1730 | unsigned long first_index, unsigned int max_items) | 1873 | unsigned long first_index, unsigned int max_items) |
| 1731 | { | 1874 | { |
| 1732 | struct radix_tree_iter iter; | 1875 | struct radix_tree_iter iter; |
| 1733 | void **slot; | 1876 | void __rcu **slot; |
| 1734 | unsigned int ret = 0; | 1877 | unsigned int ret = 0; |
| 1735 | 1878 | ||
| 1736 | if (unlikely(!max_items)) | 1879 | if (unlikely(!max_items)) |
| @@ -1762,12 +1905,12 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_slot); | |||
| 1762 | * returns the number of items which were placed at *@results. | 1905 | * returns the number of items which were placed at *@results. |
| 1763 | */ | 1906 | */ |
| 1764 | unsigned int | 1907 | unsigned int |
| 1765 | radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, | 1908 | radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, |
| 1766 | unsigned long first_index, unsigned int max_items, | 1909 | unsigned long first_index, unsigned int max_items, |
| 1767 | unsigned int tag) | 1910 | unsigned int tag) |
| 1768 | { | 1911 | { |
| 1769 | struct radix_tree_iter iter; | 1912 | struct radix_tree_iter iter; |
| 1770 | void **slot; | 1913 | void __rcu **slot; |
| 1771 | unsigned int ret = 0; | 1914 | unsigned int ret = 0; |
| 1772 | 1915 | ||
| 1773 | if (unlikely(!max_items)) | 1916 | if (unlikely(!max_items)) |
| @@ -1803,12 +1946,12 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag); | |||
| 1803 | * returns the number of slots which were placed at *@results. | 1946 | * returns the number of slots which were placed at *@results. |
| 1804 | */ | 1947 | */ |
| 1805 | unsigned int | 1948 | unsigned int |
| 1806 | radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, | 1949 | radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, |
| 1807 | unsigned long first_index, unsigned int max_items, | 1950 | void __rcu ***results, unsigned long first_index, |
| 1808 | unsigned int tag) | 1951 | unsigned int max_items, unsigned int tag) |
| 1809 | { | 1952 | { |
| 1810 | struct radix_tree_iter iter; | 1953 | struct radix_tree_iter iter; |
| 1811 | void **slot; | 1954 | void __rcu **slot; |
| 1812 | unsigned int ret = 0; | 1955 | unsigned int ret = 0; |
| 1813 | 1956 | ||
| 1814 | if (unlikely(!max_items)) | 1957 | if (unlikely(!max_items)) |
| @@ -1843,59 +1986,83 @@ void __radix_tree_delete_node(struct radix_tree_root *root, | |||
| 1843 | delete_node(root, node, update_node, private); | 1986 | delete_node(root, node, update_node, private); |
| 1844 | } | 1987 | } |
| 1845 | 1988 | ||
| 1989 | static bool __radix_tree_delete(struct radix_tree_root *root, | ||
| 1990 | struct radix_tree_node *node, void __rcu **slot) | ||
| 1991 | { | ||
| 1992 | void *old = rcu_dereference_raw(*slot); | ||
| 1993 | int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0; | ||
| 1994 | unsigned offset = get_slot_offset(node, slot); | ||
| 1995 | int tag; | ||
| 1996 | |||
| 1997 | if (is_idr(root)) | ||
| 1998 | node_tag_set(root, node, IDR_FREE, offset); | ||
| 1999 | else | ||
| 2000 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 2001 | node_tag_clear(root, node, tag, offset); | ||
| 2002 | |||
| 2003 | replace_slot(slot, NULL, node, -1, exceptional); | ||
| 2004 | return node && delete_node(root, node, NULL, NULL); | ||
| 2005 | } | ||
| 2006 | |||
| 1846 | /** | 2007 | /** |
| 1847 | * radix_tree_delete_item - delete an item from a radix tree | 2008 | * radix_tree_iter_delete - delete the entry at this iterator position |
| 1848 | * @root: radix tree root | 2009 | * @root: radix tree root |
| 1849 | * @index: index key | 2010 | * @iter: iterator state |
| 1850 | * @item: expected item | 2011 | * @slot: pointer to slot |
| 1851 | * | 2012 | * |
| 1852 | * Remove @item at @index from the radix tree rooted at @root. | 2013 | * Delete the entry at the position currently pointed to by the iterator. |
| 2014 | * This may result in the current node being freed; if it is, the iterator | ||
| 2015 | * is advanced so that it will not reference the freed memory. This | ||
| 2016 | * function may be called without any locking if there are no other threads | ||
| 2017 | * which can access this tree. | ||
| 2018 | */ | ||
| 2019 | void radix_tree_iter_delete(struct radix_tree_root *root, | ||
| 2020 | struct radix_tree_iter *iter, void __rcu **slot) | ||
| 2021 | { | ||
| 2022 | if (__radix_tree_delete(root, iter->node, slot)) | ||
| 2023 | iter->index = iter->next_index; | ||
| 2024 | } | ||
| 2025 | |||
| 2026 | /** | ||
| 2027 | * radix_tree_delete_item - delete an item from a radix tree | ||
| 2028 | * @root: radix tree root | ||
| 2029 | * @index: index key | ||
| 2030 | * @item: expected item | ||
| 1853 | * | 2031 | * |
| 1854 | * Returns the address of the deleted item, or NULL if it was not present | 2032 | * Remove @item at @index from the radix tree rooted at @root. |
| 1855 | * or the entry at the given @index was not @item. | 2033 | * |
| 2034 | * Return: the deleted entry, or %NULL if it was not present | ||
| 2035 | * or the entry at the given @index was not @item. | ||
| 1856 | */ | 2036 | */ |
| 1857 | void *radix_tree_delete_item(struct radix_tree_root *root, | 2037 | void *radix_tree_delete_item(struct radix_tree_root *root, |
| 1858 | unsigned long index, void *item) | 2038 | unsigned long index, void *item) |
| 1859 | { | 2039 | { |
| 1860 | struct radix_tree_node *node; | 2040 | struct radix_tree_node *node = NULL; |
| 1861 | unsigned int offset; | 2041 | void __rcu **slot; |
| 1862 | void **slot; | ||
| 1863 | void *entry; | 2042 | void *entry; |
| 1864 | int tag; | ||
| 1865 | 2043 | ||
| 1866 | entry = __radix_tree_lookup(root, index, &node, &slot); | 2044 | entry = __radix_tree_lookup(root, index, &node, &slot); |
| 1867 | if (!entry) | 2045 | if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, |
| 2046 | get_slot_offset(node, slot)))) | ||
| 1868 | return NULL; | 2047 | return NULL; |
| 1869 | 2048 | ||
| 1870 | if (item && entry != item) | 2049 | if (item && entry != item) |
| 1871 | return NULL; | 2050 | return NULL; |
| 1872 | 2051 | ||
| 1873 | if (!node) { | 2052 | __radix_tree_delete(root, node, slot); |
| 1874 | root_tag_clear_all(root); | ||
| 1875 | root->rnode = NULL; | ||
| 1876 | return entry; | ||
| 1877 | } | ||
| 1878 | |||
| 1879 | offset = get_slot_offset(node, slot); | ||
| 1880 | |||
| 1881 | /* Clear all tags associated with the item to be deleted. */ | ||
| 1882 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | ||
| 1883 | node_tag_clear(root, node, tag, offset); | ||
| 1884 | |||
| 1885 | __radix_tree_replace(root, node, slot, NULL, NULL, NULL); | ||
| 1886 | 2053 | ||
| 1887 | return entry; | 2054 | return entry; |
| 1888 | } | 2055 | } |
| 1889 | EXPORT_SYMBOL(radix_tree_delete_item); | 2056 | EXPORT_SYMBOL(radix_tree_delete_item); |
| 1890 | 2057 | ||
| 1891 | /** | 2058 | /** |
| 1892 | * radix_tree_delete - delete an item from a radix tree | 2059 | * radix_tree_delete - delete an entry from a radix tree |
| 1893 | * @root: radix tree root | 2060 | * @root: radix tree root |
| 1894 | * @index: index key | 2061 | * @index: index key |
| 1895 | * | 2062 | * |
| 1896 | * Remove the item at @index from the radix tree rooted at @root. | 2063 | * Remove the entry at @index from the radix tree rooted at @root. |
| 1897 | * | 2064 | * |
| 1898 | * Returns the address of the deleted item, or NULL if it was not present. | 2065 | * Return: The deleted entry, or %NULL if it was not present. |
| 1899 | */ | 2066 | */ |
| 1900 | void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) | 2067 | void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) |
| 1901 | { | 2068 | { |
| @@ -1905,15 +2072,14 @@ EXPORT_SYMBOL(radix_tree_delete); | |||
| 1905 | 2072 | ||
| 1906 | void radix_tree_clear_tags(struct radix_tree_root *root, | 2073 | void radix_tree_clear_tags(struct radix_tree_root *root, |
| 1907 | struct radix_tree_node *node, | 2074 | struct radix_tree_node *node, |
| 1908 | void **slot) | 2075 | void __rcu **slot) |
| 1909 | { | 2076 | { |
| 1910 | if (node) { | 2077 | if (node) { |
| 1911 | unsigned int tag, offset = get_slot_offset(node, slot); | 2078 | unsigned int tag, offset = get_slot_offset(node, slot); |
| 1912 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) | 2079 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) |
| 1913 | node_tag_clear(root, node, tag, offset); | 2080 | node_tag_clear(root, node, tag, offset); |
| 1914 | } else { | 2081 | } else { |
| 1915 | /* Clear root node tags */ | 2082 | root_tag_clear_all(root); |
| 1916 | root->gfp_mask &= __GFP_BITS_MASK; | ||
| 1917 | } | 2083 | } |
| 1918 | } | 2084 | } |
| 1919 | 2085 | ||
| @@ -1922,12 +2088,147 @@ void radix_tree_clear_tags(struct radix_tree_root *root, | |||
| 1922 | * @root: radix tree root | 2088 | * @root: radix tree root |
| 1923 | * @tag: tag to test | 2089 | * @tag: tag to test |
| 1924 | */ | 2090 | */ |
| 1925 | int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) | 2091 | int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) |
| 1926 | { | 2092 | { |
| 1927 | return root_tag_get(root, tag); | 2093 | return root_tag_get(root, tag); |
| 1928 | } | 2094 | } |
| 1929 | EXPORT_SYMBOL(radix_tree_tagged); | 2095 | EXPORT_SYMBOL(radix_tree_tagged); |
| 1930 | 2096 | ||
| 2097 | /** | ||
| 2098 | * idr_preload - preload for idr_alloc() | ||
| 2099 | * @gfp_mask: allocation mask to use for preloading | ||
| 2100 | * | ||
| 2101 | * Preallocate memory to use for the next call to idr_alloc(). This function | ||
| 2102 | * returns with preemption disabled. It will be enabled by idr_preload_end(). | ||
| 2103 | */ | ||
| 2104 | void idr_preload(gfp_t gfp_mask) | ||
| 2105 | { | ||
| 2106 | __radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE); | ||
| 2107 | } | ||
| 2108 | EXPORT_SYMBOL(idr_preload); | ||
| 2109 | |||
| 2110 | /** | ||
| 2111 | * ida_pre_get - reserve resources for ida allocation | ||
| 2112 | * @ida: ida handle | ||
| 2113 | * @gfp: memory allocation flags | ||
| 2114 | * | ||
| 2115 | * This function should be called before calling ida_get_new_above(). If it | ||
| 2116 | * is unable to allocate memory, it will return %0. On success, it returns %1. | ||
| 2117 | */ | ||
| 2118 | int ida_pre_get(struct ida *ida, gfp_t gfp) | ||
| 2119 | { | ||
| 2120 | __radix_tree_preload(gfp, IDA_PRELOAD_SIZE); | ||
| 2121 | /* | ||
| 2122 | * The IDA API has no preload_end() equivalent. Instead, | ||
| 2123 | * ida_get_new() can return -EAGAIN, prompting the caller | ||
| 2124 | * to return to the ida_pre_get() step. | ||
| 2125 | */ | ||
| 2126 | preempt_enable(); | ||
| 2127 | |||
| 2128 | if (!this_cpu_read(ida_bitmap)) { | ||
| 2129 | struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); | ||
| 2130 | if (!bitmap) | ||
| 2131 | return 0; | ||
| 2132 | bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap); | ||
| 2133 | kfree(bitmap); | ||
| 2134 | } | ||
| 2135 | |||
| 2136 | return 1; | ||
| 2137 | } | ||
| 2138 | EXPORT_SYMBOL(ida_pre_get); | ||
| 2139 | |||
| 2140 | void __rcu **idr_get_free(struct radix_tree_root *root, | ||
| 2141 | struct radix_tree_iter *iter, gfp_t gfp, int end) | ||
| 2142 | { | ||
| 2143 | struct radix_tree_node *node = NULL, *child; | ||
| 2144 | void __rcu **slot = (void __rcu **)&root->rnode; | ||
| 2145 | unsigned long maxindex, start = iter->next_index; | ||
| 2146 | unsigned long max = end > 0 ? end - 1 : INT_MAX; | ||
| 2147 | unsigned int shift, offset = 0; | ||
| 2148 | |||
| 2149 | grow: | ||
| 2150 | shift = radix_tree_load_root(root, &child, &maxindex); | ||
| 2151 | if (!radix_tree_tagged(root, IDR_FREE)) | ||
| 2152 | start = max(start, maxindex + 1); | ||
| 2153 | if (start > max) | ||
| 2154 | return ERR_PTR(-ENOSPC); | ||
| 2155 | |||
| 2156 | if (start > maxindex) { | ||
| 2157 | int error = radix_tree_extend(root, gfp, start, shift); | ||
| 2158 | if (error < 0) | ||
| 2159 | return ERR_PTR(error); | ||
| 2160 | shift = error; | ||
| 2161 | child = rcu_dereference_raw(root->rnode); | ||
| 2162 | } | ||
| 2163 | |||
| 2164 | while (shift) { | ||
| 2165 | shift -= RADIX_TREE_MAP_SHIFT; | ||
| 2166 | if (child == NULL) { | ||
| 2167 | /* Have to add a child node. */ | ||
| 2168 | child = radix_tree_node_alloc(gfp, node, root, shift, | ||
| 2169 | offset, 0, 0); | ||
| 2170 | if (!child) | ||
| 2171 | return ERR_PTR(-ENOMEM); | ||
| 2172 | all_tag_set(child, IDR_FREE); | ||
| 2173 | rcu_assign_pointer(*slot, node_to_entry(child)); | ||
| 2174 | if (node) | ||
| 2175 | node->count++; | ||
| 2176 | } else if (!radix_tree_is_internal_node(child)) | ||
| 2177 | break; | ||
| 2178 | |||
| 2179 | node = entry_to_node(child); | ||
| 2180 | offset = radix_tree_descend(node, &child, start); | ||
| 2181 | if (!tag_get(node, IDR_FREE, offset)) { | ||
| 2182 | offset = radix_tree_find_next_bit(node, IDR_FREE, | ||
| 2183 | offset + 1); | ||
| 2184 | start = next_index(start, node, offset); | ||
| 2185 | if (start > max) | ||
| 2186 | return ERR_PTR(-ENOSPC); | ||
| 2187 | while (offset == RADIX_TREE_MAP_SIZE) { | ||
| 2188 | offset = node->offset + 1; | ||
| 2189 | node = node->parent; | ||
| 2190 | if (!node) | ||
| 2191 | goto grow; | ||
| 2192 | shift = node->shift; | ||
| 2193 | } | ||
| 2194 | child = rcu_dereference_raw(node->slots[offset]); | ||
| 2195 | } | ||
| 2196 | slot = &node->slots[offset]; | ||
| 2197 | } | ||
| 2198 | |||
| 2199 | iter->index = start; | ||
| 2200 | if (node) | ||
| 2201 | iter->next_index = 1 + min(max, (start | node_maxindex(node))); | ||
| 2202 | else | ||
| 2203 | iter->next_index = 1; | ||
| 2204 | iter->node = node; | ||
| 2205 | __set_iter_shift(iter, shift); | ||
| 2206 | set_iter_tags(iter, node, offset, IDR_FREE); | ||
| 2207 | |||
| 2208 | return slot; | ||
| 2209 | } | ||
| 2210 | |||
| 2211 | /** | ||
| 2212 | * idr_destroy - release all internal memory from an IDR | ||
| 2213 | * @idr: idr handle | ||
| 2214 | * | ||
| 2215 | * After this function is called, the IDR is empty, and may be reused or | ||
| 2216 | * the data structure containing it may be freed. | ||
| 2217 | * | ||
| 2218 | * A typical clean-up sequence for objects stored in an idr tree will use | ||
| 2219 | * idr_for_each() to free all objects, if necessary, then idr_destroy() to | ||
| 2220 | * free the memory used to keep track of those objects. | ||
| 2221 | */ | ||
| 2222 | void idr_destroy(struct idr *idr) | ||
| 2223 | { | ||
| 2224 | struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode); | ||
| 2225 | if (radix_tree_is_internal_node(node)) | ||
| 2226 | radix_tree_free_nodes(node); | ||
| 2227 | idr->idr_rt.rnode = NULL; | ||
| 2228 | root_tag_set(&idr->idr_rt, IDR_FREE); | ||
| 2229 | } | ||
| 2230 | EXPORT_SYMBOL(idr_destroy); | ||
| 2231 | |||
| 1931 | static void | 2232 | static void |
| 1932 | radix_tree_node_ctor(void *arg) | 2233 | radix_tree_node_ctor(void *arg) |
| 1933 | { | 2234 | { |
| @@ -1971,10 +2272,12 @@ static int radix_tree_cpu_dead(unsigned int cpu) | |||
| 1971 | rtp = &per_cpu(radix_tree_preloads, cpu); | 2272 | rtp = &per_cpu(radix_tree_preloads, cpu); |
| 1972 | while (rtp->nr) { | 2273 | while (rtp->nr) { |
| 1973 | node = rtp->nodes; | 2274 | node = rtp->nodes; |
| 1974 | rtp->nodes = node->private_data; | 2275 | rtp->nodes = node->parent; |
| 1975 | kmem_cache_free(radix_tree_node_cachep, node); | 2276 | kmem_cache_free(radix_tree_node_cachep, node); |
| 1976 | rtp->nr--; | 2277 | rtp->nr--; |
| 1977 | } | 2278 | } |
| 2279 | kfree(per_cpu(ida_bitmap, cpu)); | ||
| 2280 | per_cpu(ida_bitmap, cpu) = NULL; | ||
| 1978 | return 0; | 2281 | return 0; |
| 1979 | } | 2282 | } |
| 1980 | 2283 | ||
