diff options
| author | Matthew Wilcox <mawilcox@microsoft.com> | 2016-12-17 08:18:17 -0500 |
|---|---|---|
| committer | Matthew Wilcox <mawilcox@microsoft.com> | 2017-02-13 21:44:02 -0500 |
| commit | d37cacc5adace7f3e0824e1f559192ad7299d029 (patch) | |
| tree | 9932ecc9ba3010ecc53bdbf5cd299eb0842106ec /lib/idr.c | |
| parent | 7ad3d4d85c7af9632055a6ac0aa15b6b6a321c6b (diff) | |
ida: Use exceptional entries for small IDAs
We can use the root entry as a bitmap and save allocating a 128 byte
bitmap for an IDA that contains only a few entries (30 on a 32-bit
machine, 62 on a 64-bit machine). This costs about 300 bytes of kernel
text on x86-64, so as long as 3 IDAs fall into this category, this
is a net win for memory consumption.
Thanks to Rasmus Villemoes for his work documenting the problem and
collecting statistics on IDAs.
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Diffstat (limited to 'lib/idr.c')
| -rw-r--r-- | lib/idr.c | 87 |
1 files changed, 81 insertions, 6 deletions
| @@ -194,6 +194,39 @@ EXPORT_SYMBOL(idr_replace); | |||
| 194 | * limitation, it should be quite straightforward to raise the maximum. | 194 | * limitation, it should be quite straightforward to raise the maximum. |
| 195 | */ | 195 | */ |
| 196 | 196 | ||
| 197 | /* | ||
| 198 | * Developer's notes: | ||
| 199 | * | ||
| 200 | * The IDA uses the functionality provided by the IDR & radix tree to store | ||
| 201 | * bitmaps in each entry. The IDR_FREE tag means there is at least one bit | ||
| 202 | * free, unlike the IDR where it means at least one entry is free. | ||
| 203 | * | ||
| 204 | * I considered telling the radix tree that each slot is an order-10 node | ||
| 205 | * and storing the bit numbers in the radix tree, but the radix tree can't | ||
| 206 | * allow a single multiorder entry at index 0, which would significantly | ||
| 207 | * increase memory consumption for the IDA. So instead we divide the index | ||
| 208 | * by the number of bits in the leaf bitmap before doing a radix tree lookup. | ||
| 209 | * | ||
| 210 | * As an optimisation, if there are only a few low bits set in any given | ||
| 211 | * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional | ||
| 212 | * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits | ||
| 213 | * directly in the entry. By being really tricksy, we could store | ||
| 214 | * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising | ||
| 215 | * for 0-3 allocated IDs. | ||
| 216 | * | ||
| 217 | * We allow the radix tree 'exceptional' count to get out of date. Nothing | ||
| 218 | * in the IDA nor the radix tree code checks it. If it becomes important | ||
| 219 | * to maintain an accurate exceptional count, switch the rcu_assign_pointer() | ||
| 220 | * calls to radix_tree_iter_replace() which will correct the exceptional | ||
| 221 | * count. | ||
| 222 | * | ||
| 223 | * The IDA always requires a lock to alloc/free. If we add a 'test_bit' | ||
| 224 | * equivalent, it will still need locking. Going to RCU lookup would require | ||
| 225 | * using RCU to free bitmaps, and that's not trivial without embedding an | ||
| 226 | * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte | ||
| 227 | * bitmap, which is excessive. | ||
| 228 | */ | ||
| 229 | |||
| 197 | #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) | 230 | #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) |
| 198 | 231 | ||
| 199 | /** | 232 | /** |
| @@ -221,11 +254,12 @@ int ida_get_new_above(struct ida *ida, int start, int *id) | |||
| 221 | struct radix_tree_iter iter; | 254 | struct radix_tree_iter iter; |
| 222 | struct ida_bitmap *bitmap; | 255 | struct ida_bitmap *bitmap; |
| 223 | unsigned long index; | 256 | unsigned long index; |
| 224 | unsigned bit; | 257 | unsigned bit, ebit; |
| 225 | int new; | 258 | int new; |
| 226 | 259 | ||
| 227 | index = start / IDA_BITMAP_BITS; | 260 | index = start / IDA_BITMAP_BITS; |
| 228 | bit = start % IDA_BITMAP_BITS; | 261 | bit = start % IDA_BITMAP_BITS; |
| 262 | ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; | ||
| 229 | 263 | ||
| 230 | slot = radix_tree_iter_init(&iter, index); | 264 | slot = radix_tree_iter_init(&iter, index); |
| 231 | for (;;) { | 265 | for (;;) { |
| @@ -240,10 +274,29 @@ int ida_get_new_above(struct ida *ida, int start, int *id) | |||
| 240 | return PTR_ERR(slot); | 274 | return PTR_ERR(slot); |
| 241 | } | 275 | } |
| 242 | } | 276 | } |
| 243 | if (iter.index > index) | 277 | if (iter.index > index) { |
| 244 | bit = 0; | 278 | bit = 0; |
| 279 | ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; | ||
| 280 | } | ||
| 245 | new = iter.index * IDA_BITMAP_BITS; | 281 | new = iter.index * IDA_BITMAP_BITS; |
| 246 | bitmap = rcu_dereference_raw(*slot); | 282 | bitmap = rcu_dereference_raw(*slot); |
| 283 | if (radix_tree_exception(bitmap)) { | ||
| 284 | unsigned long tmp = (unsigned long)bitmap; | ||
| 285 | ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); | ||
| 286 | if (ebit < BITS_PER_LONG) { | ||
| 287 | tmp |= 1UL << ebit; | ||
| 288 | rcu_assign_pointer(*slot, (void *)tmp); | ||
| 289 | *id = new + ebit - RADIX_TREE_EXCEPTIONAL_SHIFT; | ||
| 290 | return 0; | ||
| 291 | } | ||
| 292 | bitmap = this_cpu_xchg(ida_bitmap, NULL); | ||
| 293 | if (!bitmap) | ||
| 294 | return -EAGAIN; | ||
| 295 | memset(bitmap, 0, sizeof(*bitmap)); | ||
| 296 | bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; | ||
| 297 | rcu_assign_pointer(*slot, bitmap); | ||
| 298 | } | ||
| 299 | |||
| 247 | if (bitmap) { | 300 | if (bitmap) { |
| 248 | bit = find_next_zero_bit(bitmap->bitmap, | 301 | bit = find_next_zero_bit(bitmap->bitmap, |
| 249 | IDA_BITMAP_BITS, bit); | 302 | IDA_BITMAP_BITS, bit); |
| @@ -261,6 +314,14 @@ int ida_get_new_above(struct ida *ida, int start, int *id) | |||
| 261 | new += bit; | 314 | new += bit; |
| 262 | if (new < 0) | 315 | if (new < 0) |
| 263 | return -ENOSPC; | 316 | return -ENOSPC; |
| 317 | if (ebit < BITS_PER_LONG) { | ||
| 318 | bitmap = (void *)((1UL << ebit) | | ||
| 319 | RADIX_TREE_EXCEPTIONAL_ENTRY); | ||
| 320 | radix_tree_iter_replace(root, &iter, slot, | ||
| 321 | bitmap); | ||
| 322 | *id = new; | ||
| 323 | return 0; | ||
| 324 | } | ||
| 264 | bitmap = this_cpu_xchg(ida_bitmap, NULL); | 325 | bitmap = this_cpu_xchg(ida_bitmap, NULL); |
| 265 | if (!bitmap) | 326 | if (!bitmap) |
| 266 | return -EAGAIN; | 327 | return -EAGAIN; |
| @@ -287,6 +348,7 @@ void ida_remove(struct ida *ida, int id) | |||
| 287 | unsigned long index = id / IDA_BITMAP_BITS; | 348 | unsigned long index = id / IDA_BITMAP_BITS; |
| 288 | unsigned offset = id % IDA_BITMAP_BITS; | 349 | unsigned offset = id % IDA_BITMAP_BITS; |
| 289 | struct ida_bitmap *bitmap; | 350 | struct ida_bitmap *bitmap; |
| 351 | unsigned long *btmp; | ||
| 290 | struct radix_tree_iter iter; | 352 | struct radix_tree_iter iter; |
| 291 | void **slot; | 353 | void **slot; |
| 292 | 354 | ||
| @@ -295,12 +357,24 @@ void ida_remove(struct ida *ida, int id) | |||
| 295 | goto err; | 357 | goto err; |
| 296 | 358 | ||
| 297 | bitmap = rcu_dereference_raw(*slot); | 359 | bitmap = rcu_dereference_raw(*slot); |
| 298 | if (!test_bit(offset, bitmap->bitmap)) | 360 | if (radix_tree_exception(bitmap)) { |
| 361 | btmp = (unsigned long *)slot; | ||
| 362 | offset += RADIX_TREE_EXCEPTIONAL_SHIFT; | ||
| 363 | if (offset >= BITS_PER_LONG) | ||
| 364 | goto err; | ||
| 365 | } else { | ||
| 366 | btmp = bitmap->bitmap; | ||
| 367 | } | ||
| 368 | if (!test_bit(offset, btmp)) | ||
| 299 | goto err; | 369 | goto err; |
| 300 | 370 | ||
| 301 | __clear_bit(offset, bitmap->bitmap); | 371 | __clear_bit(offset, btmp); |
| 302 | radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); | 372 | radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); |
| 303 | if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { | 373 | if (radix_tree_exception(bitmap)) { |
| 374 | if (rcu_dereference_raw(*slot) == | ||
| 375 | (void *)RADIX_TREE_EXCEPTIONAL_ENTRY) | ||
| 376 | radix_tree_iter_delete(&ida->ida_rt, &iter, slot); | ||
| 377 | } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { | ||
| 304 | kfree(bitmap); | 378 | kfree(bitmap); |
| 305 | radix_tree_iter_delete(&ida->ida_rt, &iter, slot); | 379 | radix_tree_iter_delete(&ida->ida_rt, &iter, slot); |
| 306 | } | 380 | } |
| @@ -326,7 +400,8 @@ void ida_destroy(struct ida *ida) | |||
| 326 | 400 | ||
| 327 | radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { | 401 | radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { |
| 328 | struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); | 402 | struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); |
| 329 | kfree(bitmap); | 403 | if (!radix_tree_exception(bitmap)) |
| 404 | kfree(bitmap); | ||
| 330 | radix_tree_iter_delete(&ida->ida_rt, &iter, slot); | 405 | radix_tree_iter_delete(&ida->ida_rt, &iter, slot); |
| 331 | } | 406 | } |
| 332 | } | 407 | } |
