author		Jan Kara <jack@suse.cz>	2013-09-11 17:26:05 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-11 18:59:36 -0400
commit		5e4c0d974139a98741b829b27cf38dc8f9284490 (patch)
tree		fddd959828300c1de1ade15eeb33606c317b79db /lib
parent		4b39248365e09fb8268b6fecd1704907ffc3d980 (diff)
lib/radix-tree.c: make radix_tree_node_alloc() work correctly within interrupt
With users of radix_tree_preload() run from interrupt (block/blk-ioc.c is
one such possible user), the following race can happen:

radix_tree_preload()
...
radix_tree_insert()
  radix_tree_node_alloc()
    if (rtp->nr) {
      ret = rtp->nodes[rtp->nr - 1];
<interrupt>
...
radix_tree_preload()
...
radix_tree_insert()
  radix_tree_node_alloc()
    if (rtp->nr) {
      ret = rtp->nodes[rtp->nr - 1];

And we give out one radix tree node twice.  That clearly results in radix
tree corruption, with different outcomes (usually an OOPS) depending on
which two users of the radix tree race.

We fix the problem by making radix_tree_node_alloc() always allocate fresh
radix tree nodes when in interrupt.  Using preloading when in interrupt
doesn't make sense, since all the allocations have to be atomic anyway and
we cannot steal nodes from process-context users because some users rely
on radix_tree_insert() succeeding after radix_tree_preload().  The
in_interrupt() check is somewhat ugly, but we cannot simply key off the
passed gfp_mask, as that is acquired from root_gfp_mask() and is thus the
same for all preload users.

Another part of the fix is to avoid node preallocation in
radix_tree_preload() when the passed gfp_mask doesn't allow waiting.
Again, preallocation in such a case doesn't make sense, and if the
preallocation happened in interrupt we could possibly leak some allocated
nodes.  However, some users of radix_tree_preload() require the following
radix_tree_insert() to succeed.  To avoid unexpected effects for these
users, radix_tree_preload() now only warns if the passed gfp mask doesn't
allow waiting, and we provide a new function, radix_tree_maybe_preload(),
for those users which get different gfp masks from different call sites
and which are prepared to handle radix_tree_insert() failure.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <jaxboe@fusionio.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
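For illustration, here is a minimal, hypothetical sketch (not part of this
commit) of the two caller patterns the changelog describes: a process-context
user that relies on radix_tree_preload() making the following
radix_tree_insert() succeed, and a user whose gfp mask differs between call
sites and therefore uses the new radix_tree_maybe_preload() and handles
insert failure.  The tree, lock, and function names below are placeholders.

#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

/* Hypothetical tree and lock; the root gfp mask must not include __GFP_WAIT. */
static RADIX_TREE(my_tree, GFP_ATOMIC);
static DEFINE_SPINLOCK(my_lock);

/* Process context: after a successful preload, the insert cannot fail for
 * lack of memory. */
static int add_item(unsigned long index, void *item)
{
	int err;

	err = radix_tree_preload(GFP_KERNEL);	/* may sleep; now warns if the mask can't wait */
	if (err)
		return err;

	spin_lock(&my_lock);
	err = radix_tree_insert(&my_tree, index, item);
	spin_unlock(&my_lock);

	radix_tree_preload_end();		/* re-enables preemption */
	return err;
}

/* Gfp mask varies by call site: preload only when it helps and be prepared
 * for radix_tree_insert() to return -ENOMEM. */
static int maybe_add_item(unsigned long index, void *item, gfp_t gfp)
{
	int err;

	err = radix_tree_maybe_preload(gfp);	/* skips preallocation if the mask can't wait */
	if (err)
		return err;

	spin_lock(&my_lock);
	err = radix_tree_insert(&my_tree, index, item);	/* may fail when not preloaded */
	spin_unlock(&my_lock);

	radix_tree_preload_end();
	return err;
}

In both patterns the preload call returns with preemption disabled on
success, so the caller must close the section with radix_tree_preload_end().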
Diffstat (limited to 'lib')
-rw-r--r--	lib/radix-tree.c	41
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index e7964296fd50..7811ed3b4e70 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/rcupdate.h>
+#include <linux/hardirq.h>		/* in_interrupt() */
 
 
 #ifdef __KERNEL__
@@ -207,7 +208,12 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 	struct radix_tree_node *ret = NULL;
 	gfp_t gfp_mask = root_gfp_mask(root);
 
-	if (!(gfp_mask & __GFP_WAIT)) {
+	/*
+	 * Preload code isn't irq safe and it doesn't make sense to use
+	 * preloading in the interrupt anyway as all the allocations have to
+	 * be atomic. So just do normal allocation when in interrupt.
+	 */
+	if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) {
 		struct radix_tree_preload *rtp;
 
 		/*
@@ -264,7 +270,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  * To make use of this facility, the radix tree must be initialised without
  * __GFP_WAIT being passed to INIT_RADIX_TREE().
  */
-int radix_tree_preload(gfp_t gfp_mask)
+static int __radix_tree_preload(gfp_t gfp_mask)
 {
 	struct radix_tree_preload *rtp;
 	struct radix_tree_node *node;
@@ -288,9 +294,40 @@ int radix_tree_preload(gfp_t gfp_mask)
 out:
 	return ret;
 }
+
+/*
+ * Load up this CPU's radix_tree_node buffer with sufficient objects to
+ * ensure that the addition of a single element in the tree cannot fail.  On
+ * success, return zero, with preemption disabled.  On error, return -ENOMEM
+ * with preemption not disabled.
+ *
+ * To make use of this facility, the radix tree must be initialised without
+ * __GFP_WAIT being passed to INIT_RADIX_TREE().
+ */
+int radix_tree_preload(gfp_t gfp_mask)
+{
+	/* Warn on non-sensical use... */
+	WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT));
+	return __radix_tree_preload(gfp_mask);
+}
 EXPORT_SYMBOL(radix_tree_preload);
 
 /*
+ * The same as the above function, except we don't guarantee preloading
+ * happens.  We do it if we decide it helps.  On success, return zero with
+ * preemption disabled.  On error, return -ENOMEM with preemption not disabled.
+ */
+int radix_tree_maybe_preload(gfp_t gfp_mask)
+{
+	if (gfp_mask & __GFP_WAIT)
+		return __radix_tree_preload(gfp_mask);
+	/* Preloading doesn't help anything with this gfp mask, skip it */
+	preempt_disable();
+	return 0;
+}
+EXPORT_SYMBOL(radix_tree_maybe_preload);
+
+/*
  * Return the maximum key which can be stored into a
  * radix tree with height HEIGHT.
  */