aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorNick Piggin <npiggin@kernel.dk>2010-11-11 17:05:19 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-11-12 10:55:32 -0500
commit27d20fddc8af539464fc3ba499d6a830054c3bd6 (patch)
tree23514cfe88f90150a8635c47586a8a378fb905e3 /include
parenteaf06b241b091357e72b76863ba16e89610d31bd (diff)
radix-tree: fix RCU bug
Salman Qazi describes the following radix-tree bug: In the following case, we get can get a deadlock: 0. The radix tree contains two items, one has the index 0. 1. The reader (in this case find_get_pages) takes the rcu_read_lock. 2. The reader acquires slot(s) for item(s) including the index 0 item. 3. The non-zero index item is deleted, and as a consequence the other item is moved to the root of the tree. The place where it used to be is queued for deletion after the readers finish. 3b. The zero item is deleted, removing it from the direct slot, it remains in the rcu-delayed indirect node. 4. The reader looks at the index 0 slot, and finds that the page has 0 ref count 5. The reader looks at it again, hoping that the item will either be freed or the ref count will increase. This never happens, as the slot it is looking at will never be updated. Also, this slot can never be reclaimed because the reader is holding rcu_read_lock and is in an infinite loop. The fix is to re-use the same "indirect" pointer case that requires a slot lookup retry into a general "retry the lookup" bit. Signed-off-by: Nick Piggin <npiggin@kernel.dk> Reported-by: Salman Qazi <sqazi@google.com> Cc: <stable@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/radix-tree.h39
1 files changed, 23 insertions, 16 deletions
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index a39cbed9ee17..ab2baa5c4884 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -34,19 +34,13 @@
34 * needed for RCU lookups (because root->height is unreliable). The only 34 * needed for RCU lookups (because root->height is unreliable). The only
35 * time callers need worry about this is when doing a lookup_slot under 35 * time callers need worry about this is when doing a lookup_slot under
36 * RCU. 36 * RCU.
37 *
38 * Indirect pointer in fact is also used to tag the last pointer of a node
39 * when it is shrunk, before we rcu free the node. See shrink code for
40 * details.
37 */ 41 */
38#define RADIX_TREE_INDIRECT_PTR 1 42#define RADIX_TREE_INDIRECT_PTR 1
39#define RADIX_TREE_RETRY ((void *)-1UL)
40
41static inline void *radix_tree_ptr_to_indirect(void *ptr)
42{
43 return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
44}
45 43
46static inline void *radix_tree_indirect_to_ptr(void *ptr)
47{
48 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
49}
50#define radix_tree_indirect_to_ptr(ptr) \ 44#define radix_tree_indirect_to_ptr(ptr) \
51 radix_tree_indirect_to_ptr((void __force *)(ptr)) 45 radix_tree_indirect_to_ptr((void __force *)(ptr))
52 46
@@ -140,16 +134,29 @@ do { \
140 * removed. 134 * removed.
141 * 135 *
142 * For use with radix_tree_lookup_slot(). Caller must hold tree at least read 136 * For use with radix_tree_lookup_slot(). Caller must hold tree at least read
143 * locked across slot lookup and dereference. More likely, will be used with 137 * locked across slot lookup and dereference. Not required if write lock is
144 * radix_tree_replace_slot(), as well, so caller will hold tree write locked. 138 * held (ie. items cannot be concurrently inserted).
139 *
140 * radix_tree_deref_retry must be used to confirm validity of the pointer if
141 * only the read lock is held.
145 */ 142 */
146static inline void *radix_tree_deref_slot(void **pslot) 143static inline void *radix_tree_deref_slot(void **pslot)
147{ 144{
148 void *ret = rcu_dereference(*pslot); 145 return rcu_dereference(*pslot);
149 if (unlikely(radix_tree_is_indirect_ptr(ret)))
150 ret = RADIX_TREE_RETRY;
151 return ret;
152} 146}
147
148/**
149 * radix_tree_deref_retry - check radix_tree_deref_slot
150 * @arg: pointer returned by radix_tree_deref_slot
151 * Returns: 0 if retry is not required, otherwise retry is required
152 *
153 * radix_tree_deref_retry must be used with radix_tree_deref_slot.
154 */
155static inline int radix_tree_deref_retry(void *arg)
156{
157 return unlikely((unsigned long)arg & RADIX_TREE_INDIRECT_PTR);
158}
159
153/** 160/**
154 * radix_tree_replace_slot - replace item in a slot 161 * radix_tree_replace_slot - replace item in a slot
155 * @pslot: pointer to slot, returned by radix_tree_lookup_slot 162 * @pslot: pointer to slot, returned by radix_tree_lookup_slot