summaryrefslogtreecommitdiffstats
path: root/lib/rbtree.c
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2015-05-26 21:39:36 -0400
committerRusty Russell <rusty@rustcorp.com.au>2015-05-27 22:02:04 -0400
commitd72da4a4d973d8a0a0d3c97e7cdebf287fbe3a99 (patch)
treeb3fbbf22c0f1bbc175dce958cf99241aee665f4c /lib/rbtree.c
parent0be964be0d45084245673c971d72a4b51690231d (diff)
rbtree: Make lockless searches non-fatal
Change the insert and erase code such that lockless searches are non-fatal. In and of itself an rbtree cannot be correctly searched while in-modification, we can however provide weaker guarantees that will allow the rbtree to be used in conjunction with other techniques, such as latches; see 9b0fd802e8c0 ("seqcount: Add raw_write_seqcount_latch()"). For this to work we need the following guarantees from the rbtree code: 1) a lockless reader must not see partial stores, this would allow it to observe nodes that are invalid memory. 2) there must not be (temporary) loops in the tree structure in the modifier's program order, this would cause a lookup which interrupts the modifier to get stuck indefinitely. For 1) we must use WRITE_ONCE() for all updates to the tree structure; in particular this patch only does rb_{left,right} as those are the only element required for simple searches. It generates slightly worse code, probably because volatile. But in pointer chasing heavy code a few instructions more should not matter. For 2) I have carefully audited the code and drawn every intermediate link state and not found a loop. Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: David Woodhouse <David.Woodhouse@intel.com> Cc: Rik van Riel <riel@redhat.com> Reviewed-by: Michel Lespinasse <walken@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'lib/rbtree.c')
-rw-r--r--lib/rbtree.c76
1 files changed, 54 insertions, 22 deletions
diff --git a/lib/rbtree.c b/lib/rbtree.c
index c16c81a3d430..1356454e36de 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -44,6 +44,30 @@
44 * parentheses and have some accompanying text comment. 44 * parentheses and have some accompanying text comment.
45 */ 45 */
46 46
47/*
48 * Notes on lockless lookups:
49 *
50 * All stores to the tree structure (rb_left and rb_right) must be done using
51 * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the
52 * tree structure as seen in program order.
53 *
54 * These two requirements will allow lockless iteration of the tree -- not
55 * correct iteration mind you, tree rotations are not atomic so a lookup might
56 * miss entire subtrees.
57 *
58 * But they do guarantee that any such traversal will only see valid elements
59 * and that it will indeed complete -- does not get stuck in a loop.
60 *
61 * It also guarantees that if the lookup returns an element it is the 'correct'
62 * one. But not returning an element does _NOT_ mean it's not present.
63 *
64 * NOTE:
65 *
66 * Stores to __rb_parent_color are not important for simple lookups so those
67 * are left undone as of now. Nor did I check for loops involving parent
68 * pointers.
69 */
70
47static inline void rb_set_black(struct rb_node *rb) 71static inline void rb_set_black(struct rb_node *rb)
48{ 72{
49 rb->__rb_parent_color |= RB_BLACK; 73 rb->__rb_parent_color |= RB_BLACK;
@@ -129,8 +153,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
129 * This still leaves us in violation of 4), the 153 * This still leaves us in violation of 4), the
130 * continuation into Case 3 will fix that. 154 * continuation into Case 3 will fix that.
131 */ 155 */
132 parent->rb_right = tmp = node->rb_left; 156 tmp = node->rb_left;
133 node->rb_left = parent; 157 WRITE_ONCE(parent->rb_right, tmp);
158 WRITE_ONCE(node->rb_left, parent);
134 if (tmp) 159 if (tmp)
135 rb_set_parent_color(tmp, parent, 160 rb_set_parent_color(tmp, parent,
136 RB_BLACK); 161 RB_BLACK);
@@ -149,8 +174,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
149 * / \ 174 * / \
150 * n U 175 * n U
151 */ 176 */
152 gparent->rb_left = tmp; /* == parent->rb_right */ 177 WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */
153 parent->rb_right = gparent; 178 WRITE_ONCE(parent->rb_right, gparent);
154 if (tmp) 179 if (tmp)
155 rb_set_parent_color(tmp, gparent, RB_BLACK); 180 rb_set_parent_color(tmp, gparent, RB_BLACK);
156 __rb_rotate_set_parents(gparent, parent, root, RB_RED); 181 __rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -171,8 +196,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
171 tmp = parent->rb_left; 196 tmp = parent->rb_left;
172 if (node == tmp) { 197 if (node == tmp) {
173 /* Case 2 - right rotate at parent */ 198 /* Case 2 - right rotate at parent */
174 parent->rb_left = tmp = node->rb_right; 199 tmp = node->rb_right;
175 node->rb_right = parent; 200 WRITE_ONCE(parent->rb_left, tmp);
201 WRITE_ONCE(node->rb_right, parent);
176 if (tmp) 202 if (tmp)
177 rb_set_parent_color(tmp, parent, 203 rb_set_parent_color(tmp, parent,
178 RB_BLACK); 204 RB_BLACK);
@@ -183,8 +209,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
183 } 209 }
184 210
185 /* Case 3 - left rotate at gparent */ 211 /* Case 3 - left rotate at gparent */
186 gparent->rb_right = tmp; /* == parent->rb_left */ 212 WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */
187 parent->rb_left = gparent; 213 WRITE_ONCE(parent->rb_left, gparent);
188 if (tmp) 214 if (tmp)
189 rb_set_parent_color(tmp, gparent, RB_BLACK); 215 rb_set_parent_color(tmp, gparent, RB_BLACK);
190 __rb_rotate_set_parents(gparent, parent, root, RB_RED); 216 __rb_rotate_set_parents(gparent, parent, root, RB_RED);
@@ -224,8 +250,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
224 * / \ / \ 250 * / \ / \
225 * Sl Sr N Sl 251 * Sl Sr N Sl
226 */ 252 */
227 parent->rb_right = tmp1 = sibling->rb_left; 253 tmp1 = sibling->rb_left;
228 sibling->rb_left = parent; 254 WRITE_ONCE(parent->rb_right, tmp1);
255 WRITE_ONCE(sibling->rb_left, parent);
229 rb_set_parent_color(tmp1, parent, RB_BLACK); 256 rb_set_parent_color(tmp1, parent, RB_BLACK);
230 __rb_rotate_set_parents(parent, sibling, root, 257 __rb_rotate_set_parents(parent, sibling, root,
231 RB_RED); 258 RB_RED);
@@ -275,9 +302,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
275 * \ 302 * \
276 * Sr 303 * Sr
277 */ 304 */
278 sibling->rb_left = tmp1 = tmp2->rb_right; 305 tmp1 = tmp2->rb_right;
279 tmp2->rb_right = sibling; 306 WRITE_ONCE(sibling->rb_left, tmp1);
280 parent->rb_right = tmp2; 307 WRITE_ONCE(tmp2->rb_right, sibling);
308 WRITE_ONCE(parent->rb_right, tmp2);
281 if (tmp1) 309 if (tmp1)
282 rb_set_parent_color(tmp1, sibling, 310 rb_set_parent_color(tmp1, sibling,
283 RB_BLACK); 311 RB_BLACK);
@@ -297,8 +325,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
297 * / \ / \ 325 * / \ / \
298 * (sl) sr N (sl) 326 * (sl) sr N (sl)
299 */ 327 */
300 parent->rb_right = tmp2 = sibling->rb_left; 328 tmp2 = sibling->rb_left;
301 sibling->rb_left = parent; 329 WRITE_ONCE(parent->rb_right, tmp2);
330 WRITE_ONCE(sibling->rb_left, parent);
302 rb_set_parent_color(tmp1, sibling, RB_BLACK); 331 rb_set_parent_color(tmp1, sibling, RB_BLACK);
303 if (tmp2) 332 if (tmp2)
304 rb_set_parent(tmp2, parent); 333 rb_set_parent(tmp2, parent);
@@ -310,8 +339,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
310 sibling = parent->rb_left; 339 sibling = parent->rb_left;
311 if (rb_is_red(sibling)) { 340 if (rb_is_red(sibling)) {
312 /* Case 1 - right rotate at parent */ 341 /* Case 1 - right rotate at parent */
313 parent->rb_left = tmp1 = sibling->rb_right; 342 tmp1 = sibling->rb_right;
314 sibling->rb_right = parent; 343 WRITE_ONCE(parent->rb_left, tmp1);
344 WRITE_ONCE(sibling->rb_right, parent);
315 rb_set_parent_color(tmp1, parent, RB_BLACK); 345 rb_set_parent_color(tmp1, parent, RB_BLACK);
316 __rb_rotate_set_parents(parent, sibling, root, 346 __rb_rotate_set_parents(parent, sibling, root,
317 RB_RED); 347 RB_RED);
@@ -336,9 +366,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
336 break; 366 break;
337 } 367 }
338 /* Case 3 - right rotate at sibling */ 368 /* Case 3 - right rotate at sibling */
339 sibling->rb_right = tmp1 = tmp2->rb_left; 369 tmp1 = tmp2->rb_left;
340 tmp2->rb_left = sibling; 370 WRITE_ONCE(sibling->rb_right, tmp1);
341 parent->rb_left = tmp2; 371 WRITE_ONCE(tmp2->rb_left, sibling);
372 WRITE_ONCE(parent->rb_left, tmp2);
342 if (tmp1) 373 if (tmp1)
343 rb_set_parent_color(tmp1, sibling, 374 rb_set_parent_color(tmp1, sibling,
344 RB_BLACK); 375 RB_BLACK);
@@ -347,8 +378,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
347 sibling = tmp2; 378 sibling = tmp2;
348 } 379 }
349 /* Case 4 - left rotate at parent + color flips */ 380 /* Case 4 - left rotate at parent + color flips */
350 parent->rb_left = tmp2 = sibling->rb_right; 381 tmp2 = sibling->rb_right;
351 sibling->rb_right = parent; 382 WRITE_ONCE(parent->rb_left, tmp2);
383 WRITE_ONCE(sibling->rb_right, parent);
352 rb_set_parent_color(tmp1, sibling, RB_BLACK); 384 rb_set_parent_color(tmp1, sibling, RB_BLACK);
353 if (tmp2) 385 if (tmp2)
354 rb_set_parent(tmp2, parent); 386 rb_set_parent(tmp2, parent);