aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorRoss Zwisler <ross.zwisler@linux.intel.com>2018-05-18 19:09:01 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-05-18 20:17:12 -0400
commitfd8f58c40b703e47697c9f12bc16c31f14c161f1 (patch)
treed93502102793c414d75fd915edffdf54fb72592c /tools
parent3e252fa7d4f711798e7a3f5ff2d7b62f0e2987ce (diff)
radix tree test suite: multi-order iteration race
Add a test which shows a race in the multi-order iteration code. This test reliably hits the race in under a second on my machine, and is the result of a real bug report against kernel a production v4.15 based kernel (4.15.6-300.fc27.x86_64). With a real kernel this issue is hit when using order 9 PMD DAX radix tree entries. The race has to do with how we tear down multi-order sibling entries when we are removing an item from the tree. Remember that an order 2 entry looks like this: struct radix_tree_node.slots[] = [entry][sibling][sibling][sibling] where 'entry' is in some slot in the struct radix_tree_node, and the three slots following 'entry' contain sibling pointers which point back to 'entry.' When we delete 'entry' from the tree, we call : radix_tree_delete() radix_tree_delete_item() __radix_tree_delete() replace_slot() replace_slot() first removes the siblings in order from the first to the last, then at then replaces 'entry' with NULL. This means that for a brief period of time we end up with one or more of the siblings removed, so: struct radix_tree_node.slots[] = [entry][NULL][sibling][sibling] This causes an issue if you have a reader iterating over the slots in the tree via radix_tree_for_each_slot() while only under rcu_read_lock()/rcu_read_unlock() protection. This is a common case in mm/filemap.c. The issue is that when __radix_tree_next_slot() => skip_siblings() tries to skip over the sibling entries in the slots, it currently does so with an exact match on the slot directly preceding our current slot. Normally this works: V preceding slot struct radix_tree_node.slots[] = [entry][sibling][sibling][sibling] ^ current slot This lets you find the first sibling, and you skip them all in order. But in the case where one of the siblings is NULL, that slot is skipped and then our sibling detection is interrupted: V preceding slot struct radix_tree_node.slots[] = [entry][NULL][sibling][sibling] ^ current slot This means that the sibling pointers aren't recognized since they point all the way back to 'entry', so we think that they are normal internal radix tree pointers. This causes us to think we need to walk down to a struct radix_tree_node starting at the address of 'entry'. In a real running kernel this will crash the thread with a GP fault when you try and dereference the slots in your broken node starting at 'entry'. In the radix tree test suite this will be caught by the address sanitizer: ==27063==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60c0008ae400 at pc 0x00000040ce4f bp 0x7fa89b8fcad0 sp 0x7fa89b8fcac0 READ of size 8 at 0x60c0008ae400 thread T3 #0 0x40ce4e in __radix_tree_next_slot /home/rzwisler/project/linux/tools/testing/radix-tree/radix-tree.c:1660 #1 0x4022cc in radix_tree_next_slot linux/../../../../include/linux/radix-tree.h:567 #2 0x4022cc in iterator_func /home/rzwisler/project/linux/tools/testing/radix-tree/multiorder.c:655 #3 0x7fa8a088d50a in start_thread (/lib64/libpthread.so.0+0x750a) #4 0x7fa8a03bd16e in clone (/lib64/libc.so.6+0xf516e) Link: http://lkml.kernel.org/r/20180503192430.7582-5-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Christoph Hellwig <hch@lst.de> Cc: CR, Sapthagirish <sapthagirish.cr@intel.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Jan Kara <jack@suse.cz> Cc: Matthew Wilcox <willy@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/radix-tree/multiorder.c63
-rw-r--r--tools/testing/radix-tree/test.h1
2 files changed, 64 insertions, 0 deletions
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 59245b3d587c..7bf405638b0b 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -16,6 +16,7 @@
16#include <linux/radix-tree.h> 16#include <linux/radix-tree.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <pthread.h>
19 20
20#include "test.h" 21#include "test.h"
21 22
@@ -624,6 +625,67 @@ static void multiorder_account(void)
624 item_kill_tree(&tree); 625 item_kill_tree(&tree);
625} 626}
626 627
628bool stop_iteration = false;
629
630static void *creator_func(void *ptr)
631{
632 /* 'order' is set up to ensure we have sibling entries */
633 unsigned int order = RADIX_TREE_MAP_SHIFT - 1;
634 struct radix_tree_root *tree = ptr;
635 int i;
636
637 for (i = 0; i < 10000; i++) {
638 item_insert_order(tree, 0, order);
639 item_delete_rcu(tree, 0);
640 }
641
642 stop_iteration = true;
643 return NULL;
644}
645
646static void *iterator_func(void *ptr)
647{
648 struct radix_tree_root *tree = ptr;
649 struct radix_tree_iter iter;
650 struct item *item;
651 void **slot;
652
653 while (!stop_iteration) {
654 rcu_read_lock();
655 radix_tree_for_each_slot(slot, tree, &iter, 0) {
656 item = radix_tree_deref_slot(slot);
657
658 if (!item)
659 continue;
660 if (radix_tree_deref_retry(item)) {
661 slot = radix_tree_iter_retry(&iter);
662 continue;
663 }
664
665 item_sanity(item, iter.index);
666 }
667 rcu_read_unlock();
668 }
669 return NULL;
670}
671
672static void multiorder_iteration_race(void)
673{
674 const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
675 pthread_t worker_thread[num_threads];
676 RADIX_TREE(tree, GFP_KERNEL);
677 int i;
678
679 pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
680 for (i = 1; i < num_threads; i++)
681 pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
682
683 for (i = 0; i < num_threads; i++)
684 pthread_join(worker_thread[i], NULL);
685
686 item_kill_tree(&tree);
687}
688
627void multiorder_checks(void) 689void multiorder_checks(void)
628{ 690{
629 int i; 691 int i;
@@ -644,6 +706,7 @@ void multiorder_checks(void)
644 multiorder_join(); 706 multiorder_join();
645 multiorder_split(); 707 multiorder_split();
646 multiorder_account(); 708 multiorder_account();
709 multiorder_iteration_race();
647 710
648 radix_tree_cpu_dead(0); 711 radix_tree_cpu_dead(0);
649} 712}
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 8cf4a7a7f94c..31f1d9b6f506 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -13,6 +13,7 @@ struct item {
13struct item *item_create(unsigned long index, unsigned int order); 13struct item *item_create(unsigned long index, unsigned int order);
14int __item_insert(struct radix_tree_root *root, struct item *item); 14int __item_insert(struct radix_tree_root *root, struct item *item);
15int item_insert(struct radix_tree_root *root, unsigned long index); 15int item_insert(struct radix_tree_root *root, unsigned long index);
16void item_sanity(struct item *item, unsigned long index);
16int item_insert_order(struct radix_tree_root *root, unsigned long index, 17int item_insert_order(struct radix_tree_root *root, unsigned long index,
17 unsigned order); 18 unsigned order);
18int item_delete(struct radix_tree_root *root, unsigned long index); 19int item_delete(struct radix_tree_root *root, unsigned long index);