summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2015-04-25 15:48:29 -0400
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2015-05-27 15:58:02 -0400
commit5af4692a75daf08dddc93dbb4cd2a1b3d3b617af (patch)
treecd86327aa73bd38e2adbd546c0eeb3c512da1f5d
parent81e701e4376232b2779f52f15e3b7413131bd8e4 (diff)
smp: Make control dependencies work on Alpha, improve documentation
The current formulation of control dependencies fails on DEC Alpha, which does not respect dependencies of any kind unless an explicit memory barrier is provided. This means that the current fomulation of control dependencies fails on Alpha. This commit therefore creates a READ_ONCE_CTRL() that has the same overhead on non-Alpha systems, but causes Alpha to produce the needed ordering. This commit also applies READ_ONCE_CTRL() to the one known use of control dependencies. Use of READ_ONCE_CTRL() also has the beneficial effect of adding a bit of self-documentation to control dependencies. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-rw-r--r--Documentation/memory-barriers.txt55
-rw-r--r--include/linux/compiler.h16
-rw-r--r--kernel/events/ring_buffer.c2
3 files changed, 50 insertions, 23 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f95746189b5d..a3014bcc5b08 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -617,16 +617,16 @@ case what's actually required is:
617However, stores are not speculated. This means that ordering -is- provided 617However, stores are not speculated. This means that ordering -is- provided
618for load-store control dependencies, as in the following example: 618for load-store control dependencies, as in the following example:
619 619
620 q = ACCESS_ONCE(a); 620 q = READ_ONCE_CTRL(a);
621 if (q) { 621 if (q) {
622 ACCESS_ONCE(b) = p; 622 ACCESS_ONCE(b) = p;
623 } 623 }
624 624
625Control dependencies pair normally with other types of barriers. 625Control dependencies pair normally with other types of barriers. That
626That said, please note that ACCESS_ONCE() is not optional! Without the 626said, please note that READ_ONCE_CTRL() is not optional! Without the
627ACCESS_ONCE(), might combine the load from 'a' with other loads from 627READ_ONCE_CTRL(), the compiler might combine the load from 'a' with
628'a', and the store to 'b' with other stores to 'b', with possible highly 628other loads from 'a', and the store to 'b' with other stores to 'b',
629counterintuitive effects on ordering. 629with possible highly counterintuitive effects on ordering.
630 630
631Worse yet, if the compiler is able to prove (say) that the value of 631Worse yet, if the compiler is able to prove (say) that the value of
632variable 'a' is always non-zero, it would be well within its rights 632variable 'a' is always non-zero, it would be well within its rights
@@ -636,12 +636,15 @@ as follows:
636 q = a; 636 q = a;
637 b = p; /* BUG: Compiler and CPU can both reorder!!! */ 637 b = p; /* BUG: Compiler and CPU can both reorder!!! */
638 638
639So don't leave out the ACCESS_ONCE(). 639Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends()
640that DEC Alpha needs in order to respect control depedencies.
641
642So don't leave out the READ_ONCE_CTRL().
640 643
641It is tempting to try to enforce ordering on identical stores on both 644It is tempting to try to enforce ordering on identical stores on both
642branches of the "if" statement as follows: 645branches of the "if" statement as follows:
643 646
644 q = ACCESS_ONCE(a); 647 q = READ_ONCE_CTRL(a);
645 if (q) { 648 if (q) {
646 barrier(); 649 barrier();
647 ACCESS_ONCE(b) = p; 650 ACCESS_ONCE(b) = p;
@@ -655,7 +658,7 @@ branches of the "if" statement as follows:
655Unfortunately, current compilers will transform this as follows at high 658Unfortunately, current compilers will transform this as follows at high
656optimization levels: 659optimization levels:
657 660
658 q = ACCESS_ONCE(a); 661 q = READ_ONCE_CTRL(a);
659 barrier(); 662 barrier();
660 ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */ 663 ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */
661 if (q) { 664 if (q) {
@@ -685,7 +688,7 @@ memory barriers, for example, smp_store_release():
685In contrast, without explicit memory barriers, two-legged-if control 688In contrast, without explicit memory barriers, two-legged-if control
686ordering is guaranteed only when the stores differ, for example: 689ordering is guaranteed only when the stores differ, for example:
687 690
688 q = ACCESS_ONCE(a); 691 q = READ_ONCE_CTRL(a);
689 if (q) { 692 if (q) {
690 ACCESS_ONCE(b) = p; 693 ACCESS_ONCE(b) = p;
691 do_something(); 694 do_something();
@@ -694,14 +697,14 @@ ordering is guaranteed only when the stores differ, for example:
694 do_something_else(); 697 do_something_else();
695 } 698 }
696 699
697The initial ACCESS_ONCE() is still required to prevent the compiler from 700The initial READ_ONCE_CTRL() is still required to prevent the compiler
698proving the value of 'a'. 701from proving the value of 'a'.
699 702
700In addition, you need to be careful what you do with the local variable 'q', 703In addition, you need to be careful what you do with the local variable 'q',
701otherwise the compiler might be able to guess the value and again remove 704otherwise the compiler might be able to guess the value and again remove
702the needed conditional. For example: 705the needed conditional. For example:
703 706
704 q = ACCESS_ONCE(a); 707 q = READ_ONCE_CTRL(a);
705 if (q % MAX) { 708 if (q % MAX) {
706 ACCESS_ONCE(b) = p; 709 ACCESS_ONCE(b) = p;
707 do_something(); 710 do_something();
@@ -714,7 +717,7 @@ If MAX is defined to be 1, then the compiler knows that (q % MAX) is
714equal to zero, in which case the compiler is within its rights to 717equal to zero, in which case the compiler is within its rights to
715transform the above code into the following: 718transform the above code into the following:
716 719
717 q = ACCESS_ONCE(a); 720 q = READ_ONCE_CTRL(a);
718 ACCESS_ONCE(b) = p; 721 ACCESS_ONCE(b) = p;
719 do_something_else(); 722 do_something_else();
720 723
@@ -725,7 +728,7 @@ is gone, and the barrier won't bring it back. Therefore, if you are
725relying on this ordering, you should make sure that MAX is greater than 728relying on this ordering, you should make sure that MAX is greater than
726one, perhaps as follows: 729one, perhaps as follows:
727 730
728 q = ACCESS_ONCE(a); 731 q = READ_ONCE_CTRL(a);
729 BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */ 732 BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
730 if (q % MAX) { 733 if (q % MAX) {
731 ACCESS_ONCE(b) = p; 734 ACCESS_ONCE(b) = p;
@@ -742,14 +745,15 @@ of the 'if' statement.
742You must also be careful not to rely too much on boolean short-circuit 745You must also be careful not to rely too much on boolean short-circuit
743evaluation. Consider this example: 746evaluation. Consider this example:
744 747
745 q = ACCESS_ONCE(a); 748 q = READ_ONCE_CTRL(a);
746 if (a || 1 > 0) 749 if (a || 1 > 0)
747 ACCESS_ONCE(b) = 1; 750 ACCESS_ONCE(b) = 1;
748 751
749Because the second condition is always true, the compiler can transform 752Because the first condition cannot fault and the second condition is
750this example as following, defeating control dependency: 753always true, the compiler can transform this example as following,
754defeating control dependency:
751 755
752 q = ACCESS_ONCE(a); 756 q = READ_ONCE_CTRL(a);
753 ACCESS_ONCE(b) = 1; 757 ACCESS_ONCE(b) = 1;
754 758
755This example underscores the need to ensure that the compiler cannot 759This example underscores the need to ensure that the compiler cannot
@@ -762,8 +766,8 @@ demonstrated by two related examples, with the initial values of
762x and y both being zero: 766x and y both being zero:
763 767
764 CPU 0 CPU 1 768 CPU 0 CPU 1
765 ===================== ===================== 769 ======================= =======================
766 r1 = ACCESS_ONCE(x); r2 = ACCESS_ONCE(y); 770 r1 = READ_ONCE_CTRL(x); r2 = READ_ONCE_CTRL(y);
767 if (r1 > 0) if (r2 > 0) 771 if (r1 > 0) if (r2 > 0)
768 ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1; 772 ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1;
769 773
@@ -783,7 +787,8 @@ But because control dependencies do -not- provide transitivity, the above
783assertion can fail after the combined three-CPU example completes. If you 787assertion can fail after the combined three-CPU example completes. If you
784need the three-CPU example to provide ordering, you will need smp_mb() 788need the three-CPU example to provide ordering, you will need smp_mb()
785between the loads and stores in the CPU 0 and CPU 1 code fragments, 789between the loads and stores in the CPU 0 and CPU 1 code fragments,
786that is, just before or just after the "if" statements. 790that is, just before or just after the "if" statements. Furthermore,
791the original two-CPU example is very fragile and should be avoided.
787 792
788These two examples are the LB and WWC litmus tests from this paper: 793These two examples are the LB and WWC litmus tests from this paper:
789http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf and this 794http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf and this
@@ -791,6 +796,12 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html.
791 796
792In summary: 797In summary:
793 798
799 (*) Control dependencies must be headed by READ_ONCE_CTRL().
800 Or, as a much less preferable alternative, interpose
801 be headed by READ_ONCE() or an ACCESS_ONCE() read and must
802 have smp_read_barrier_depends() between this read and the
803 control-dependent write.
804
794 (*) Control dependencies can order prior loads against later stores. 805 (*) Control dependencies can order prior loads against later stores.
795 However, they do -not- guarantee any other sort of ordering: 806 However, they do -not- guarantee any other sort of ordering:
796 Not prior loads against later loads, nor prior stores against 807 Not prior loads against later loads, nor prior stores against
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 867722591be2..5d66777914db 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -252,6 +252,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
252#define WRITE_ONCE(x, val) \ 252#define WRITE_ONCE(x, val) \
253 ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) 253 ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
254 254
255/**
256 * READ_ONCE_CTRL - Read a value heading a control dependency
257 * @x: The value to be read, heading the control dependency
258 *
259 * Control dependencies are tricky. See Documentation/memory-barriers.txt
260 * for important information on how to use them. Note that in many cases,
261 * use of smp_load_acquire() will be much simpler. Control dependencies
262 * should be avoided except on the hottest of hotpaths.
263 */
264#define READ_ONCE_CTRL(x) \
265({ \
266 typeof(x) __val = READ_ONCE(x); \
267 smp_read_barrier_depends(); /* Enforce control dependency. */ \
268 __val; \
269})
270
255#endif /* __KERNEL__ */ 271#endif /* __KERNEL__ */
256 272
257#endif /* __ASSEMBLY__ */ 273#endif /* __ASSEMBLY__ */
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 232f00f273cb..17fcb73c4a50 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -141,7 +141,7 @@ int perf_output_begin(struct perf_output_handle *handle,
141 perf_output_get_handle(handle); 141 perf_output_get_handle(handle);
142 142
143 do { 143 do {
144 tail = ACCESS_ONCE(rb->user_page->data_tail); 144 tail = READ_ONCE_CTRL(rb->user_page->data_tail);
145 offset = head = local_read(&rb->head); 145 offset = head = local_read(&rb->head);
146 if (!rb->overwrite && 146 if (!rb->overwrite &&
147 unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) 147 unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))