-rw-r--r--  Documentation/memory-barriers.txt | 922
-rw-r--r--  Documentation/robust-futex-ABI.txt | 4
-rw-r--r--  MAINTAINERS | 5
-rw-r--r--  arch/alpha/include/asm/barrier.h | 25
-rw-r--r--  arch/arc/include/asm/Kbuild | 1
-rw-r--r--  arch/arc/include/asm/atomic.h | 5
-rw-r--r--  arch/arc/include/asm/barrier.h | 5
-rw-r--r--  arch/arm/include/asm/barrier.h | 15
-rw-r--r--  arch/arm64/include/asm/barrier.h | 50
-rw-r--r--  arch/avr32/include/asm/barrier.h | 17
-rw-r--r--  arch/blackfin/include/asm/barrier.h | 18
-rw-r--r--  arch/cris/include/asm/Kbuild | 1
-rw-r--r--  arch/cris/include/asm/barrier.h | 25
-rw-r--r--  arch/frv/include/asm/barrier.h | 8
-rw-r--r--  arch/hexagon/include/asm/Kbuild | 1
-rw-r--r--  arch/hexagon/include/asm/atomic.h | 6
-rw-r--r--  arch/hexagon/include/asm/barrier.h | 4
-rw-r--r--  arch/ia64/include/asm/barrier.h | 23
-rw-r--r--  arch/m32r/include/asm/barrier.h | 80
-rw-r--r--  arch/m68k/include/asm/barrier.h | 14
-rw-r--r--  arch/metag/include/asm/barrier.h | 15
-rw-r--r--  arch/microblaze/include/asm/Kbuild | 1
-rw-r--r--  arch/microblaze/include/asm/barrier.h | 27
-rw-r--r--  arch/mips/include/asm/barrier.h | 15
-rw-r--r--  arch/mn10300/include/asm/Kbuild | 1
-rw-r--r--  arch/mn10300/include/asm/barrier.h | 37
-rw-r--r--  arch/parisc/include/asm/Kbuild | 1
-rw-r--r--  arch/parisc/include/asm/barrier.h | 35
-rw-r--r--  arch/powerpc/include/asm/barrier.h | 21
-rw-r--r--  arch/powerpc/include/asm/spinlock.h | 2
-rw-r--r--  arch/s390/include/asm/barrier.h | 15
-rw-r--r--  arch/score/include/asm/Kbuild | 1
-rw-r--r--  arch/score/include/asm/barrier.h | 16
-rw-r--r--  arch/sh/include/asm/barrier.h | 21
-rw-r--r--  arch/sparc/include/asm/barrier_32.h | 12
-rw-r--r--  arch/sparc/include/asm/barrier_64.h | 15
-rw-r--r--  arch/tile/include/asm/barrier.h | 68
-rw-r--r--  arch/unicore32/include/asm/barrier.h | 11
-rw-r--r--  arch/x86/include/asm/barrier.h | 43
-rw-r--r--  arch/xtensa/include/asm/barrier.h | 9
-rw-r--r--  include/asm-generic/barrier.h | 55
-rw-r--r--  include/linux/compiler.h | 9
-rw-r--r--  include/linux/spinlock.h | 10
-rw-r--r--  kernel/futex.c | 201
-rw-r--r--  kernel/locking/lockdep.c | 4
-rw-r--r--  kernel/locking/mutex-debug.c | 7
-rw-r--r--  kernel/rcu/tree.c | 18
-rw-r--r--  kernel/rcu/tree_plugin.h | 13
-rw-r--r--  kernel/softirq.c | 49
-rw-r--r--  tools/lib/lockdep/Makefile | 251
-rw-r--r--  tools/lib/lockdep/common.c | 33
-rw-r--r--  tools/lib/lockdep/include/liblockdep/common.h | 50
-rw-r--r--  tools/lib/lockdep/include/liblockdep/mutex.h | 70
-rw-r--r--  tools/lib/lockdep/include/liblockdep/rwlock.h | 86
-rwxr-xr-x  tools/lib/lockdep/lockdep | 3
-rw-r--r--  tools/lib/lockdep/lockdep.c | 2
-rw-r--r--  tools/lib/lockdep/lockdep_internals.h | 1
-rw-r--r--  tools/lib/lockdep/lockdep_states.h | 1
-rw-r--r--  tools/lib/lockdep/preload.c | 447
-rw-r--r--  tools/lib/lockdep/rbtree.c | 1
-rw-r--r--  tools/lib/lockdep/run_tests.sh | 27
-rw-r--r--  tools/lib/lockdep/tests/AA.c | 13
-rw-r--r--  tools/lib/lockdep/tests/ABBA.c | 13
-rw-r--r--  tools/lib/lockdep/tests/ABBCCA.c | 15
-rw-r--r--  tools/lib/lockdep/tests/ABBCCDDA.c | 17
-rw-r--r--  tools/lib/lockdep/tests/ABCABC.c | 15
-rw-r--r--  tools/lib/lockdep/tests/ABCDBCDA.c | 17
-rw-r--r--  tools/lib/lockdep/tests/ABCDBDDA.c | 17
-rw-r--r--  tools/lib/lockdep/tests/WW.c | 13
-rw-r--r--  tools/lib/lockdep/tests/common.h | 12
-rw-r--r--  tools/lib/lockdep/tests/unlock_balance.c | 12
-rw-r--r--  tools/lib/lockdep/uinclude/asm/hweight.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/asm/sections.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/bitops.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/compiler.h | 7
-rw-r--r--  tools/lib/lockdep/uinclude/linux/debug_locks.h | 12
-rw-r--r--  tools/lib/lockdep/uinclude/linux/delay.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/export.h | 7
-rw-r--r--  tools/lib/lockdep/uinclude/linux/ftrace.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/gfp.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/hardirq.h | 11
-rw-r--r--  tools/lib/lockdep/uinclude/linux/hash.h | 1
-rw-r--r--  tools/lib/lockdep/uinclude/linux/interrupt.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/irqflags.h | 38
-rw-r--r--  tools/lib/lockdep/uinclude/linux/kallsyms.h | 32
-rw-r--r--  tools/lib/lockdep/uinclude/linux/kern_levels.h | 25
-rw-r--r--  tools/lib/lockdep/uinclude/linux/kernel.h | 44
-rw-r--r--  tools/lib/lockdep/uinclude/linux/kmemcheck.h | 8
-rw-r--r--  tools/lib/lockdep/uinclude/linux/linkage.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/list.h | 1
-rw-r--r--  tools/lib/lockdep/uinclude/linux/lockdep.h | 55
-rw-r--r--  tools/lib/lockdep/uinclude/linux/module.h | 6
-rw-r--r--  tools/lib/lockdep/uinclude/linux/mutex.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/poison.h | 1
-rw-r--r--  tools/lib/lockdep/uinclude/linux/prefetch.h | 6
-rw-r--r--  tools/lib/lockdep/uinclude/linux/proc_fs.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/rbtree.h | 1
-rw-r--r--  tools/lib/lockdep/uinclude/linux/rbtree_augmented.h | 2
-rw-r--r--  tools/lib/lockdep/uinclude/linux/rcu.h | 16
-rw-r--r--  tools/lib/lockdep/uinclude/linux/seq_file.h | 3
-rw-r--r--  tools/lib/lockdep/uinclude/linux/spinlock.h | 25
-rw-r--r--  tools/lib/lockdep/uinclude/linux/stacktrace.h | 32
-rw-r--r--  tools/lib/lockdep/uinclude/linux/stringify.h | 7
-rw-r--r--  tools/lib/lockdep/uinclude/linux/types.h | 58
-rw-r--r--  tools/lib/lockdep/uinclude/trace/events/lock.h | 3
105 files changed, 2825 insertions, 683 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index c8c42e64e953..102dc19c4119 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -194,18 +194,22 @@ There are some minimal guarantees that may be expected of a CPU:
194 (*) On any given CPU, dependent memory accesses will be issued in order, with 194 (*) On any given CPU, dependent memory accesses will be issued in order, with
195 respect to itself. This means that for: 195 respect to itself. This means that for:
196 196
197 Q = P; D = *Q; 197 ACCESS_ONCE(Q) = P; smp_read_barrier_depends(); D = ACCESS_ONCE(*Q);
198 198
199 the CPU will issue the following memory operations: 199 the CPU will issue the following memory operations:
200 200
201 Q = LOAD P, D = LOAD *Q 201 Q = LOAD P, D = LOAD *Q
202 202
203 and always in that order. 203 and always in that order. On most systems, smp_read_barrier_depends()
204 does nothing, but it is required for DEC Alpha. The ACCESS_ONCE()
205 is required to prevent compiler mischief. Please note that you
206 should normally use something like rcu_dereference() instead of
207 open-coding smp_read_barrier_depends().
204 208
205 (*) Overlapping loads and stores within a particular CPU will appear to be 209 (*) Overlapping loads and stores within a particular CPU will appear to be
206 ordered within that CPU. This means that for: 210 ordered within that CPU. This means that for:
207 211
208 a = *X; *X = b; 212 a = ACCESS_ONCE(*X); ACCESS_ONCE(*X) = b;
209 213
210 the CPU will only issue the following sequence of memory operations: 214 the CPU will only issue the following sequence of memory operations:
211 215
@@ -213,7 +217,7 @@ There are some minimal guarantees that may be expected of a CPU:
213 217
214 And for: 218 And for:
215 219
216 *X = c; d = *X; 220 ACCESS_ONCE(*X) = c; d = ACCESS_ONCE(*X);
217 221
218 the CPU will only issue: 222 the CPU will only issue:
219 223
@@ -224,6 +228,12 @@ There are some minimal guarantees that may be expected of a CPU:
224 228
225And there are a number of things that _must_ or _must_not_ be assumed: 229And there are a number of things that _must_ or _must_not_ be assumed:
226 230
231 (*) It _must_not_ be assumed that the compiler will do what you want with
232 memory references that are not protected by ACCESS_ONCE(). Without
233 ACCESS_ONCE(), the compiler is within its rights to do all sorts
234 of "creative" transformations, which are covered in the Compiler
235 Barrier section.
236
227 (*) It _must_not_ be assumed that independent loads and stores will be issued 237 (*) It _must_not_ be assumed that independent loads and stores will be issued
228 in the order given. This means that for: 238 in the order given. This means that for:
229 239
@@ -371,33 +381,44 @@ Memory barriers come in four basic varieties:
371 381
372And a couple of implicit varieties: 382And a couple of implicit varieties:
373 383
374 (5) LOCK operations. 384 (5) ACQUIRE operations.
375 385
376 This acts as a one-way permeable barrier. It guarantees that all memory 386 This acts as a one-way permeable barrier. It guarantees that all memory
377 operations after the LOCK operation will appear to happen after the LOCK 387 operations after the ACQUIRE operation will appear to happen after the
378 operation with respect to the other components of the system. 388 ACQUIRE operation with respect to the other components of the system.
389 ACQUIRE operations include LOCK operations and smp_load_acquire()
390 operations.
379 391
380 Memory operations that occur before a LOCK operation may appear to happen 392 Memory operations that occur before an ACQUIRE operation may appear to
381 after it completes. 393 happen after it completes.
382 394
383 A LOCK operation should almost always be paired with an UNLOCK operation. 395 An ACQUIRE operation should almost always be paired with a RELEASE
396 operation.
384 397
385 398
386 (6) UNLOCK operations. 399 (6) RELEASE operations.
387 400
388 This also acts as a one-way permeable barrier. It guarantees that all 401 This also acts as a one-way permeable barrier. It guarantees that all
389 memory operations before the UNLOCK operation will appear to happen before 402 memory operations before the RELEASE operation will appear to happen
390 the UNLOCK operation with respect to the other components of the system. 403 before the RELEASE operation with respect to the other components of the
404 system. RELEASE operations include UNLOCK operations and
405 smp_store_release() operations.
391 406
392 Memory operations that occur after an UNLOCK operation may appear to 407 Memory operations that occur after a RELEASE operation may appear to
393 happen before it completes. 408 happen before it completes.
394 409
395 LOCK and UNLOCK operations are guaranteed to appear with respect to each 410 The use of ACQUIRE and RELEASE operations generally precludes the need
396 other strictly in the order specified. 411 for other sorts of memory barrier (but note the exceptions mentioned in
412 the subsection "MMIO write barrier"). In addition, a RELEASE+ACQUIRE
413 pair is -not- guaranteed to act as a full memory barrier. However, after
414 an ACQUIRE on a given variable, all memory accesses preceding any prior
415 RELEASE on that same variable are guaranteed to be visible. In other
416 words, within a given variable's critical section, all accesses of all
417 previous critical sections for that variable are guaranteed to have
418 completed.
397 419
398 The use of LOCK and UNLOCK operations generally precludes the need for 420 This means that ACQUIRE acts as a minimal "acquire" operation and
399 other sorts of memory barrier (but note the exceptions mentioned in the 421 RELEASE acts as a minimal "release" operation.
400 subsection "MMIO write barrier").
401 422
402 423
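The ACQUIRE and RELEASE classes above map directly onto the smp_load_acquire() and smp_store_release() primitives named in items (5) and (6). As a minimal illustration, not part of the patch itself, here is a flag-and-payload handoff sketch; the structure, variable and function names are invented, and the usual barrier definitions (asm/barrier.h in kernels of this vintage) are assumed to be in scope:

        struct message {
                int payload;
        };

        static struct message msg;
        static int msg_ready;   /* 0 = empty, 1 = msg.payload is valid */

        void producer(int value)
        {
                msg.payload = value;                    /* plain store */
                smp_store_release(&msg_ready, 1);       /* RELEASE: payload visible before flag */
        }

        int consumer(int *value)
        {
                if (!smp_load_acquire(&msg_ready))      /* ACQUIRE: flag load ordered ... */
                        return 0;
                *value = msg.payload;                   /* ... before this read */
                return 1;
        }

If consumer() observes msg_ready == 1, the RELEASE/ACQUIRE pair guarantees that it also observes the payload written before the release.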
403Memory barriers are only required where there's a possibility of interaction 424Memory barriers are only required where there's a possibility of interaction
@@ -450,14 +471,14 @@ The usage requirements of data dependency barriers are a little subtle, and
450it's not always obvious that they're needed. To illustrate, consider the 471it's not always obvious that they're needed. To illustrate, consider the
451following sequence of events: 472following sequence of events:
452 473
453 CPU 1 CPU 2 474 CPU 1 CPU 2
454 =============== =============== 475 =============== ===============
455 { A == 1, B == 2, C = 3, P == &A, Q == &C } 476 { A == 1, B == 2, C = 3, P == &A, Q == &C }
456 B = 4; 477 B = 4;
457 <write barrier> 478 <write barrier>
458 P = &B 479 ACCESS_ONCE(P) = &B
459 Q = P; 480 Q = ACCESS_ONCE(P);
460 D = *Q; 481 D = *Q;
461 482
462There's a clear data dependency here, and it would seem that by the end of the 483There's a clear data dependency here, and it would seem that by the end of the
463sequence, Q must be either &A or &B, and that: 484sequence, Q must be either &A or &B, and that:
@@ -477,15 +498,15 @@ Alpha).
477To deal with this, a data dependency barrier or better must be inserted 498To deal with this, a data dependency barrier or better must be inserted
478between the address load and the data load: 499between the address load and the data load:
479 500
480 CPU 1 CPU 2 501 CPU 1 CPU 2
481 =============== =============== 502 =============== ===============
482 { A == 1, B == 2, C = 3, P == &A, Q == &C } 503 { A == 1, B == 2, C = 3, P == &A, Q == &C }
483 B = 4; 504 B = 4;
484 <write barrier> 505 <write barrier>
485 P = &B 506 ACCESS_ONCE(P) = &B
486 Q = P; 507 Q = ACCESS_ONCE(P);
487 <data dependency barrier> 508 <data dependency barrier>
488 D = *Q; 509 D = *Q;
489 510
490This enforces the occurrence of one of the two implications, and prevents the 511This enforces the occurrence of one of the two implications, and prevents the
491third possibility from arising. 512third possibility from arising.
@@ -500,25 +521,26 @@ odd-numbered bank is idle, one can see the new value of the pointer P (&B),
500but the old value of the variable B (2). 521but the old value of the variable B (2).
501 522
502 523
503Another example of where data dependency barriers might by required is where a 524Another example of where data dependency barriers might be required is where a
504number is read from memory and then used to calculate the index for an array 525number is read from memory and then used to calculate the index for an array
505access: 526access:
506 527
507 CPU 1 CPU 2 528 CPU 1 CPU 2
508 =============== =============== 529 =============== ===============
509 { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 } 530 { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
510 M[1] = 4; 531 M[1] = 4;
511 <write barrier> 532 <write barrier>
512 P = 1 533 ACCESS_ONCE(P) = 1
513 Q = P; 534 Q = ACCESS_ONCE(P);
514 <data dependency barrier> 535 <data dependency barrier>
515 D = M[Q]; 536 D = M[Q];
516 537
517 538
518The data dependency barrier is very important to the RCU system, for example. 539The data dependency barrier is very important to the RCU system,
519See rcu_dereference() in include/linux/rcupdate.h. This permits the current 540for example. See rcu_assign_pointer() and rcu_dereference() in
520target of an RCU'd pointer to be replaced with a new modified target, without 541include/linux/rcupdate.h. This permits the current target of an RCU'd
521the replacement target appearing to be incompletely initialised. 542pointer to be replaced with a new modified target, without the replacement
543target appearing to be incompletely initialised.
522 544
523See also the subsection on "Cache Coherency" for a more thorough example. 545See also the subsection on "Cache Coherency" for a more thorough example.
524 546
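For the RCU case referenced above, rcu_assign_pointer() supplies the write barrier on the publishing side and rcu_dereference() supplies the data dependency barrier (where one is needed) on the reading side. A minimal sketch, with an invented structure and invented function names:

        #include <linux/rcupdate.h>
        #include <linux/slab.h>

        struct foo {
                int a;
                int b;
        };

        static struct foo __rcu *gp;    /* hypothetical RCU-protected pointer */

        void publish(int a, int b)
        {
                struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);

                if (!p)
                        return;
                p->a = a;
                p->b = b;
                rcu_assign_pointer(gp, p);      /* write barrier, then pointer store */
        }

        int reader(void)
        {
                struct foo *p;
                int val = -1;

                rcu_read_lock();
                p = rcu_dereference(gp);        /* pointer load + data dependency barrier */
                if (p)
                        val = p->a;             /* sees the initialised value */
                rcu_read_unlock();
                return val;
        }

Replacing an old target and freeing it safely would additionally need synchronize_rcu() or call_rcu(), which is outside the scope of this sketch.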
@@ -530,24 +552,190 @@ A control dependency requires a full read memory barrier, not simply a data
530dependency barrier to make it work correctly. Consider the following bit of 552dependency barrier to make it work correctly. Consider the following bit of
531code: 553code:
532 554
533 q = &a; 555 q = ACCESS_ONCE(a);
534 if (p) { 556 if (q) {
535 <data dependency barrier> 557 <data dependency barrier> /* BUG: No data dependency!!! */
536 q = &b; 558 p = ACCESS_ONCE(b);
537 } 559 }
538 x = *q;
539 560
540This will not have the desired effect because there is no actual data 561This will not have the desired effect because there is no actual data
541dependency, but rather a control dependency that the CPU may short-circuit by 562dependency, but rather a control dependency that the CPU may short-circuit
542attempting to predict the outcome in advance. In such a case what's actually 563by attempting to predict the outcome in advance, so that other CPUs see
543required is: 564the load from b as having happened before the load from a. In such a
565case what's actually required is:
544 566
545 q = &a; 567 q = ACCESS_ONCE(a);
546 if (p) { 568 if (q) {
547 <read barrier> 569 <read barrier>
548 q = &b; 570 p = ACCESS_ONCE(b);
571 }
572
573However, stores are not speculated. This means that ordering -is- provided
574in the following example:
575
576 q = ACCESS_ONCE(a);
577 if (ACCESS_ONCE(q)) {
578 ACCESS_ONCE(b) = p;
579 }
580
581Please note that ACCESS_ONCE() is not optional! Without the ACCESS_ONCE(),
582the compiler is within its rights to transform this example:
583
584 q = a;
585 if (q) {
586 b = p; /* BUG: Compiler can reorder!!! */
587 do_something();
588 } else {
589 b = p; /* BUG: Compiler can reorder!!! */
590 do_something_else();
591 }
592
593into this, which of course defeats the ordering:
594
595 b = p;
596 q = a;
597 if (q)
598 do_something();
599 else
600 do_something_else();
601
602Worse yet, if the compiler is able to prove (say) that the value of
603variable 'a' is always non-zero, it would be well within its rights
604to optimize the original example by eliminating the "if" statement
605as follows:
606
607 q = a;
608 b = p; /* BUG: Compiler can reorder!!! */
609 do_something();
610
611The solution is again ACCESS_ONCE(), which preserves the ordering between
612the load from variable 'a' and the store to variable 'b':
613
614 q = ACCESS_ONCE(a);
615 if (q) {
616 ACCESS_ONCE(b) = p;
617 do_something();
618 } else {
619 ACCESS_ONCE(b) = p;
620 do_something_else();
621 }
622
623You could also use barrier() to prevent the compiler from moving
624the stores to variable 'b', but barrier() would not prevent the
625compiler from proving to itself that a==1 always, so ACCESS_ONCE()
626is also needed.
627
628It is important to note that control dependencies absolutely require
629a conditional. For example, the following "optimized" version of
630the above example breaks ordering:
631
632 q = ACCESS_ONCE(a);
633 ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */
634 if (q) {
635 /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
636 do_something();
637 } else {
638 /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
639 do_something_else();
549 } 640 }
550 x = *q; 641
642It is of course legal for the prior load to be part of the conditional,
643for example, as follows:
644
645 if (ACCESS_ONCE(a) > 0) {
646 ACCESS_ONCE(b) = q / 2;
647 do_something();
648 } else {
649 ACCESS_ONCE(b) = q / 3;
650 do_something_else();
651 }
652
653This will again ensure that the load from variable 'a' is ordered before the
654stores to variable 'b'.
655
656In addition, you need to be careful what you do with the local variable 'q',
657otherwise the compiler might be able to guess the value and again remove
658the needed conditional. For example:
659
660 q = ACCESS_ONCE(a);
661 if (q % MAX) {
662 ACCESS_ONCE(b) = p;
663 do_something();
664 } else {
665 ACCESS_ONCE(b) = p;
666 do_something_else();
667 }
668
669If MAX is defined to be 1, then the compiler knows that (q % MAX) is
670equal to zero, in which case the compiler is within its rights to
671transform the above code into the following:
672
673 q = ACCESS_ONCE(a);
674 ACCESS_ONCE(b) = p;
675 do_something_else();
676
677This transformation loses the ordering between the load from variable 'a'
678and the store to variable 'b'. If you are relying on this ordering, you
679should do something like the following:
680
681 q = ACCESS_ONCE(a);
682 BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
683 if (q % MAX) {
684 ACCESS_ONCE(b) = p;
685 do_something();
686 } else {
687 ACCESS_ONCE(b) = p;
688 do_something_else();
689 }
690
691Finally, control dependencies do -not- provide transitivity. This is
692demonstrated by two related examples:
693
694 CPU 0 CPU 1
695 ===================== =====================
696 r1 = ACCESS_ONCE(x); r2 = ACCESS_ONCE(y);
697 if (r1 >= 0) if (r2 >= 0)
698 ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1;
699
700 assert(!(r1 == 1 && r2 == 1));
701
702The above two-CPU example will never trigger the assert(). However,
703if control dependencies guaranteed transitivity (which they do not),
704then adding the following two CPUs would guarantee a related assertion:
705
706 CPU 2 CPU 3
707 ===================== =====================
708 ACCESS_ONCE(x) = 2; ACCESS_ONCE(y) = 2;
709
710 assert(!(r1 == 2 && r2 == 2 && x == 1 && y == 1)); /* FAILS!!! */
711
712But because control dependencies do -not- provide transitivity, the
713above assertion can fail after the combined four-CPU example completes.
714If you need the four-CPU example to provide ordering, you will need
715smp_mb() between the loads and stores in the CPU 0 and CPU 1 code fragments.
716
717In summary:
718
719 (*) Control dependencies can order prior loads against later stores.
720 However, they do -not- guarantee any other sort of ordering:
721 Not prior loads against later loads, nor prior stores against
722 later anything. If you need these other forms of ordering,
723     use smp_rmb(), smp_wmb(), or, in the case of prior stores and
724 later loads, smp_mb().
725
726 (*) Control dependencies require at least one run-time conditional
727 between the prior load and the subsequent store. If the compiler
728 is able to optimize the conditional away, it will have also
729 optimized away the ordering. Careful use of ACCESS_ONCE() can
730 help to preserve the needed conditional.
731
732 (*) Control dependencies require that the compiler avoid reordering the
733 dependency into nonexistence. Careful use of ACCESS_ONCE() or
734 barrier() can help to preserve your control dependency. Please
735 see the Compiler Barrier section for more information.
736
737 (*) Control dependencies do -not- provide transitivity. If you
738 need transitivity, use smp_mb().
551 739
552 740
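Pulling the rules above together, a load-to-store control dependency is typically packaged along the following lines (a sketch only; the function and variable names are invented and this is not a kernel API):

        /*
         * Publish 'val' to '*out' only if '*trigger' is seen to be nonzero.
         * The ACCESS_ONCE() on the load keeps the compiler from guessing the
         * value and deleting the conditional; the ACCESS_ONCE() on the store
         * keeps it from hoisting the store out of the "if".  Together they
         * preserve the control dependency ordering the load before the store.
         */
        static int maybe_publish(int *trigger, int *out, int val)
        {
                if (ACCESS_ONCE(*trigger)) {
                        ACCESS_ONCE(*out) = val;
                        return 1;
                }
                return 0;
        }

As the summary notes, this orders only the prior load against the later store; it gives no load-load or store-store ordering and no transitivity.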
553SMP BARRIER PAIRING 741SMP BARRIER PAIRING
@@ -561,23 +749,23 @@ barrier, though a general barrier would also be viable. Similarly a read
561barrier or a data dependency barrier should always be paired with at least an 749barrier or a data dependency barrier should always be paired with at least an
562write barrier, though, again, a general barrier is viable: 750write barrier, though, again, a general barrier is viable:
563 751
564 CPU 1 CPU 2 752 CPU 1 CPU 2
565 =============== =============== 753 =============== ===============
566 a = 1; 754 ACCESS_ONCE(a) = 1;
567 <write barrier> 755 <write barrier>
568 b = 2; x = b; 756 ACCESS_ONCE(b) = 2; x = ACCESS_ONCE(b);
569 <read barrier> 757 <read barrier>
570 y = a; 758 y = ACCESS_ONCE(a);
571 759
572Or: 760Or:
573 761
574 CPU 1 CPU 2 762 CPU 1 CPU 2
575 =============== =============================== 763 =============== ===============================
576 a = 1; 764 a = 1;
577 <write barrier> 765 <write barrier>
578 b = &a; x = b; 766 ACCESS_ONCE(b) = &a; x = ACCESS_ONCE(b);
579 <data dependency barrier> 767 <data dependency barrier>
580 y = *x; 768 y = *x;
581 769
582Basically, the read barrier always has to be there, even though it can be of 770Basically, the read barrier always has to be there, even though it can be of
583the "weaker" type. 771the "weaker" type.
@@ -586,13 +774,13 @@ the "weaker" type.
586match the loads after the read barrier or the data dependency barrier, and vice 774match the loads after the read barrier or the data dependency barrier, and vice
587versa: 775versa:
588 776
589 CPU 1 CPU 2 777 CPU 1 CPU 2
590 =============== =============== 778 =================== ===================
591 a = 1; }---- --->{ v = c 779 ACCESS_ONCE(a) = 1; }---- --->{ v = ACCESS_ONCE(c);
592 b = 2; } \ / { w = d 780 ACCESS_ONCE(b) = 2; } \ / { w = ACCESS_ONCE(d);
593 <write barrier> \ <read barrier> 781 <write barrier> \ <read barrier>
594 c = 3; } / \ { x = a; 782 ACCESS_ONCE(c) = 3; } / \ { x = ACCESS_ONCE(a);
595 d = 4; }---- --->{ y = b; 783 ACCESS_ONCE(d) = 4; }---- --->{ y = ACCESS_ONCE(b);
596 784
597 785
598EXAMPLES OF MEMORY BARRIER SEQUENCES 786EXAMPLES OF MEMORY BARRIER SEQUENCES
@@ -882,12 +1070,12 @@ cache it for later use.
882 1070
883Consider: 1071Consider:
884 1072
885 CPU 1 CPU 2 1073 CPU 1 CPU 2
886 ======================= ======================= 1074 ======================= =======================
887 LOAD B 1075 LOAD B
888 DIVIDE } Divide instructions generally 1076 DIVIDE } Divide instructions generally
889 DIVIDE } take a long time to perform 1077 DIVIDE } take a long time to perform
890 LOAD A 1078 LOAD A
891 1079
892Which might appear as this: 1080Which might appear as this:
893 1081
@@ -910,13 +1098,13 @@ Which might appear as this:
910Placing a read barrier or a data dependency barrier just before the second 1098Placing a read barrier or a data dependency barrier just before the second
911load: 1099load:
912 1100
913 CPU 1 CPU 2 1101 CPU 1 CPU 2
914 ======================= ======================= 1102 ======================= =======================
915 LOAD B 1103 LOAD B
916 DIVIDE 1104 DIVIDE
917 DIVIDE 1105 DIVIDE
918 <read barrier> 1106 <read barrier>
919 LOAD A 1107 LOAD A
920 1108
921will force any value speculatively obtained to be reconsidered to an extent 1109will force any value speculatively obtained to be reconsidered to an extent
922dependent on the type of barrier used. If there was no change made to the 1110dependent on the type of barrier used. If there was no change made to the
@@ -1042,10 +1230,277 @@ compiler from moving the memory accesses either side of it to the other side:
1042 1230
1043 barrier(); 1231 barrier();
1044 1232
1045This is a general barrier - lesser varieties of compiler barrier do not exist. 1233This is a general barrier -- there are no read-read or write-write variants
1234of barrier(). However, ACCESS_ONCE() can be thought of as a weak form
1235for barrier() that affects only the specific accesses flagged by the
1236ACCESS_ONCE().
1237
1238The barrier() function has the following effects:
1239
1240 (*) Prevents the compiler from reordering accesses following the
1241 barrier() to precede any accesses preceding the barrier().
1242 One example use for this property is to ease communication between
1243 interrupt-handler code and the code that was interrupted.
1244
1245 (*) Within a loop, forces the compiler to load the variables used
1246 in that loop's conditional on each pass through that loop.
1247
1248The ACCESS_ONCE() function can prevent any number of optimizations that,
1249while perfectly safe in single-threaded code, can be fatal in concurrent
1250code. Here are some examples of these sorts of optimizations:
1251
1252 (*) The compiler is within its rights to merge successive loads from
1253 the same variable. Such merging can cause the compiler to "optimize"
1254 the following code:
1255
1256 while (tmp = a)
1257 do_something_with(tmp);
1258
1259 into the following code, which, although in some sense legitimate
1260 for single-threaded code, is almost certainly not what the developer
1261 intended:
1262
1263 if (tmp = a)
1264 for (;;)
1265 do_something_with(tmp);
1266
1267 Use ACCESS_ONCE() to prevent the compiler from doing this to you:
1268
1269 while (tmp = ACCESS_ONCE(a))
1270 do_something_with(tmp);
1271
1272 (*) The compiler is within its rights to reload a variable, for example,
1273 in cases where high register pressure prevents the compiler from
1274 keeping all data of interest in registers. The compiler might
1275 therefore optimize the variable 'tmp' out of our previous example:
1276
1277 while (tmp = a)
1278 do_something_with(tmp);
1279
1280 This could result in the following code, which is perfectly safe in
1281 single-threaded code, but can be fatal in concurrent code:
1282
1283 while (a)
1284 do_something_with(a);
1285
1286 For example, the optimized version of this code could result in
1287 passing a zero to do_something_with() in the case where the variable
1288 a was modified by some other CPU between the "while" statement and
1289 the call to do_something_with().
1290
1291 Again, use ACCESS_ONCE() to prevent the compiler from doing this:
1292
1293 while (tmp = ACCESS_ONCE(a))
1294 do_something_with(tmp);
1295
1296 Note that if the compiler runs short of registers, it might save
1297 tmp onto the stack. The overhead of this saving and later restoring
1298 is why compilers reload variables. Doing so is perfectly safe for
1299 single-threaded code, so you need to tell the compiler about cases
1300 where it is not safe.
1301
1302 (*) The compiler is within its rights to omit a load entirely if it knows
1303 what the value will be. For example, if the compiler can prove that
1304 the value of variable 'a' is always zero, it can optimize this code:
1305
1306 while (tmp = a)
1307 do_something_with(tmp);
1046 1308
1047The compiler barrier has no direct effect on the CPU, which may then reorder 1309 Into this:
1048things however it wishes. 1310
1311 do { } while (0);
1312
1313 This transformation is a win for single-threaded code because it gets
1314 rid of a load and a branch. The problem is that the compiler will
1315 carry out its proof assuming that the current CPU is the only one
1316 updating variable 'a'. If variable 'a' is shared, then the compiler's
1317 proof will be erroneous. Use ACCESS_ONCE() to tell the compiler
1318 that it doesn't know as much as it thinks it does:
1319
1320 while (tmp = ACCESS_ONCE(a))
1321 do_something_with(tmp);
1322
1323 But please note that the compiler is also closely watching what you
1324 do with the value after the ACCESS_ONCE(). For example, suppose you
1325 do the following and MAX is a preprocessor macro with the value 1:
1326
1327 while ((tmp = ACCESS_ONCE(a)) % MAX)
1328 do_something_with(tmp);
1329
1330 Then the compiler knows that the result of the "%" operator applied
1331 to MAX will always be zero, again allowing the compiler to optimize
1332 the code into near-nonexistence. (It will still load from the
1333 variable 'a'.)
1334
1335 (*) Similarly, the compiler is within its rights to omit a store entirely
1336 if it knows that the variable already has the value being stored.
1337 Again, the compiler assumes that the current CPU is the only one
1338 storing into the variable, which can cause the compiler to do the
1339 wrong thing for shared variables. For example, suppose you have
1340 the following:
1341
1342 a = 0;
1343 /* Code that does not store to variable a. */
1344 a = 0;
1345
1346 The compiler sees that the value of variable 'a' is already zero, so
1347 it might well omit the second store. This would come as a fatal
1348 surprise if some other CPU might have stored to variable 'a' in the
1349 meantime.
1350
1351 Use ACCESS_ONCE() to prevent the compiler from making this sort of
1352 wrong guess:
1353
1354 ACCESS_ONCE(a) = 0;
1355 /* Code that does not store to variable a. */
1356 ACCESS_ONCE(a) = 0;
1357
1358 (*) The compiler is within its rights to reorder memory accesses unless
1359 you tell it not to. For example, consider the following interaction
1360 between process-level code and an interrupt handler:
1361
1362 void process_level(void)
1363 {
1364 msg = get_message();
1365 flag = true;
1366 }
1367
1368 void interrupt_handler(void)
1369 {
1370 if (flag)
1371 process_message(msg);
1372 }
1373
1374     There is nothing to prevent the compiler from transforming
1375 process_level() to the following, in fact, this might well be a
1376 win for single-threaded code:
1377
1378 void process_level(void)
1379 {
1380 flag = true;
1381 msg = get_message();
1382 }
1383
1384     If the interrupt occurs between these two statements, then
1385 interrupt_handler() might be passed a garbled msg. Use ACCESS_ONCE()
1386 to prevent this as follows:
1387
1388 void process_level(void)
1389 {
1390 ACCESS_ONCE(msg) = get_message();
1391 ACCESS_ONCE(flag) = true;
1392 }
1393
1394 void interrupt_handler(void)
1395 {
1396 if (ACCESS_ONCE(flag))
1397 process_message(ACCESS_ONCE(msg));
1398 }
1399
1400 Note that the ACCESS_ONCE() wrappers in interrupt_handler()
1401 are needed if this interrupt handler can itself be interrupted
1402 by something that also accesses 'flag' and 'msg', for example,
1403 a nested interrupt or an NMI. Otherwise, ACCESS_ONCE() is not
1404 needed in interrupt_handler() other than for documentation purposes.
1405 (Note also that nested interrupts do not typically occur in modern
1406 Linux kernels, in fact, if an interrupt handler returns with
1407 interrupts enabled, you will get a WARN_ONCE() splat.)
1408
1409 You should assume that the compiler can move ACCESS_ONCE() past
1410 code not containing ACCESS_ONCE(), barrier(), or similar primitives.
1411
1412 This effect could also be achieved using barrier(), but ACCESS_ONCE()
1413 is more selective: With ACCESS_ONCE(), the compiler need only forget
1414 the contents of the indicated memory locations, while with barrier()
1415 the compiler must discard the value of all memory locations that
1416     it has currently cached in any machine registers. Of course,
1417 the compiler must also respect the order in which the ACCESS_ONCE()s
1418 occur, though the CPU of course need not do so.
1419
1420 (*) The compiler is within its rights to invent stores to a variable,
1421 as in the following example:
1422
1423 if (a)
1424 b = a;
1425 else
1426 b = 42;
1427
1428 The compiler might save a branch by optimizing this as follows:
1429
1430 b = 42;
1431 if (a)
1432 b = a;
1433
1434 In single-threaded code, this is not only safe, but also saves
1435 a branch. Unfortunately, in concurrent code, this optimization
1436 could cause some other CPU to see a spurious value of 42 -- even
1437 if variable 'a' was never zero -- when loading variable 'b'.
1438 Use ACCESS_ONCE() to prevent this as follows:
1439
1440 if (a)
1441 ACCESS_ONCE(b) = a;
1442 else
1443 ACCESS_ONCE(b) = 42;
1444
1445 The compiler can also invent loads. These are usually less
1446 damaging, but they can result in cache-line bouncing and thus in
1447 poor performance and scalability. Use ACCESS_ONCE() to prevent
1448 invented loads.
1449
1450 (*) For aligned memory locations whose size allows them to be accessed
1451     with a single memory-reference instruction, ACCESS_ONCE() prevents "load tearing"
1452 and "store tearing," in which a single large access is replaced by
1453 multiple smaller accesses. For example, given an architecture having
1454 16-bit store instructions with 7-bit immediate fields, the compiler
1455 might be tempted to use two 16-bit store-immediate instructions to
1456 implement the following 32-bit store:
1457
1458 p = 0x00010002;
1459
1460 Please note that GCC really does use this sort of optimization,
1461 which is not surprising given that it would likely take more
1462 than two instructions to build the constant and then store it.
1463 This optimization can therefore be a win in single-threaded code.
1464 In fact, a recent bug (since fixed) caused GCC to incorrectly use
1465 this optimization in a volatile store. In the absence of such bugs,
1466 use of ACCESS_ONCE() prevents store tearing in the following example:
1467
1468 ACCESS_ONCE(p) = 0x00010002;
1469
1470 Use of packed structures can also result in load and store tearing,
1471 as in this example:
1472
1473 struct __attribute__((__packed__)) foo {
1474 short a;
1475 int b;
1476 short c;
1477 };
1478 struct foo foo1, foo2;
1479 ...
1480
1481 foo2.a = foo1.a;
1482 foo2.b = foo1.b;
1483 foo2.c = foo1.c;
1484
1485 Because there are no ACCESS_ONCE() wrappers and no volatile markings,
1486 the compiler would be well within its rights to implement these three
1487 assignment statements as a pair of 32-bit loads followed by a pair
1488 of 32-bit stores. This would result in load tearing on 'foo1.b'
1489 and store tearing on 'foo2.b'. ACCESS_ONCE() again prevents tearing
1490 in this example:
1491
1492 foo2.a = foo1.a;
1493 ACCESS_ONCE(foo2.b) = ACCESS_ONCE(foo1.b);
1494 foo2.c = foo1.c;
1495
1496All that aside, it is never necessary to use ACCESS_ONCE() on a variable
1497that has been marked volatile. For example, because 'jiffies' is marked
1498volatile, it is never necessary to say ACCESS_ONCE(jiffies). The reason
1499for this is that ACCESS_ONCE() is implemented as a volatile cast, which
1500has no effect when its argument is already marked volatile.
1501
1502Please note that these compiler barriers have no direct effect on the CPU,
1503which may then reorder things however it wishes.
1049 1504
1050 1505
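For reference, ACCESS_ONCE() in kernels of this vintage is defined in include/linux/compiler.h as a volatile cast along these lines (quoted from memory, so treat the exact form as approximate; 'shared_flag' is a hypothetical variable):

        #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

        /*
         * Example: force one real load per iteration while polling a shared
         * flag.  The volatile access constrains the compiler only; it inserts
         * no CPU memory barrier.
         */
        while (!ACCESS_ONCE(shared_flag))
                cpu_relax();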
1051CPU MEMORY BARRIERS 1506CPU MEMORY BARRIERS
@@ -1135,7 +1590,7 @@ There are some more advanced barrier functions:
1135 clear_bit( ... ); 1590 clear_bit( ... );
1136 1591
1137 This prevents memory operations before the clear leaking to after it. See 1592 This prevents memory operations before the clear leaking to after it. See
1138 the subsection on "Locking Functions" with reference to UNLOCK operation 1593 the subsection on "Locking Functions" with reference to RELEASE operation
1139 implications. 1594 implications.
1140 1595
1141 See Documentation/atomic_ops.txt for more information. See the "Atomic 1596 See Documentation/atomic_ops.txt for more information. See the "Atomic
@@ -1169,8 +1624,8 @@ provide more substantial guarantees, but these may not be relied upon outside
1169of arch specific code. 1624of arch specific code.
1170 1625
1171 1626
1172LOCKING FUNCTIONS 1627ACQUIRING FUNCTIONS
1173----------------- 1628-------------------
1174 1629
1175The Linux kernel has a number of locking constructs: 1630The Linux kernel has a number of locking constructs:
1176 1631
@@ -1181,65 +1636,107 @@ The Linux kernel has a number of locking constructs:
1181 (*) R/W semaphores 1636 (*) R/W semaphores
1182 (*) RCU 1637 (*) RCU
1183 1638
1184In all cases there are variants on "LOCK" operations and "UNLOCK" operations 1639In all cases there are variants on "ACQUIRE" operations and "RELEASE" operations
1185for each construct. These operations all imply certain barriers: 1640for each construct. These operations all imply certain barriers:
1186 1641
1187 (1) LOCK operation implication: 1642 (1) ACQUIRE operation implication:
1188 1643
1189 Memory operations issued after the LOCK will be completed after the LOCK 1644 Memory operations issued after the ACQUIRE will be completed after the
1190 operation has completed. 1645 ACQUIRE operation has completed.
1191 1646
1192 Memory operations issued before the LOCK may be completed after the LOCK 1647 Memory operations issued before the ACQUIRE may be completed after the
1193 operation has completed. 1648 ACQUIRE operation has completed. An smp_mb__before_spinlock(), combined
1649 with a following ACQUIRE, orders prior loads against subsequent stores and
1650 stores and prior stores against subsequent stores. Note that this is
1651 weaker than smp_mb()! The smp_mb__before_spinlock() primitive is free on
1652 many architectures.
1194 1653
1195 (2) UNLOCK operation implication: 1654 (2) RELEASE operation implication:
1196 1655
1197 Memory operations issued before the UNLOCK will be completed before the 1656 Memory operations issued before the RELEASE will be completed before the
1198 UNLOCK operation has completed. 1657 RELEASE operation has completed.
1199 1658
1200 Memory operations issued after the UNLOCK may be completed before the 1659 Memory operations issued after the RELEASE may be completed before the
1201 UNLOCK operation has completed. 1660 RELEASE operation has completed.
1202 1661
1203 (3) LOCK vs LOCK implication: 1662 (3) ACQUIRE vs ACQUIRE implication:
1204 1663
1205 All LOCK operations issued before another LOCK operation will be completed 1664 All ACQUIRE operations issued before another ACQUIRE operation will be
1206 before that LOCK operation. 1665 completed before that ACQUIRE operation.
1207 1666
1208 (4) LOCK vs UNLOCK implication: 1667 (4) ACQUIRE vs RELEASE implication:
1209 1668
1210 All LOCK operations issued before an UNLOCK operation will be completed 1669 All ACQUIRE operations issued before a RELEASE operation will be
1211 before the UNLOCK operation. 1670 completed before the RELEASE operation.
1212 1671
1213 All UNLOCK operations issued before a LOCK operation will be completed 1672 (5) Failed conditional ACQUIRE implication:
1214 before the LOCK operation.
1215 1673
1216 (5) Failed conditional LOCK implication: 1674 Certain locking variants of the ACQUIRE operation may fail, either due to
1217 1675 being unable to get the lock immediately, or due to receiving an unblocked
1218 Certain variants of the LOCK operation may fail, either due to being
1219 unable to get the lock immediately, or due to receiving an unblocked
1220 signal whilst asleep waiting for the lock to become available. Failed 1676 signal whilst asleep waiting for the lock to become available. Failed
1221 locks do not imply any sort of barrier. 1677 locks do not imply any sort of barrier.
1222 1678
1223Therefore, from (1), (2) and (4) an UNLOCK followed by an unconditional LOCK is 1679[!] Note: one of the consequences of lock ACQUIREs and RELEASEs being only
1224equivalent to a full barrier, but a LOCK followed by an UNLOCK is not. 1680one-way barriers is that the effects of instructions outside of a critical
1225 1681section may seep into the inside of the critical section.
1226[!] Note: one of the consequences of LOCKs and UNLOCKs being only one-way
1227 barriers is that the effects of instructions outside of a critical section
1228 may seep into the inside of the critical section.
1229 1682
1230A LOCK followed by an UNLOCK may not be assumed to be full memory barrier 1683An ACQUIRE followed by a RELEASE may not be assumed to be full memory barrier
1231because it is possible for an access preceding the LOCK to happen after the 1684because it is possible for an access preceding the ACQUIRE to happen after the
1232LOCK, and an access following the UNLOCK to happen before the UNLOCK, and the 1685ACQUIRE, and an access following the RELEASE to happen before the RELEASE, and
1233two accesses can themselves then cross: 1686the two accesses can themselves then cross:
1234 1687
1235 *A = a; 1688 *A = a;
1236 LOCK 1689 ACQUIRE M
1237 UNLOCK 1690 RELEASE M
1238 *B = b; 1691 *B = b;
1239 1692
1240may occur as: 1693may occur as:
1241 1694
1242 LOCK, STORE *B, STORE *A, UNLOCK 1695 ACQUIRE M, STORE *B, STORE *A, RELEASE M
1696
1697This same reordering can of course occur if the lock's ACQUIRE and RELEASE are
1698to the same lock variable, but only from the perspective of another CPU not
1699holding that lock.
1700
1701In short, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full
1702memory barrier because it is possible for a preceding RELEASE to pass a
1703later ACQUIRE from the viewpoint of the CPU, but not from the viewpoint
1704of the compiler. Note that deadlocks cannot be introduced by this
1705interchange because if such a deadlock threatened, the RELEASE would
1706simply complete.
1707
1708If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the
1709ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation. This
1710will produce a full barrier if either (a) the RELEASE and the ACQUIRE are
1711executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on the
1712same variable. The smp_mb__after_unlock_lock() primitive is free on many
1713architectures. Without smp_mb__after_unlock_lock(), the critical sections
1714corresponding to the RELEASE and the ACQUIRE can cross:
1715
1716 *A = a;
1717 RELEASE M
1718 ACQUIRE N
1719 *B = b;
1720
1721could occur as:
1722
1723 ACQUIRE N, STORE *B, STORE *A, RELEASE M
1724
1725With smp_mb__after_unlock_lock(), they cannot, so that:
1726
1727 *A = a;
1728 RELEASE M
1729 ACQUIRE N
1730 smp_mb__after_unlock_lock();
1731 *B = b;
1732
1733will always occur as either of the following:
1734
1735 STORE *A, RELEASE, ACQUIRE, STORE *B
1736 STORE *A, ACQUIRE, RELEASE, STORE *B
1737
1738If the RELEASE and ACQUIRE were instead both operating on the same lock
1739variable, only the first of these two alternatives can occur.
1243 1740
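Expressed with the spinlock API, the RELEASE-plus-ACQUIRE upgrade described above looks roughly like the sketch below (lock and variable names invented for illustration):

        #include <linux/spinlock.h>

        static DEFINE_SPINLOCK(m);
        static DEFINE_SPINLOCK(n);
        static int a, b;

        void handoff(void)
        {
                spin_lock(&m);
                ACCESS_ONCE(a) = 1;             /* *A = a */
                spin_unlock(&m);                /* RELEASE M */
                spin_lock(&n);                  /* ACQUIRE N */
                smp_mb__after_unlock_lock();    /* upgrade RELEASE M + ACQUIRE N to a full barrier */
                ACCESS_ONCE(b) = 1;             /* *B = b */
                spin_unlock(&n);
        }

Without the smp_mb__after_unlock_lock(), a third CPU holding neither lock could observe the store to 'b' before the store to 'a'.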
1244Locks and semaphores may not provide any guarantee of ordering on UP compiled 1741Locks and semaphores may not provide any guarantee of ordering on UP compiled
1245systems, and so cannot be counted on in such a situation to actually achieve 1742systems, and so cannot be counted on in such a situation to actually achieve
@@ -1253,33 +1750,33 @@ As an example, consider the following:
1253 1750
1254 *A = a; 1751 *A = a;
1255 *B = b; 1752 *B = b;
1256 LOCK 1753 ACQUIRE
1257 *C = c; 1754 *C = c;
1258 *D = d; 1755 *D = d;
1259 UNLOCK 1756 RELEASE
1260 *E = e; 1757 *E = e;
1261 *F = f; 1758 *F = f;
1262 1759
1263The following sequence of events is acceptable: 1760The following sequence of events is acceptable:
1264 1761
1265 LOCK, {*F,*A}, *E, {*C,*D}, *B, UNLOCK 1762 ACQUIRE, {*F,*A}, *E, {*C,*D}, *B, RELEASE
1266 1763
1267 [+] Note that {*F,*A} indicates a combined access. 1764 [+] Note that {*F,*A} indicates a combined access.
1268 1765
1269But none of the following are: 1766But none of the following are:
1270 1767
1271 {*F,*A}, *B, LOCK, *C, *D, UNLOCK, *E 1768 {*F,*A}, *B, ACQUIRE, *C, *D, RELEASE, *E
1272 *A, *B, *C, LOCK, *D, UNLOCK, *E, *F 1769 *A, *B, *C, ACQUIRE, *D, RELEASE, *E, *F
1273 *A, *B, LOCK, *C, UNLOCK, *D, *E, *F 1770 *A, *B, ACQUIRE, *C, RELEASE, *D, *E, *F
1274 *B, LOCK, *C, *D, UNLOCK, {*F,*A}, *E 1771 *B, ACQUIRE, *C, *D, RELEASE, {*F,*A}, *E
1275 1772
1276 1773
1277 1774
1278INTERRUPT DISABLING FUNCTIONS 1775INTERRUPT DISABLING FUNCTIONS
1279----------------------------- 1776-----------------------------
1280 1777
1281Functions that disable interrupts (LOCK equivalent) and enable interrupts 1778Functions that disable interrupts (ACQUIRE equivalent) and enable interrupts
1282(UNLOCK equivalent) will act as compiler barriers only. So if memory or I/O 1779(RELEASE equivalent) will act as compiler barriers only. So if memory or I/O
1283barriers are required in such a situation, they must be provided from some 1780barriers are required in such a situation, they must be provided from some
1284other means. 1781other means.
1285 1782
@@ -1418,75 +1915,81 @@ Other functions that imply barriers:
1418 (*) schedule() and similar imply full memory barriers. 1915 (*) schedule() and similar imply full memory barriers.
1419 1916
1420 1917
1421================================= 1918===================================
1422INTER-CPU LOCKING BARRIER EFFECTS 1919INTER-CPU ACQUIRING BARRIER EFFECTS
1423================================= 1920===================================
1424 1921
1425On SMP systems locking primitives give a more substantial form of barrier: one 1922On SMP systems locking primitives give a more substantial form of barrier: one
1426that does affect memory access ordering on other CPUs, within the context of 1923that does affect memory access ordering on other CPUs, within the context of
1427conflict on any particular lock. 1924conflict on any particular lock.
1428 1925
1429 1926
1430LOCKS VS MEMORY ACCESSES 1927ACQUIRES VS MEMORY ACCESSES
1431------------------------ 1928---------------------------
1432 1929
1433Consider the following: the system has a pair of spinlocks (M) and (Q), and 1930Consider the following: the system has a pair of spinlocks (M) and (Q), and
1434three CPUs; then should the following sequence of events occur: 1931three CPUs; then should the following sequence of events occur:
1435 1932
1436 CPU 1 CPU 2 1933 CPU 1 CPU 2
1437 =============================== =============================== 1934 =============================== ===============================
1438 *A = a; *E = e; 1935 ACCESS_ONCE(*A) = a; ACCESS_ONCE(*E) = e;
1439 LOCK M LOCK Q 1936 ACQUIRE M ACQUIRE Q
1440 *B = b; *F = f; 1937 ACCESS_ONCE(*B) = b; ACCESS_ONCE(*F) = f;
1441 *C = c; *G = g; 1938 ACCESS_ONCE(*C) = c; ACCESS_ONCE(*G) = g;
1442 UNLOCK M UNLOCK Q 1939 RELEASE M RELEASE Q
1443 *D = d; *H = h; 1940 ACCESS_ONCE(*D) = d; ACCESS_ONCE(*H) = h;
1444 1941
1445Then there is no guarantee as to what order CPU 3 will see the accesses to *A 1942Then there is no guarantee as to what order CPU 3 will see the accesses to *A
1446through *H occur in, other than the constraints imposed by the separate locks 1943through *H occur in, other than the constraints imposed by the separate locks
1447on the separate CPUs. It might, for example, see: 1944on the separate CPUs. It might, for example, see:
1448 1945
1449 *E, LOCK M, LOCK Q, *G, *C, *F, *A, *B, UNLOCK Q, *D, *H, UNLOCK M 1946 *E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
1450 1947
1451But it won't see any of: 1948But it won't see any of:
1452 1949
1453 *B, *C or *D preceding LOCK M 1950 *B, *C or *D preceding ACQUIRE M
1454 *A, *B or *C following UNLOCK M 1951 *A, *B or *C following RELEASE M
1455 *F, *G or *H preceding LOCK Q 1952 *F, *G or *H preceding ACQUIRE Q
1456 *E, *F or *G following UNLOCK Q 1953 *E, *F or *G following RELEASE Q
1457 1954
1458 1955
1459However, if the following occurs: 1956However, if the following occurs:
1460 1957
1461 CPU 1 CPU 2 1958 CPU 1 CPU 2
1462 =============================== =============================== 1959 =============================== ===============================
1463 *A = a; 1960 ACCESS_ONCE(*A) = a;
1464 LOCK M [1] 1961 ACQUIRE M [1]
1465 *B = b; 1962 ACCESS_ONCE(*B) = b;
1466 *C = c; 1963 ACCESS_ONCE(*C) = c;
1467 UNLOCK M [1] 1964 RELEASE M [1]
1468 *D = d; *E = e; 1965 ACCESS_ONCE(*D) = d; ACCESS_ONCE(*E) = e;
1469 LOCK M [2] 1966 ACQUIRE M [2]
1470 *F = f; 1967 smp_mb__after_unlock_lock();
1471 *G = g; 1968 ACCESS_ONCE(*F) = f;
1472 UNLOCK M [2] 1969 ACCESS_ONCE(*G) = g;
1473 *H = h; 1970 RELEASE M [2]
1971 ACCESS_ONCE(*H) = h;
1474 1972
1475CPU 3 might see: 1973CPU 3 might see:
1476 1974
1477 *E, LOCK M [1], *C, *B, *A, UNLOCK M [1], 1975 *E, ACQUIRE M [1], *C, *B, *A, RELEASE M [1],
1478 LOCK M [2], *H, *F, *G, UNLOCK M [2], *D 1976 ACQUIRE M [2], *H, *F, *G, RELEASE M [2], *D
1479 1977
1480But assuming CPU 1 gets the lock first, CPU 3 won't see any of: 1978But assuming CPU 1 gets the lock first, CPU 3 won't see any of:
1481 1979
1482 *B, *C, *D, *F, *G or *H preceding LOCK M [1] 1980 *B, *C, *D, *F, *G or *H preceding ACQUIRE M [1]
1483 *A, *B or *C following UNLOCK M [1] 1981 *A, *B or *C following RELEASE M [1]
1484 *F, *G or *H preceding LOCK M [2] 1982 *F, *G or *H preceding ACQUIRE M [2]
1485 *A, *B, *C, *E, *F or *G following UNLOCK M [2] 1983 *A, *B, *C, *E, *F or *G following RELEASE M [2]
1486 1984
1985Note that the smp_mb__after_unlock_lock() is critically important
1986here: Without it CPU 3 might see some of the above orderings.
1987Without smp_mb__after_unlock_lock(), the accesses are not guaranteed
1988to be seen in order unless CPU 3 holds lock M.
1487 1989
1488LOCKS VS I/O ACCESSES 1990
1489--------------------- 1991ACQUIRES VS I/O ACCESSES
1992------------------------
1490 1993
1491Under certain circumstances (especially involving NUMA), I/O accesses within 1994Under certain circumstances (especially involving NUMA), I/O accesses within
1492two spinlocked sections on two different CPUs may be seen as interleaved by the 1995two spinlocked sections on two different CPUs may be seen as interleaved by the
@@ -1687,28 +2190,30 @@ explicit lock operations, described later). These include:
1687 2190
1688 xchg(); 2191 xchg();
1689 cmpxchg(); 2192 cmpxchg();
1690 atomic_xchg(); 2193 atomic_xchg(); atomic_long_xchg();
1691 atomic_cmpxchg(); 2194 atomic_cmpxchg(); atomic_long_cmpxchg();
1692 atomic_inc_return(); 2195 atomic_inc_return(); atomic_long_inc_return();
1693 atomic_dec_return(); 2196 atomic_dec_return(); atomic_long_dec_return();
1694 atomic_add_return(); 2197 atomic_add_return(); atomic_long_add_return();
1695 atomic_sub_return(); 2198 atomic_sub_return(); atomic_long_sub_return();
1696 atomic_inc_and_test(); 2199 atomic_inc_and_test(); atomic_long_inc_and_test();
1697 atomic_dec_and_test(); 2200 atomic_dec_and_test(); atomic_long_dec_and_test();
1698 atomic_sub_and_test(); 2201 atomic_sub_and_test(); atomic_long_sub_and_test();
1699 atomic_add_negative(); 2202 atomic_add_negative(); atomic_long_add_negative();
1700 atomic_add_unless(); /* when succeeds (returns 1) */
1701 test_and_set_bit(); 2203 test_and_set_bit();
1702 test_and_clear_bit(); 2204 test_and_clear_bit();
1703 test_and_change_bit(); 2205 test_and_change_bit();
1704 2206
1705These are used for such things as implementing LOCK-class and UNLOCK-class 2207 /* when succeeds (returns 1) */
2208 atomic_add_unless(); atomic_long_add_unless();
2209
2210These are used for such things as implementing ACQUIRE-class and RELEASE-class
1706operations and adjusting reference counters towards object destruction, and as 2211operations and adjusting reference counters towards object destruction, and as
1707such the implicit memory barrier effects are necessary. 2212such the implicit memory barrier effects are necessary.
1708 2213
1709 2214
1710The following operations are potential problems as they do _not_ imply memory 2215The following operations are potential problems as they do _not_ imply memory
1711barriers, but might be used for implementing such things as UNLOCK-class 2216barriers, but might be used for implementing such things as RELEASE-class
1712operations: 2217operations:
1713 2218
1714 atomic_set(); 2219 atomic_set();
@@ -1750,7 +2255,7 @@ The following operations are special locking primitives:
1750 clear_bit_unlock(); 2255 clear_bit_unlock();
1751 __clear_bit_unlock(); 2256 __clear_bit_unlock();
1752 2257
1753These implement LOCK-class and UNLOCK-class operations. These should be used in 2258These implement ACQUIRE-class and RELEASE-class operations. These should be used in
1754preference to other operations when implementing locking primitives, because 2259preference to other operations when implementing locking primitives, because
1755their implementations can be optimised on many architectures. 2260their implementations can be optimised on many architectures.
1756 2261
@@ -1887,8 +2392,8 @@ functions:
1887 space should suffice for PCI. 2392 space should suffice for PCI.
1888 2393
1889 [*] NOTE! attempting to load from the same location as was written to may 2394 [*] NOTE! attempting to load from the same location as was written to may
1890 cause a malfunction - consider the 16550 Rx/Tx serial registers for 2395 cause a malfunction - consider the 16550 Rx/Tx serial registers for
1891 example. 2396 example.
1892 2397
1893 Used with prefetchable I/O memory, an mmiowb() barrier may be required to 2398 Used with prefetchable I/O memory, an mmiowb() barrier may be required to
1894 force stores to be ordered. 2399 force stores to be ordered.
@@ -1955,19 +2460,19 @@ barriers for the most part act at the interface between the CPU and its cache
1955 : 2460 :
1956 +--------+ +--------+ : +--------+ +-----------+ 2461 +--------+ +--------+ : +--------+ +-----------+
1957 | | | | : | | | | +--------+ 2462 | | | | : | | | | +--------+
1958 | CPU | | Memory | : | CPU | | | | | 2463 | CPU | | Memory | : | CPU | | | | |
1959 | Core |--->| Access |----->| Cache |<-->| | | | 2464 | Core |--->| Access |----->| Cache |<-->| | | |
1960 | | | Queue | : | | | |--->| Memory | 2465 | | | Queue | : | | | |--->| Memory |
1961 | | | | : | | | | | | 2466 | | | | : | | | | | |
1962 +--------+ +--------+ : +--------+ | | | | 2467 +--------+ +--------+ : +--------+ | | | |
1963 : | Cache | +--------+ 2468 : | Cache | +--------+
1964 : | Coherency | 2469 : | Coherency |
1965 : | Mechanism | +--------+ 2470 : | Mechanism | +--------+
1966 +--------+ +--------+ : +--------+ | | | | 2471 +--------+ +--------+ : +--------+ | | | |
1967 | | | | : | | | | | | 2472 | | | | : | | | | | |
1968 | CPU | | Memory | : | CPU | | |--->| Device | 2473 | CPU | | Memory | : | CPU | | |--->| Device |
1969 | Core |--->| Access |----->| Cache |<-->| | | | 2474 | Core |--->| Access |----->| Cache |<-->| | | |
1970 | | | Queue | : | | | | | | 2475 | | | Queue | : | | | | | |
1971 | | | | : | | | | +--------+ 2476 | | | | : | | | | +--------+
1972 +--------+ +--------+ : +--------+ +-----------+ 2477 +--------+ +--------+ : +--------+ +-----------+
1973 : 2478 :
@@ -2090,7 +2595,7 @@ CPU's caches by some other cache event:
2090 p = &v; q = p; 2595 p = &v; q = p;
2091 <D:request p> 2596 <D:request p>
2092 <B:modify p=&v> <D:commit p=&v> 2597 <B:modify p=&v> <D:commit p=&v>
2093 <D:read p> 2598 <D:read p>
2094 x = *q; 2599 x = *q;
2095 <C:read *q> Reads from v before v updated in cache 2600 <C:read *q> Reads from v before v updated in cache
2096 <C:unbusy> 2601 <C:unbusy>
@@ -2115,7 +2620,7 @@ queue before processing any further requests:
2115 p = &v; q = p; 2620 p = &v; q = p;
2116 <D:request p> 2621 <D:request p>
2117 <B:modify p=&v> <D:commit p=&v> 2622 <B:modify p=&v> <D:commit p=&v>
2118 <D:read p> 2623 <D:read p>
2119 smp_read_barrier_depends() 2624 smp_read_barrier_depends()
2120 <C:unbusy> 2625 <C:unbusy>
2121 <C:commit v=2> 2626 <C:commit v=2>
@@ -2177,11 +2682,11 @@ A programmer might take it for granted that the CPU will perform memory
 operations in exactly the order specified, so that if the CPU is, for example,
 given the following piece of code to execute:

-	a = *A;
-	*B = b;
-	c = *C;
-	d = *D;
-	*E = e;
+	a = ACCESS_ONCE(*A);
+	ACCESS_ONCE(*B) = b;
+	c = ACCESS_ONCE(*C);
+	d = ACCESS_ONCE(*D);
+	ACCESS_ONCE(*E) = e;

 they would then expect that the CPU will complete the memory operation for each
 instruction before moving on to the next one, leading to a definite sequence of
@@ -2228,12 +2733,12 @@ However, it is guaranteed that a CPU will be self-consistent: it will see its
 _own_ accesses appear to be correctly ordered, without the need for a memory
 barrier. For instance with the following code:

-	U = *A;
-	*A = V;
-	*A = W;
-	X = *A;
-	*A = Y;
-	Z = *A;
+	U = ACCESS_ONCE(*A);
+	ACCESS_ONCE(*A) = V;
+	ACCESS_ONCE(*A) = W;
+	X = ACCESS_ONCE(*A);
+	ACCESS_ONCE(*A) = Y;
+	Z = ACCESS_ONCE(*A);

 and assuming no intervention by an external influence, it can be assumed that
 the final result will appear to be:
@@ -2250,7 +2755,12 @@ accesses:

 in that order, but, without intervention, the sequence may have almost any
 combination of elements combined or discarded, provided the program's view of
-the world remains consistent.
+the world remains consistent. Note that ACCESS_ONCE() is -not- optional
+in the above example, as there are architectures where a given CPU might
+interchange successive loads to the same location. On such architectures,
+ACCESS_ONCE() does whatever is necessary to prevent this, for example, on
+Itanium the volatile casts used by ACCESS_ONCE() cause GCC to emit the
+special ld.acq and st.rel instructions that prevent such reordering.

 The compiler may also combine, discard or defer elements of the sequence before
 the CPU even sees them.
@@ -2264,13 +2774,13 @@ may be reduced to:

 	*A = W;

-since, without a write barrier, it can be assumed that the effect of the
-storage of V to *A is lost. Similarly:
+since, without either a write barrier or an ACCESS_ONCE(), it can be
+assumed that the effect of the storage of V to *A is lost. Similarly:

 	*A = Y;
 	Z = *A;

-may, without a memory barrier, be reduced to:
+may, without a memory barrier or an ACCESS_ONCE(), be reduced to:

 	*A = Y;
 	Z = Y;
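
The ACCESS_ONCE() discussion in the hunks above is easier to see in compilable
form. The following is an illustrative sketch only, not part of this patch: it
approximates the kernel's ACCESS_ONCE() in user space (the real definition
lives in include/linux/compiler.h), and wait_for_flag() is a made-up name for
the example.

	#include <stdio.h>

	/* User-space approximation of the kernel macro. */
	#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

	static int flag;	/* written by another thread in real code */

	static int wait_for_flag(void)
	{
		int spins = 0;

		/*
		 * Without ACCESS_ONCE() the compiler may load 'flag' once
		 * and turn this into an infinite loop; with it, every
		 * iteration performs a fresh load (and, per the text above,
		 * GCC on Itanium emits ld.acq for the volatile access).
		 */
		while (!ACCESS_ONCE(flag))
			spins++;

		return spins;
	}

	int main(void)
	{
		ACCESS_ONCE(flag) = 1;	/* store cannot be elided or merged */
		printf("spun %d times\n", wait_for_flag());
		return 0;
	}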
diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
index fd1cd8aae4eb..16eb314f56cc 100644
--- a/Documentation/robust-futex-ABI.txt
+++ b/Documentation/robust-futex-ABI.txt
@@ -146,8 +146,8 @@ On removal:
  1) set the 'list_op_pending' word to the address of the 'lock entry'
     to be removed,
  2) remove the lock entry for this lock from the 'head' list,
- 2) release the futex lock, and
- 2) clear the 'lock_op_pending' word.
+ 3) release the futex lock, and
+ 4) clear the 'lock_op_pending' word.

 On exit, the kernel will consider the address stored in
 'list_op_pending' and the address of each 'lock word' found by walking
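
A sketch of the corrected four-step removal sequence, using the robust-list
layout from the uapi <linux/futex.h>. This is not part of the patch:
remove_from_list() is a stand-in for whatever list manipulation the C library
really performs, and a real implementation must also check FUTEX_WAITERS and
wake any sleepers when it releases the lock word.

	#include <linux/futex.h>
	#include <stdint.h>
	#include <stddef.h>

	static void remove_from_list(struct robust_list_head *head,
				     struct robust_list *entry)
	{
		/* unlink 'entry' from the list rooted at head->list (omitted) */
	}

	static void robust_unlock(struct robust_list_head *head,
				  struct robust_list *entry,
				  uint32_t *lock_word)
	{
		head->list_op_pending = entry;			/* step 1 */
		__sync_synchronize();	/* keep the steps visible in order to the kernel walker */
		remove_from_list(head, entry);			/* step 2 */
		__atomic_store_n(lock_word, 0, __ATOMIC_RELEASE); /* step 3: release the futex lock */
		head->list_op_pending = NULL;			/* step 4 */
	}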
diff --git a/MAINTAINERS b/MAINTAINERS
index 6a6e4ac72287..38b11d554169 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5141,6 +5141,11 @@ F: drivers/lguest/
 F:	include/linux/lguest*.h
 F:	tools/lguest/

+LIBLOCKDEP
+M:	Sasha Levin <sasha.levin@oracle.com>
+S:	Maintained
+F:	tools/lib/lockdep/
+
 LINUX FOR IBM pSERIES (RS/6000)
 M:	Paul Mackerras <paulus@au.ibm.com>
 W:	http://www.ibm.com/linux/ltc/projects/ppc
diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h
index ce8860a0b32d..3832bdb794fe 100644
--- a/arch/alpha/include/asm/barrier.h
+++ b/arch/alpha/include/asm/barrier.h
@@ -3,33 +3,18 @@
3 3
4#include <asm/compiler.h> 4#include <asm/compiler.h>
5 5
6#define mb() \ 6#define mb() __asm__ __volatile__("mb": : :"memory")
7__asm__ __volatile__("mb": : :"memory") 7#define rmb() __asm__ __volatile__("mb": : :"memory")
8#define wmb() __asm__ __volatile__("wmb": : :"memory")
8 9
9#define rmb() \ 10#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
10__asm__ __volatile__("mb": : :"memory")
11
12#define wmb() \
13__asm__ __volatile__("wmb": : :"memory")
14
15#define read_barrier_depends() \
16__asm__ __volatile__("mb": : :"memory")
17 11
18#ifdef CONFIG_SMP 12#ifdef CONFIG_SMP
19#define __ASM_SMP_MB "\tmb\n" 13#define __ASM_SMP_MB "\tmb\n"
20#define smp_mb() mb()
21#define smp_rmb() rmb()
22#define smp_wmb() wmb()
23#define smp_read_barrier_depends() read_barrier_depends()
24#else 14#else
25#define __ASM_SMP_MB 15#define __ASM_SMP_MB
26#define smp_mb() barrier()
27#define smp_rmb() barrier()
28#define smp_wmb() barrier()
29#define smp_read_barrier_depends() do { } while (0)
30#endif 16#endif
31 17
32#define set_mb(var, value) \ 18#include <asm-generic/barrier.h>
33do { var = value; mb(); } while (0)
34 19
35#endif /* __BARRIER_H */ 20#endif /* __BARRIER_H */
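
The Alpha conversion above shows the pattern this series applies everywhere:
an architecture defines only the primitives it can implement better than a
compiler barrier and pulls smp_*(), set_mb() and the rest from
asm-generic/barrier.h. A hypothetical minimal header for a new port might
look like the sketch below; "sync" is a placeholder mnemonic, not a real
instruction, and __NEWARCH_BARRIER_H is an invented guard.

	#ifndef __NEWARCH_BARRIER_H
	#define __NEWARCH_BARRIER_H

	/* Override only what the hardware actually needs. */
	#define mb()	asm volatile("sync" ::: "memory")	/* placeholder full barrier */
	#define rmb()	mb()
	#define wmb()	mb()

	/* Everything else falls back to the generic definitions. */
	#include <asm-generic/barrier.h>

	#endif /* __NEWARCH_BARRIER_H */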
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 5943f7f9d325..9ae21c198007 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,4 +1,5 @@
1generic-y += auxvec.h 1generic-y += auxvec.h
2generic-y += barrier.h
2generic-y += bugs.h 3generic-y += bugs.h
3generic-y += bitsperlong.h 4generic-y += bitsperlong.h
4generic-y += clkdev.h 5generic-y += clkdev.h
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 83f03ca6caf6..03e494f695d1 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -190,6 +190,11 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
190 190
191#endif /* !CONFIG_ARC_HAS_LLSC */ 191#endif /* !CONFIG_ARC_HAS_LLSC */
192 192
193#define smp_mb__before_atomic_dec() barrier()
194#define smp_mb__after_atomic_dec() barrier()
195#define smp_mb__before_atomic_inc() barrier()
196#define smp_mb__after_atomic_inc() barrier()
197
193/** 198/**
194 * __atomic_add_unless - add unless the number is a given value 199 * __atomic_add_unless - add unless the number is a given value
195 * @v: pointer of type atomic_t 200 * @v: pointer of type atomic_t
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
index f6cb7c4ffb35..c32245c3d1e9 100644
--- a/arch/arc/include/asm/barrier.h
+++ b/arch/arc/include/asm/barrier.h
@@ -30,11 +30,6 @@
30#define smp_wmb() barrier() 30#define smp_wmb() barrier()
31#endif 31#endif
32 32
33#define smp_mb__before_atomic_dec() barrier()
34#define smp_mb__after_atomic_dec() barrier()
35#define smp_mb__before_atomic_inc() barrier()
36#define smp_mb__after_atomic_inc() barrier()
37
38#define smp_read_barrier_depends() do { } while (0) 33#define smp_read_barrier_depends() do { } while (0)
39 34
40#endif 35#endif
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 60f15e274e6d..2f59f7443396 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -59,6 +59,21 @@
59#define smp_wmb() dmb(ishst) 59#define smp_wmb() dmb(ishst)
60#endif 60#endif
61 61
62#define smp_store_release(p, v) \
63do { \
64 compiletime_assert_atomic_type(*p); \
65 smp_mb(); \
66 ACCESS_ONCE(*p) = (v); \
67} while (0)
68
69#define smp_load_acquire(p) \
70({ \
71 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
72 compiletime_assert_atomic_type(*p); \
73 smp_mb(); \
74 ___p1; \
75})
76
62#define read_barrier_depends() do { } while(0) 77#define read_barrier_depends() do { } while(0)
63#define smp_read_barrier_depends() do { } while(0) 78#define smp_read_barrier_depends() do { } while(0)
64 79
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index d4a63338a53c..78e20ba8806b 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -35,10 +35,60 @@
35#define smp_mb() barrier() 35#define smp_mb() barrier()
36#define smp_rmb() barrier() 36#define smp_rmb() barrier()
37#define smp_wmb() barrier() 37#define smp_wmb() barrier()
38
39#define smp_store_release(p, v) \
40do { \
41 compiletime_assert_atomic_type(*p); \
42 smp_mb(); \
43 ACCESS_ONCE(*p) = (v); \
44} while (0)
45
46#define smp_load_acquire(p) \
47({ \
48 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
49 compiletime_assert_atomic_type(*p); \
50 smp_mb(); \
51 ___p1; \
52})
53
38#else 54#else
55
39#define smp_mb() asm volatile("dmb ish" : : : "memory") 56#define smp_mb() asm volatile("dmb ish" : : : "memory")
40#define smp_rmb() asm volatile("dmb ishld" : : : "memory") 57#define smp_rmb() asm volatile("dmb ishld" : : : "memory")
41#define smp_wmb() asm volatile("dmb ishst" : : : "memory") 58#define smp_wmb() asm volatile("dmb ishst" : : : "memory")
59
60#define smp_store_release(p, v) \
61do { \
62 compiletime_assert_atomic_type(*p); \
63 switch (sizeof(*p)) { \
64 case 4: \
65 asm volatile ("stlr %w1, %0" \
66 : "=Q" (*p) : "r" (v) : "memory"); \
67 break; \
68 case 8: \
69 asm volatile ("stlr %1, %0" \
70 : "=Q" (*p) : "r" (v) : "memory"); \
71 break; \
72 } \
73} while (0)
74
75#define smp_load_acquire(p) \
76({ \
77 typeof(*p) ___p1; \
78 compiletime_assert_atomic_type(*p); \
79 switch (sizeof(*p)) { \
80 case 4: \
81 asm volatile ("ldar %w0, %1" \
82 : "=r" (___p1) : "Q" (*p) : "memory"); \
83 break; \
84 case 8: \
85 asm volatile ("ldar %0, %1" \
86 : "=r" (___p1) : "Q" (*p) : "memory"); \
87 break; \
88 } \
89 ___p1; \
90})
91
42#endif 92#endif
43 93
44#define read_barrier_depends() do { } while(0) 94#define read_barrier_depends() do { } while(0)
diff --git a/arch/avr32/include/asm/barrier.h b/arch/avr32/include/asm/barrier.h
index 0961275373db..715100790fd0 100644
--- a/arch/avr32/include/asm/barrier.h
+++ b/arch/avr32/include/asm/barrier.h
@@ -8,22 +8,15 @@
8#ifndef __ASM_AVR32_BARRIER_H 8#ifndef __ASM_AVR32_BARRIER_H
9#define __ASM_AVR32_BARRIER_H 9#define __ASM_AVR32_BARRIER_H
10 10
11#define nop() asm volatile("nop") 11/*
12 12 * Weirdest thing ever.. no full barrier, but it has a write barrier!
13#define mb() asm volatile("" : : : "memory") 13 */
14#define rmb() mb() 14#define wmb() asm volatile("sync 0" : : : "memory")
15#define wmb() asm volatile("sync 0" : : : "memory")
16#define read_barrier_depends() do { } while(0)
17#define set_mb(var, value) do { var = value; mb(); } while(0)
18 15
19#ifdef CONFIG_SMP 16#ifdef CONFIG_SMP
20# error "The AVR32 port does not support SMP" 17# error "The AVR32 port does not support SMP"
21#else
22# define smp_mb() barrier()
23# define smp_rmb() barrier()
24# define smp_wmb() barrier()
25# define smp_read_barrier_depends() do { } while(0)
26#endif 18#endif
27 19
20#include <asm-generic/barrier.h>
28 21
29#endif /* __ASM_AVR32_BARRIER_H */ 22#endif /* __ASM_AVR32_BARRIER_H */
diff --git a/arch/blackfin/include/asm/barrier.h b/arch/blackfin/include/asm/barrier.h
index ebb189507dd7..19283a16ac08 100644
--- a/arch/blackfin/include/asm/barrier.h
+++ b/arch/blackfin/include/asm/barrier.h
@@ -23,26 +23,10 @@
23# define rmb() do { barrier(); smp_check_barrier(); } while (0) 23# define rmb() do { barrier(); smp_check_barrier(); } while (0)
24# define wmb() do { barrier(); smp_mark_barrier(); } while (0) 24# define wmb() do { barrier(); smp_mark_barrier(); } while (0)
25# define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0) 25# define read_barrier_depends() do { barrier(); smp_check_barrier(); } while (0)
26#else
27# define mb() barrier()
28# define rmb() barrier()
29# define wmb() barrier()
30# define read_barrier_depends() do { } while (0)
31#endif 26#endif
32 27
33#else /* !CONFIG_SMP */
34
35#define mb() barrier()
36#define rmb() barrier()
37#define wmb() barrier()
38#define read_barrier_depends() do { } while (0)
39
40#endif /* !CONFIG_SMP */ 28#endif /* !CONFIG_SMP */
41 29
42#define smp_mb() mb() 30#include <asm-generic/barrier.h>
43#define smp_rmb() rmb()
44#define smp_wmb() wmb()
45#define set_mb(var, value) do { var = value; mb(); } while (0)
46#define smp_read_barrier_depends() read_barrier_depends()
47 31
48#endif /* _BLACKFIN_BARRIER_H */ 32#endif /* _BLACKFIN_BARRIER_H */
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index b06caf649a95..199b1a9dab89 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -3,6 +3,7 @@ header-y += arch-v10/
3header-y += arch-v32/ 3header-y += arch-v32/
4 4
5 5
6generic-y += barrier.h
6generic-y += clkdev.h 7generic-y += clkdev.h
7generic-y += exec.h 8generic-y += exec.h
8generic-y += kvm_para.h 9generic-y += kvm_para.h
diff --git a/arch/cris/include/asm/barrier.h b/arch/cris/include/asm/barrier.h
deleted file mode 100644
index 198ad7fa6b25..000000000000
--- a/arch/cris/include/asm/barrier.h
+++ /dev/null
@@ -1,25 +0,0 @@
1#ifndef __ASM_CRIS_BARRIER_H
2#define __ASM_CRIS_BARRIER_H
3
4#define nop() __asm__ __volatile__ ("nop");
5
6#define barrier() __asm__ __volatile__("": : :"memory")
7#define mb() barrier()
8#define rmb() mb()
9#define wmb() mb()
10#define read_barrier_depends() do { } while(0)
11#define set_mb(var, value) do { var = value; mb(); } while (0)
12
13#ifdef CONFIG_SMP
14#define smp_mb() mb()
15#define smp_rmb() rmb()
16#define smp_wmb() wmb()
17#define smp_read_barrier_depends() read_barrier_depends()
18#else
19#define smp_mb() barrier()
20#define smp_rmb() barrier()
21#define smp_wmb() barrier()
22#define smp_read_barrier_depends() do { } while(0)
23#endif
24
25#endif /* __ASM_CRIS_BARRIER_H */
diff --git a/arch/frv/include/asm/barrier.h b/arch/frv/include/asm/barrier.h
index 06776ad9f5e9..abbef470154c 100644
--- a/arch/frv/include/asm/barrier.h
+++ b/arch/frv/include/asm/barrier.h
@@ -17,13 +17,7 @@
17#define mb() asm volatile ("membar" : : :"memory") 17#define mb() asm volatile ("membar" : : :"memory")
18#define rmb() asm volatile ("membar" : : :"memory") 18#define rmb() asm volatile ("membar" : : :"memory")
19#define wmb() asm volatile ("membar" : : :"memory") 19#define wmb() asm volatile ("membar" : : :"memory")
20#define read_barrier_depends() do { } while (0)
21 20
22#define smp_mb() barrier() 21#include <asm-generic/barrier.h>
23#define smp_rmb() barrier()
24#define smp_wmb() barrier()
25#define smp_read_barrier_depends() do {} while(0)
26#define set_mb(var, value) \
27 do { var = (value); barrier(); } while (0)
28 22
29#endif /* _ASM_BARRIER_H */ 23#endif /* _ASM_BARRIER_H */
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 67c3450309b7..ada843c701ef 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -2,6 +2,7 @@
2header-y += ucontext.h 2header-y += ucontext.h
3 3
4generic-y += auxvec.h 4generic-y += auxvec.h
5generic-y += barrier.h
5generic-y += bug.h 6generic-y += bug.h
6generic-y += bugs.h 7generic-y += bugs.h
7generic-y += clkdev.h 8generic-y += clkdev.h
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 8a64ff2337f6..7aae4cb2a29a 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -160,8 +160,12 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
160#define atomic_sub_and_test(i, v) (atomic_sub_return(i, (v)) == 0) 160#define atomic_sub_and_test(i, v) (atomic_sub_return(i, (v)) == 0)
161#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0) 161#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
162 162
163
164#define atomic_inc_return(v) (atomic_add_return(1, v)) 163#define atomic_inc_return(v) (atomic_add_return(1, v))
165#define atomic_dec_return(v) (atomic_sub_return(1, v)) 164#define atomic_dec_return(v) (atomic_sub_return(1, v))
166 165
166#define smp_mb__before_atomic_dec() barrier()
167#define smp_mb__after_atomic_dec() barrier()
168#define smp_mb__before_atomic_inc() barrier()
169#define smp_mb__after_atomic_inc() barrier()
170
167#endif 171#endif
diff --git a/arch/hexagon/include/asm/barrier.h b/arch/hexagon/include/asm/barrier.h
index 1041a8e70ce8..4e863daea25b 100644
--- a/arch/hexagon/include/asm/barrier.h
+++ b/arch/hexagon/include/asm/barrier.h
@@ -29,10 +29,6 @@
29#define smp_read_barrier_depends() barrier() 29#define smp_read_barrier_depends() barrier()
30#define smp_wmb() barrier() 30#define smp_wmb() barrier()
31#define smp_mb() barrier() 31#define smp_mb() barrier()
32#define smp_mb__before_atomic_dec() barrier()
33#define smp_mb__after_atomic_dec() barrier()
34#define smp_mb__before_atomic_inc() barrier()
35#define smp_mb__after_atomic_inc() barrier()
36 32
37/* Set a value and use a memory barrier. Used by the scheduler somewhere. */ 33/* Set a value and use a memory barrier. Used by the scheduler somewhere. */
38#define set_mb(var, value) \ 34#define set_mb(var, value) \
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 60576e06b6fb..d0a69aa35e27 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -45,14 +45,37 @@
45# define smp_rmb() rmb() 45# define smp_rmb() rmb()
46# define smp_wmb() wmb() 46# define smp_wmb() wmb()
47# define smp_read_barrier_depends() read_barrier_depends() 47# define smp_read_barrier_depends() read_barrier_depends()
48
48#else 49#else
50
49# define smp_mb() barrier() 51# define smp_mb() barrier()
50# define smp_rmb() barrier() 52# define smp_rmb() barrier()
51# define smp_wmb() barrier() 53# define smp_wmb() barrier()
52# define smp_read_barrier_depends() do { } while(0) 54# define smp_read_barrier_depends() do { } while(0)
55
53#endif 56#endif
54 57
55/* 58/*
59 * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no
60 * need for asm trickery!
61 */
62
63#define smp_store_release(p, v) \
64do { \
65 compiletime_assert_atomic_type(*p); \
66 barrier(); \
67 ACCESS_ONCE(*p) = (v); \
68} while (0)
69
70#define smp_load_acquire(p) \
71({ \
72 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
73 compiletime_assert_atomic_type(*p); \
74 barrier(); \
75 ___p1; \
76})
77
78/*
56 * XXX check on this ---I suspect what Linus really wants here is 79 * XXX check on this ---I suspect what Linus really wants here is
57 * acquire vs release semantics but we can't discuss this stuff with 80 * acquire vs release semantics but we can't discuss this stuff with
58 * Linus just yet. Grrr... 81 * Linus just yet. Grrr...
diff --git a/arch/m32r/include/asm/barrier.h b/arch/m32r/include/asm/barrier.h
index 6976621efd3f..1a40265e8d88 100644
--- a/arch/m32r/include/asm/barrier.h
+++ b/arch/m32r/include/asm/barrier.h
@@ -11,84 +11,6 @@
11 11
12#define nop() __asm__ __volatile__ ("nop" : : ) 12#define nop() __asm__ __volatile__ ("nop" : : )
13 13
14/* 14#include <asm-generic/barrier.h>
15 * Memory barrier.
16 *
17 * mb() prevents loads and stores being reordered across this point.
18 * rmb() prevents loads being reordered across this point.
19 * wmb() prevents stores being reordered across this point.
20 */
21#define mb() barrier()
22#define rmb() mb()
23#define wmb() mb()
24
25/**
26 * read_barrier_depends - Flush all pending reads that subsequents reads
27 * depend on.
28 *
29 * No data-dependent reads from memory-like regions are ever reordered
30 * over this barrier. All reads preceding this primitive are guaranteed
31 * to access memory (but not necessarily other CPUs' caches) before any
32 * reads following this primitive that depend on the data return by
33 * any of the preceding reads. This primitive is much lighter weight than
34 * rmb() on most CPUs, and is never heavier weight than is
35 * rmb().
36 *
37 * These ordering constraints are respected by both the local CPU
38 * and the compiler.
39 *
40 * Ordering is not guaranteed by anything other than these primitives,
41 * not even by data dependencies. See the documentation for
42 * memory_barrier() for examples and URLs to more information.
43 *
44 * For example, the following code would force ordering (the initial
45 * value of "a" is zero, "b" is one, and "p" is "&a"):
46 *
47 * <programlisting>
48 * CPU 0 CPU 1
49 *
50 * b = 2;
51 * memory_barrier();
52 * p = &b; q = p;
53 * read_barrier_depends();
54 * d = *q;
55 * </programlisting>
56 *
57 *
58 * because the read of "*q" depends on the read of "p" and these
59 * two reads are separated by a read_barrier_depends(). However,
60 * the following code, with the same initial values for "a" and "b":
61 *
62 * <programlisting>
63 * CPU 0 CPU 1
64 *
65 * a = 2;
66 * memory_barrier();
67 * b = 3; y = b;
68 * read_barrier_depends();
69 * x = a;
70 * </programlisting>
71 *
72 * does not enforce ordering, since there is no data dependency between
73 * the read of "a" and the read of "b". Therefore, on some CPUs, such
74 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
75 * in cases like this where there are no data dependencies.
76 **/
77
78#define read_barrier_depends() do { } while (0)
79
80#ifdef CONFIG_SMP
81#define smp_mb() mb()
82#define smp_rmb() rmb()
83#define smp_wmb() wmb()
84#define smp_read_barrier_depends() read_barrier_depends()
85#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
86#else
87#define smp_mb() barrier()
88#define smp_rmb() barrier()
89#define smp_wmb() barrier()
90#define smp_read_barrier_depends() do { } while (0)
91#define set_mb(var, value) do { var = value; barrier(); } while (0)
92#endif
93 15
94#endif /* _ASM_M32R_BARRIER_H */ 16#endif /* _ASM_M32R_BARRIER_H */
diff --git a/arch/m68k/include/asm/barrier.h b/arch/m68k/include/asm/barrier.h
index 445ce22c23cb..15c5f77c1614 100644
--- a/arch/m68k/include/asm/barrier.h
+++ b/arch/m68k/include/asm/barrier.h
@@ -1,20 +1,8 @@
1#ifndef _M68K_BARRIER_H 1#ifndef _M68K_BARRIER_H
2#define _M68K_BARRIER_H 2#define _M68K_BARRIER_H
3 3
4/*
5 * Force strict CPU ordering.
6 * Not really required on m68k...
7 */
8#define nop() do { asm volatile ("nop"); barrier(); } while (0) 4#define nop() do { asm volatile ("nop"); barrier(); } while (0)
9#define mb() barrier()
10#define rmb() barrier()
11#define wmb() barrier()
12#define read_barrier_depends() ((void)0)
13#define set_mb(var, value) ({ (var) = (value); wmb(); })
14 5
15#define smp_mb() barrier() 6#include <asm-generic/barrier.h>
16#define smp_rmb() barrier()
17#define smp_wmb() barrier()
18#define smp_read_barrier_depends() ((void)0)
19 7
20#endif /* _M68K_BARRIER_H */ 8#endif /* _M68K_BARRIER_H */
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index c90bfc6bf648..5d6b4b407dda 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -82,4 +82,19 @@ static inline void fence(void)
82#define smp_read_barrier_depends() do { } while (0) 82#define smp_read_barrier_depends() do { } while (0)
83#define set_mb(var, value) do { var = value; smp_mb(); } while (0) 83#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
84 84
85#define smp_store_release(p, v) \
86do { \
87 compiletime_assert_atomic_type(*p); \
88 smp_mb(); \
89 ACCESS_ONCE(*p) = (v); \
90} while (0)
91
92#define smp_load_acquire(p) \
93({ \
94 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
95 compiletime_assert_atomic_type(*p); \
96 smp_mb(); \
97 ___p1; \
98})
99
85#endif /* _ASM_METAG_BARRIER_H */ 100#endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index ce0bbf8f5640..a82426589fff 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,4 +1,5 @@
1 1
2generic-y += barrier.h
2generic-y += clkdev.h 3generic-y += clkdev.h
3generic-y += exec.h 4generic-y += exec.h
4generic-y += trace_clock.h 5generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/barrier.h b/arch/microblaze/include/asm/barrier.h
deleted file mode 100644
index df5be3e87044..000000000000
--- a/arch/microblaze/include/asm/barrier.h
+++ /dev/null
@@ -1,27 +0,0 @@
1/*
2 * Copyright (C) 2006 Atmark Techno, Inc.
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 */
8
9#ifndef _ASM_MICROBLAZE_BARRIER_H
10#define _ASM_MICROBLAZE_BARRIER_H
11
12#define nop() asm volatile ("nop")
13
14#define smp_read_barrier_depends() do {} while (0)
15#define read_barrier_depends() do {} while (0)
16
17#define mb() barrier()
18#define rmb() mb()
19#define wmb() mb()
20#define set_mb(var, value) do { var = value; mb(); } while (0)
21#define set_wmb(var, value) do { var = value; wmb(); } while (0)
22
23#define smp_mb() mb()
24#define smp_rmb() rmb()
25#define smp_wmb() wmb()
26
27#endif /* _ASM_MICROBLAZE_BARRIER_H */
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index f26d8e1bf3c3..e1aa4e4c2984 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -180,4 +180,19 @@
180#define nudge_writes() mb() 180#define nudge_writes() mb()
181#endif 181#endif
182 182
183#define smp_store_release(p, v) \
184do { \
185 compiletime_assert_atomic_type(*p); \
186 smp_mb(); \
187 ACCESS_ONCE(*p) = (v); \
188} while (0)
189
190#define smp_load_acquire(p) \
191({ \
192 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
193 compiletime_assert_atomic_type(*p); \
194 smp_mb(); \
195 ___p1; \
196})
197
183#endif /* __ASM_BARRIER_H */ 198#endif /* __ASM_BARRIER_H */
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 74742dc6a3da..032143ec2324 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -1,4 +1,5 @@
1 1
2generic-y += barrier.h
2generic-y += clkdev.h 3generic-y += clkdev.h
3generic-y += exec.h 4generic-y += exec.h
4generic-y += trace_clock.h 5generic-y += trace_clock.h
diff --git a/arch/mn10300/include/asm/barrier.h b/arch/mn10300/include/asm/barrier.h
deleted file mode 100644
index 2bd97a5c8af7..000000000000
--- a/arch/mn10300/include/asm/barrier.h
+++ /dev/null
@@ -1,37 +0,0 @@
1/* MN10300 memory barrier definitions
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11#ifndef _ASM_BARRIER_H
12#define _ASM_BARRIER_H
13
14#define nop() asm volatile ("nop")
15
16#define mb() asm volatile ("": : :"memory")
17#define rmb() mb()
18#define wmb() asm volatile ("": : :"memory")
19
20#ifdef CONFIG_SMP
21#define smp_mb() mb()
22#define smp_rmb() rmb()
23#define smp_wmb() wmb()
24#define set_mb(var, value) do { xchg(&var, value); } while (0)
25#else /* CONFIG_SMP */
26#define smp_mb() barrier()
27#define smp_rmb() barrier()
28#define smp_wmb() barrier()
29#define set_mb(var, value) do { var = value; mb(); } while (0)
30#endif /* CONFIG_SMP */
31
32#define set_wmb(var, value) do { var = value; wmb(); } while (0)
33
34#define read_barrier_depends() do {} while (0)
35#define smp_read_barrier_depends() do {} while (0)
36
37#endif /* _ASM_BARRIER_H */
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index a603b9ebe54c..34b0be4ca52d 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -1,4 +1,5 @@
1 1
2generic-y += barrier.h
2generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \ 3generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \
3 segment.h topology.h vga.h device.h percpu.h hw_irq.h mutex.h \ 4 segment.h topology.h vga.h device.h percpu.h hw_irq.h mutex.h \
4 div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \ 5 div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \
diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h
deleted file mode 100644
index e77d834aa803..000000000000
--- a/arch/parisc/include/asm/barrier.h
+++ /dev/null
@@ -1,35 +0,0 @@
1#ifndef __PARISC_BARRIER_H
2#define __PARISC_BARRIER_H
3
4/*
5** This is simply the barrier() macro from linux/kernel.h but when serial.c
6** uses tqueue.h uses smp_mb() defined using barrier(), linux/kernel.h
7** hasn't yet been included yet so it fails, thus repeating the macro here.
8**
9** PA-RISC architecture allows for weakly ordered memory accesses although
10** none of the processors use it. There is a strong ordered bit that is
11** set in the O-bit of the page directory entry. Operating systems that
12** can not tolerate out of order accesses should set this bit when mapping
13** pages. The O-bit of the PSW should also be set to 1 (I don't believe any
14** of the processor implemented the PSW O-bit). The PCX-W ERS states that
15** the TLB O-bit is not implemented so the page directory does not need to
16** have the O-bit set when mapping pages (section 3.1). This section also
17** states that the PSW Y, Z, G, and O bits are not implemented.
18** So it looks like nothing needs to be done for parisc-linux (yet).
19** (thanks to chada for the above comment -ggg)
20**
21** The __asm__ op below simple prevents gcc/ld from reordering
22** instructions across the mb() "call".
23*/
24#define mb() __asm__ __volatile__("":::"memory") /* barrier() */
25#define rmb() mb()
26#define wmb() mb()
27#define smp_mb() mb()
28#define smp_rmb() mb()
29#define smp_wmb() mb()
30#define smp_read_barrier_depends() do { } while(0)
31#define read_barrier_depends() do { } while(0)
32
33#define set_mb(var, value) do { var = value; mb(); } while (0)
34
35#endif /* __PARISC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index ae782254e731..f89da808ce31 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -45,11 +45,15 @@
45# define SMPWMB eieio 45# define SMPWMB eieio
46#endif 46#endif
47 47
48#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
49
48#define smp_mb() mb() 50#define smp_mb() mb()
49#define smp_rmb() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") 51#define smp_rmb() __lwsync()
50#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") 52#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
51#define smp_read_barrier_depends() read_barrier_depends() 53#define smp_read_barrier_depends() read_barrier_depends()
52#else 54#else
55#define __lwsync() barrier()
56
53#define smp_mb() barrier() 57#define smp_mb() barrier()
54#define smp_rmb() barrier() 58#define smp_rmb() barrier()
55#define smp_wmb() barrier() 59#define smp_wmb() barrier()
@@ -65,4 +69,19 @@
65#define data_barrier(x) \ 69#define data_barrier(x) \
66 asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory"); 70 asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
67 71
72#define smp_store_release(p, v) \
73do { \
74 compiletime_assert_atomic_type(*p); \
75 __lwsync(); \
76 ACCESS_ONCE(*p) = (v); \
77} while (0)
78
79#define smp_load_acquire(p) \
80({ \
81 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
82 compiletime_assert_atomic_type(*p); \
83 __lwsync(); \
84 ___p1; \
85})
86
68#endif /* _ASM_POWERPC_BARRIER_H */ 87#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 5f54a744dcc5..f6e78d63fb6a 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,6 +28,8 @@
28#include <asm/synch.h> 28#include <asm/synch.h>
29#include <asm/ppc-opcode.h> 29#include <asm/ppc-opcode.h>
30 30
31#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
32
31#define arch_spin_is_locked(x) ((x)->slock != 0) 33#define arch_spin_is_locked(x) ((x)->slock != 0)
32 34
33#ifdef CONFIG_PPC64 35#ifdef CONFIG_PPC64
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 16760eeb79b0..578680f6207a 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -32,4 +32,19 @@
32 32
33#define set_mb(var, value) do { var = value; mb(); } while (0) 33#define set_mb(var, value) do { var = value; mb(); } while (0)
34 34
35#define smp_store_release(p, v) \
36do { \
37 compiletime_assert_atomic_type(*p); \
38 barrier(); \
39 ACCESS_ONCE(*p) = (v); \
40} while (0)
41
42#define smp_load_acquire(p) \
43({ \
44 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
45 compiletime_assert_atomic_type(*p); \
46 barrier(); \
47 ___p1; \
48})
49
35#endif /* __ASM_BARRIER_H */ 50#endif /* __ASM_BARRIER_H */
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index f3414ade77a3..fe7471eb0167 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -1,6 +1,7 @@
1 1
2header-y += 2header-y +=
3 3
4generic-y += barrier.h
4generic-y += clkdev.h 5generic-y += clkdev.h
5generic-y += trace_clock.h 6generic-y += trace_clock.h
6generic-y += xor.h 7generic-y += xor.h
diff --git a/arch/score/include/asm/barrier.h b/arch/score/include/asm/barrier.h
deleted file mode 100644
index 0eacb6471e6d..000000000000
--- a/arch/score/include/asm/barrier.h
+++ /dev/null
@@ -1,16 +0,0 @@
1#ifndef _ASM_SCORE_BARRIER_H
2#define _ASM_SCORE_BARRIER_H
3
4#define mb() barrier()
5#define rmb() barrier()
6#define wmb() barrier()
7#define smp_mb() barrier()
8#define smp_rmb() barrier()
9#define smp_wmb() barrier()
10
11#define read_barrier_depends() do {} while (0)
12#define smp_read_barrier_depends() do {} while (0)
13
14#define set_mb(var, value) do {var = value; wmb(); } while (0)
15
16#endif /* _ASM_SCORE_BARRIER_H */
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 72c103dae300..43715308b068 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -26,29 +26,14 @@
26#if defined(CONFIG_CPU_SH4A) || defined(CONFIG_CPU_SH5) 26#if defined(CONFIG_CPU_SH4A) || defined(CONFIG_CPU_SH5)
27#define mb() __asm__ __volatile__ ("synco": : :"memory") 27#define mb() __asm__ __volatile__ ("synco": : :"memory")
28#define rmb() mb() 28#define rmb() mb()
29#define wmb() __asm__ __volatile__ ("synco": : :"memory") 29#define wmb() mb()
30#define ctrl_barrier() __icbi(PAGE_OFFSET) 30#define ctrl_barrier() __icbi(PAGE_OFFSET)
31#define read_barrier_depends() do { } while(0)
32#else 31#else
33#define mb() __asm__ __volatile__ ("": : :"memory")
34#define rmb() mb()
35#define wmb() __asm__ __volatile__ ("": : :"memory")
36#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") 32#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
37#define read_barrier_depends() do { } while(0)
38#endif
39
40#ifdef CONFIG_SMP
41#define smp_mb() mb()
42#define smp_rmb() rmb()
43#define smp_wmb() wmb()
44#define smp_read_barrier_depends() read_barrier_depends()
45#else
46#define smp_mb() barrier()
47#define smp_rmb() barrier()
48#define smp_wmb() barrier()
49#define smp_read_barrier_depends() do { } while(0)
50#endif 33#endif
51 34
52#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) 35#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
53 36
37#include <asm-generic/barrier.h>
38
54#endif /* __ASM_SH_BARRIER_H */ 39#endif /* __ASM_SH_BARRIER_H */
diff --git a/arch/sparc/include/asm/barrier_32.h b/arch/sparc/include/asm/barrier_32.h
index c1b76654ee76..ae69eda288f4 100644
--- a/arch/sparc/include/asm/barrier_32.h
+++ b/arch/sparc/include/asm/barrier_32.h
@@ -1,15 +1,7 @@
1#ifndef __SPARC_BARRIER_H 1#ifndef __SPARC_BARRIER_H
2#define __SPARC_BARRIER_H 2#define __SPARC_BARRIER_H
3 3
4/* XXX Change this if we ever use a PSO mode kernel. */ 4#include <asm/processor.h> /* for nop() */
5#define mb() __asm__ __volatile__ ("" : : : "memory") 5#include <asm-generic/barrier.h>
6#define rmb() mb()
7#define wmb() mb()
8#define read_barrier_depends() do { } while(0)
9#define set_mb(__var, __value) do { __var = __value; mb(); } while(0)
10#define smp_mb() __asm__ __volatile__("":::"memory")
11#define smp_rmb() __asm__ __volatile__("":::"memory")
12#define smp_wmb() __asm__ __volatile__("":::"memory")
13#define smp_read_barrier_depends() do { } while(0)
14 6
15#endif /* !(__SPARC_BARRIER_H) */ 7#endif /* !(__SPARC_BARRIER_H) */
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 95d45986f908..b5aad964558e 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -53,4 +53,19 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
53 53
54#define smp_read_barrier_depends() do { } while(0) 54#define smp_read_barrier_depends() do { } while(0)
55 55
56#define smp_store_release(p, v) \
57do { \
58 compiletime_assert_atomic_type(*p); \
59 barrier(); \
60 ACCESS_ONCE(*p) = (v); \
61} while (0)
62
63#define smp_load_acquire(p) \
64({ \
65 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
66 compiletime_assert_atomic_type(*p); \
67 barrier(); \
68 ___p1; \
69})
70
56#endif /* !(__SPARC64_BARRIER_H) */ 71#endif /* !(__SPARC64_BARRIER_H) */
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index a9a73da5865d..b5a05d050a8f 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -22,59 +22,6 @@
22#include <arch/spr_def.h> 22#include <arch/spr_def.h>
23#include <asm/timex.h> 23#include <asm/timex.h>
24 24
25/*
26 * read_barrier_depends - Flush all pending reads that subsequents reads
27 * depend on.
28 *
29 * No data-dependent reads from memory-like regions are ever reordered
30 * over this barrier. All reads preceding this primitive are guaranteed
31 * to access memory (but not necessarily other CPUs' caches) before any
32 * reads following this primitive that depend on the data return by
33 * any of the preceding reads. This primitive is much lighter weight than
34 * rmb() on most CPUs, and is never heavier weight than is
35 * rmb().
36 *
37 * These ordering constraints are respected by both the local CPU
38 * and the compiler.
39 *
40 * Ordering is not guaranteed by anything other than these primitives,
41 * not even by data dependencies. See the documentation for
42 * memory_barrier() for examples and URLs to more information.
43 *
44 * For example, the following code would force ordering (the initial
45 * value of "a" is zero, "b" is one, and "p" is "&a"):
46 *
47 * <programlisting>
48 * CPU 0 CPU 1
49 *
50 * b = 2;
51 * memory_barrier();
52 * p = &b; q = p;
53 * read_barrier_depends();
54 * d = *q;
55 * </programlisting>
56 *
57 * because the read of "*q" depends on the read of "p" and these
58 * two reads are separated by a read_barrier_depends(). However,
59 * the following code, with the same initial values for "a" and "b":
60 *
61 * <programlisting>
62 * CPU 0 CPU 1
63 *
64 * a = 2;
65 * memory_barrier();
66 * b = 3; y = b;
67 * read_barrier_depends();
68 * x = a;
69 * </programlisting>
70 *
71 * does not enforce ordering, since there is no data dependency between
72 * the read of "a" and the read of "b". Therefore, on some CPUs, such
73 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
74 * in cases like this where there are no data dependencies.
75 */
76#define read_barrier_depends() do { } while (0)
77
78#define __sync() __insn_mf() 25#define __sync() __insn_mf()
79 26
80#include <hv/syscall_public.h> 27#include <hv/syscall_public.h>
@@ -125,20 +72,7 @@ mb_incoherent(void)
125#define mb() fast_mb() 72#define mb() fast_mb()
126#define iob() fast_iob() 73#define iob() fast_iob()
127 74
128#ifdef CONFIG_SMP 75#include <asm-generic/barrier.h>
129#define smp_mb() mb()
130#define smp_rmb() rmb()
131#define smp_wmb() wmb()
132#define smp_read_barrier_depends() read_barrier_depends()
133#else
134#define smp_mb() barrier()
135#define smp_rmb() barrier()
136#define smp_wmb() barrier()
137#define smp_read_barrier_depends() do { } while (0)
138#endif
139
140#define set_mb(var, value) \
141 do { var = value; mb(); } while (0)
142 76
143#endif /* !__ASSEMBLY__ */ 77#endif /* !__ASSEMBLY__ */
144#endif /* _ASM_TILE_BARRIER_H */ 78#endif /* _ASM_TILE_BARRIER_H */
diff --git a/arch/unicore32/include/asm/barrier.h b/arch/unicore32/include/asm/barrier.h
index a6620e5336b6..83d6a520f4bd 100644
--- a/arch/unicore32/include/asm/barrier.h
+++ b/arch/unicore32/include/asm/barrier.h
@@ -14,15 +14,6 @@
14#define dsb() __asm__ __volatile__ ("" : : : "memory") 14#define dsb() __asm__ __volatile__ ("" : : : "memory")
15#define dmb() __asm__ __volatile__ ("" : : : "memory") 15#define dmb() __asm__ __volatile__ ("" : : : "memory")
16 16
17#define mb() barrier() 17#include <asm-generic/barrier.h>
18#define rmb() barrier()
19#define wmb() barrier()
20#define smp_mb() barrier()
21#define smp_rmb() barrier()
22#define smp_wmb() barrier()
23#define read_barrier_depends() do { } while (0)
24#define smp_read_barrier_depends() do { } while (0)
25
26#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
27 18
28#endif /* __UNICORE_BARRIER_H__ */ 19#endif /* __UNICORE_BARRIER_H__ */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index c6cd358a1eec..04a48903b2eb 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -92,12 +92,53 @@
92#endif 92#endif
93#define smp_read_barrier_depends() read_barrier_depends() 93#define smp_read_barrier_depends() read_barrier_depends()
94#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) 94#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
95#else 95#else /* !SMP */
96#define smp_mb() barrier() 96#define smp_mb() barrier()
97#define smp_rmb() barrier() 97#define smp_rmb() barrier()
98#define smp_wmb() barrier() 98#define smp_wmb() barrier()
99#define smp_read_barrier_depends() do { } while (0) 99#define smp_read_barrier_depends() do { } while (0)
100#define set_mb(var, value) do { var = value; barrier(); } while (0) 100#define set_mb(var, value) do { var = value; barrier(); } while (0)
101#endif /* SMP */
102
103#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
104
105/*
106 * For either of these options x86 doesn't have a strong TSO memory
107 * model and we should fall back to full barriers.
108 */
109
110#define smp_store_release(p, v) \
111do { \
112 compiletime_assert_atomic_type(*p); \
113 smp_mb(); \
114 ACCESS_ONCE(*p) = (v); \
115} while (0)
116
117#define smp_load_acquire(p) \
118({ \
119 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
120 compiletime_assert_atomic_type(*p); \
121 smp_mb(); \
122 ___p1; \
123})
124
125#else /* regular x86 TSO memory ordering */
126
127#define smp_store_release(p, v) \
128do { \
129 compiletime_assert_atomic_type(*p); \
130 barrier(); \
131 ACCESS_ONCE(*p) = (v); \
132} while (0)
133
134#define smp_load_acquire(p) \
135({ \
136 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
137 compiletime_assert_atomic_type(*p); \
138 barrier(); \
139 ___p1; \
140})
141
101#endif 142#endif
102 143
103/* 144/*
diff --git a/arch/xtensa/include/asm/barrier.h b/arch/xtensa/include/asm/barrier.h
index ef021677d536..e1ee6b51dfc5 100644
--- a/arch/xtensa/include/asm/barrier.h
+++ b/arch/xtensa/include/asm/barrier.h
@@ -9,21 +9,14 @@
9#ifndef _XTENSA_SYSTEM_H 9#ifndef _XTENSA_SYSTEM_H
10#define _XTENSA_SYSTEM_H 10#define _XTENSA_SYSTEM_H
11 11
12#define smp_read_barrier_depends() do { } while(0)
13#define read_barrier_depends() do { } while(0)
14
15#define mb() ({ __asm__ __volatile__("memw" : : : "memory"); }) 12#define mb() ({ __asm__ __volatile__("memw" : : : "memory"); })
16#define rmb() barrier() 13#define rmb() barrier()
17#define wmb() mb() 14#define wmb() mb()
18 15
19#ifdef CONFIG_SMP 16#ifdef CONFIG_SMP
20#error smp_* not defined 17#error smp_* not defined
21#else
22#define smp_mb() barrier()
23#define smp_rmb() barrier()
24#define smp_wmb() barrier()
25#endif 18#endif
26 19
27#define set_mb(var, value) do { var = value; mb(); } while (0) 20#include <asm-generic/barrier.h>
28 21
29#endif /* _XTENSA_SYSTEM_H */ 22#endif /* _XTENSA_SYSTEM_H */
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 639d7a4d033b..6f692f8ac664 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -1,4 +1,5 @@
1/* Generic barrier definitions, based on MN10300 definitions. 1/*
2 * Generic barrier definitions, originally based on MN10300 definitions.
2 * 3 *
3 * It should be possible to use these on really simple architectures, 4 * It should be possible to use these on really simple architectures,
4 * but it serves more as a starting point for new ports. 5 * but it serves more as a starting point for new ports.
@@ -16,35 +17,65 @@
16 17
17#ifndef __ASSEMBLY__ 18#ifndef __ASSEMBLY__
18 19
19#define nop() asm volatile ("nop") 20#include <linux/compiler.h>
21
22#ifndef nop
23#define nop() asm volatile ("nop")
24#endif
20 25
21/* 26/*
22 * Force strict CPU ordering. 27 * Force strict CPU ordering. And yes, this is required on UP too when we're
23 * And yes, this is required on UP too when we're talking 28 * talking to devices.
24 * to devices.
25 * 29 *
26 * This implementation only contains a compiler barrier. 30 * Fall back to compiler barriers if nothing better is provided.
27 */ 31 */
28 32
29#define mb() asm volatile ("": : :"memory") 33#ifndef mb
34#define mb() barrier()
35#endif
36
37#ifndef rmb
30#define rmb() mb() 38#define rmb() mb()
31#define wmb() asm volatile ("": : :"memory") 39#endif
40
41#ifndef wmb
42#define wmb() mb()
43#endif
44
45#ifndef read_barrier_depends
46#define read_barrier_depends() do { } while (0)
47#endif
32 48
33#ifdef CONFIG_SMP 49#ifdef CONFIG_SMP
34#define smp_mb() mb() 50#define smp_mb() mb()
35#define smp_rmb() rmb() 51#define smp_rmb() rmb()
36#define smp_wmb() wmb() 52#define smp_wmb() wmb()
53#define smp_read_barrier_depends() read_barrier_depends()
37#else 54#else
38#define smp_mb() barrier() 55#define smp_mb() barrier()
39#define smp_rmb() barrier() 56#define smp_rmb() barrier()
40#define smp_wmb() barrier() 57#define smp_wmb() barrier()
58#define smp_read_barrier_depends() do { } while (0)
59#endif
60
61#ifndef set_mb
62#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
41#endif 63#endif
42 64
43#define set_mb(var, value) do { var = value; mb(); } while (0) 65#define smp_store_release(p, v) \
44#define set_wmb(var, value) do { var = value; wmb(); } while (0) 66do { \
67 compiletime_assert_atomic_type(*p); \
68 smp_mb(); \
69 ACCESS_ONCE(*p) = (v); \
70} while (0)
45 71
46#define read_barrier_depends() do {} while (0) 72#define smp_load_acquire(p) \
47#define smp_read_barrier_depends() do {} while (0) 73({ \
74 typeof(*p) ___p1 = ACCESS_ONCE(*p); \
75 compiletime_assert_atomic_type(*p); \
76 smp_mb(); \
77 ___p1; \
78})
48 79
49#endif /* !__ASSEMBLY__ */ 80#endif /* !__ASSEMBLY__ */
50#endif /* __ASM_GENERIC_BARRIER_H */ 81#endif /* __ASM_GENERIC_BARRIER_H */
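
To make the intent of the new generic primitives concrete, here is a minimal
kernel-style sketch (not from this patch) of the pattern they are meant for:
publish a structure with smp_store_release() and consume it with
smp_load_acquire(), with no explicit smp_wmb()/smp_rmb() pair.

	struct foo {
		int a;
		int b;
	};

	static struct foo payload;
	static struct foo *published;	/* NULL until the payload is ready */

	static void producer(void)
	{
		payload.a = 1;
		payload.b = 2;
		/* Orders the stores above before the pointer becomes visible. */
		smp_store_release(&published, &payload);
	}

	static int consumer(void)
	{
		/* Orders the dereferences below after the pointer load. */
		struct foo *p = smp_load_acquire(&published);

		if (!p)
			return -1;
		return p->a + p->b;	/* guaranteed to see 1 and 2 */
	}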
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 92669cd182a6..fe7a686dfd8d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
298# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) 298# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
299#endif 299#endif
300 300
301/* Is this type a native word size -- useful for atomic operations */
302#ifndef __native_word
303# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
304#endif
305
301/* Compile time object size, -1 for unknown */ 306/* Compile time object size, -1 for unknown */
302#ifndef __compiletime_object_size 307#ifndef __compiletime_object_size
303# define __compiletime_object_size(obj) -1 308# define __compiletime_object_size(obj) -1
@@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
337#define compiletime_assert(condition, msg) \ 342#define compiletime_assert(condition, msg) \
338 _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) 343 _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
339 344
345#define compiletime_assert_atomic_type(t) \
346 compiletime_assert(__native_word(t), \
347 "Need native word sized stores/loads for atomicity.")
348
340/* 349/*
341 * Prevent the compiler from merging or refetching accesses. The compiler 350 * Prevent the compiler from merging or refetching accesses. The compiler
342 * is also forbidden from reordering successive instances of ACCESS_ONCE(), 351 * is also forbidden from reordering successive instances of ACCESS_ONCE(),
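
A short sketch of what compiletime_assert_atomic_type() catches. 'struct pair'
is a made-up type for illustration, and the calls assume the
smp_store_release() helper introduced elsewhere in this series.

	struct pair {
		long lo;
		long hi;
	};

	static long scalar;
	static struct pair wide;

	static void example(void)
	{
		/* Fine: sizeof(long) is a native word. */
		smp_store_release(&scalar, 1L);

		/*
		 * smp_store_release(&wide, (struct pair){ 1, 2 });
		 *
		 * would trip compiletime_assert(__native_word(...), ...)
		 * at build time, because the store could not be done
		 * atomically and would silently tear at run time.
		 */
	}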
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 75f34949d9ab..3f2867ff0ced 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -130,6 +130,16 @@ do { \
130#define smp_mb__before_spinlock() smp_wmb() 130#define smp_mb__before_spinlock() smp_wmb()
131#endif 131#endif
132 132
133/*
134 * Place this after a lock-acquisition primitive to guarantee that
135 * an UNLOCK+LOCK pair act as a full barrier. This guarantee applies
136 * if the UNLOCK and LOCK are executed by the same CPU or if the
137 * UNLOCK and LOCK operate on the same lock variable.
138 */
139#ifndef smp_mb__after_unlock_lock
140#define smp_mb__after_unlock_lock() do { } while (0)
141#endif
142
133/** 143/**
134 * raw_spin_unlock_wait - wait until the spinlock gets unlocked 144 * raw_spin_unlock_wait - wait until the spinlock gets unlocked
135 * @lock: the spinlock in question. 145 * @lock: the spinlock in question.
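
A minimal sketch (not from this patch) of how smp_mb__after_unlock_lock() is
intended to be used; the two locks and the accesses to X and Y are
placeholders.

	static DEFINE_SPINLOCK(lock_a);
	static DEFINE_SPINLOCK(lock_b);
	static int X, Y;

	static void unlock_lock_path(void)
	{
		spin_lock(&lock_a);
		X = 1;
		spin_unlock(&lock_a);

		spin_lock(&lock_b);
		/*
		 * Same CPU performed the UNLOCK above and this LOCK, so the
		 * barrier promotes the pair to a full barrier: the store to
		 * X is ordered before the store to Y for all CPUs.
		 */
		smp_mb__after_unlock_lock();
		Y = 1;
		spin_unlock(&lock_b);
	}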
diff --git a/kernel/futex.c b/kernel/futex.c
index f6ff0191ecf7..1ddc4498f1e1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -63,14 +63,101 @@
63#include <linux/sched/rt.h> 63#include <linux/sched/rt.h>
64#include <linux/hugetlb.h> 64#include <linux/hugetlb.h>
65#include <linux/freezer.h> 65#include <linux/freezer.h>
66#include <linux/bootmem.h>
66 67
67#include <asm/futex.h> 68#include <asm/futex.h>
68 69
69#include "locking/rtmutex_common.h" 70#include "locking/rtmutex_common.h"
70 71
71int __read_mostly futex_cmpxchg_enabled; 72/*
73 * Basic futex operation and ordering guarantees:
74 *
75 * The waiter reads the futex value in user space and calls
76 * futex_wait(). This function computes the hash bucket and acquires
77 * the hash bucket lock. After that it reads the futex user space value
78 * again and verifies that the data has not changed. If it has not changed
79 * it enqueues itself into the hash bucket, releases the hash bucket lock
80 * and schedules.
81 *
82 * The waker side modifies the user space value of the futex and calls
83 * futex_wake(). This function computes the hash bucket and acquires the
84 * hash bucket lock. Then it looks for waiters on that futex in the hash
85 * bucket and wakes them.
86 *
87 * In futex wake up scenarios where no tasks are blocked on a futex, taking
88 * the hb spinlock can be avoided and simply return. In order for this
89 * optimization to work, ordering guarantees must exist so that the waiter
90 * being added to the list is acknowledged when the list is concurrently being
91 * checked by the waker, avoiding scenarios like the following:
92 *
93 * CPU 0 CPU 1
94 * val = *futex;
95 * sys_futex(WAIT, futex, val);
96 * futex_wait(futex, val);
97 * uval = *futex;
98 * *futex = newval;
99 * sys_futex(WAKE, futex);
100 * futex_wake(futex);
101 * if (queue_empty())
102 * return;
103 * if (uval == val)
104 * lock(hash_bucket(futex));
105 * queue();
106 * unlock(hash_bucket(futex));
107 * schedule();
108 *
109 * This would cause the waiter on CPU 0 to wait forever because it
110 * missed the transition of the user space value from val to newval
111 * and the waker did not find the waiter in the hash bucket queue.
112 *
113 * The correct serialization ensures that a waiter either observes
114 * the changed user space value before blocking or is woken by a
115 * concurrent waker:
116 *
117 * CPU 0 CPU 1
118 * val = *futex;
119 * sys_futex(WAIT, futex, val);
120 * futex_wait(futex, val);
121 *
122 * waiters++;
123 * mb(); (A) <-- paired with -.
124 * |
125 * lock(hash_bucket(futex)); |
126 * |
127 * uval = *futex; |
128 * | *futex = newval;
129 * | sys_futex(WAKE, futex);
130 * | futex_wake(futex);
131 * |
132 * `-------> mb(); (B)
133 * if (uval == val)
134 * queue();
135 * unlock(hash_bucket(futex));
136 * schedule(); if (waiters)
137 * lock(hash_bucket(futex));
138 * wake_waiters(futex);
139 * unlock(hash_bucket(futex));
140 *
141 * Where (A) orders the waiters increment and the futex value read -- this
142 * is guaranteed by the head counter in the hb spinlock; and where (B)
143 * orders the write to futex and the waiters read -- this is done by the
144 * barriers in get_futex_key_refs(), through either ihold or atomic_inc,
145 * depending on the futex type.
146 *
147 * This yields the following case (where X:=waiters, Y:=futex):
148 *
149 * X = Y = 0
150 *
151 * w[X]=1 w[Y]=1
152 * MB MB
153 * r[Y]=y r[X]=x
154 *
155 * Which guarantees that x==0 && y==0 is impossible; which translates back into
156 * the guarantee that we cannot both miss the futex variable change and the
157 * enqueue.
158 */
72 159
73#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) 160int __read_mostly futex_cmpxchg_enabled;
74 161
75/* 162/*
76 * Futex flags used to encode options to functions and preserve them across 163 * Futex flags used to encode options to functions and preserve them across
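
The store-buffering pattern in the comment above, written out as a kernel-style
sketch (illustration only, not code from this patch). X stands for the waiters
indication and Y for the futex value.

	static int waiters;	/* X */
	static int futex_val;	/* Y */

	static int waiter_side(void)	/* CPU 0 */
	{
		waiters = 1;		/* w[X]=1 */
		smp_mb();		/* barrier (A) */
		return futex_val;	/* r[Y]=y */
	}

	static int waker_side(void)	/* CPU 1 */
	{
		futex_val = 1;		/* w[Y]=1 */
		smp_mb();		/* barrier (B) */
		return waiters;		/* r[X]=x */
	}

	/*
	 * With both full barriers in place, x == 0 && y == 0 is impossible:
	 * at least one side observes the other's write, so a queued waiter
	 * cannot be missed by the waker.
	 */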
@@ -149,9 +236,41 @@ static const struct futex_q futex_q_init = {
149struct futex_hash_bucket { 236struct futex_hash_bucket {
150 spinlock_t lock; 237 spinlock_t lock;
151 struct plist_head chain; 238 struct plist_head chain;
152}; 239} ____cacheline_aligned_in_smp;
153 240
154static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; 241static unsigned long __read_mostly futex_hashsize;
242
243static struct futex_hash_bucket *futex_queues;
244
245static inline void futex_get_mm(union futex_key *key)
246{
247 atomic_inc(&key->private.mm->mm_count);
248 /*
249 * Ensure futex_get_mm() implies a full barrier such that
250 * get_futex_key() implies a full barrier. This is relied upon
251 * as full barrier (B), see the ordering comment above.
252 */
253 smp_mb__after_atomic_inc();
254}
255
256static inline bool hb_waiters_pending(struct futex_hash_bucket *hb)
257{
258#ifdef CONFIG_SMP
259 /*
260 * Tasks trying to enter the critical region are most likely
261 * potential waiters that will be added to the plist. Ensure
262 * that wakers won't miss to-be-slept tasks in the window between
263 * the wait call and the actual plist_add.
264 */
265 if (spin_is_locked(&hb->lock))
266 return true;
267 smp_rmb(); /* Make sure we check the lock state first */
268
269 return !plist_head_empty(&hb->chain);
270#else
271 return true;
272#endif
273}
155 274
156/* 275/*
157 * We hash on the keys returned from get_futex_key (see below). 276 * We hash on the keys returned from get_futex_key (see below).
@@ -161,7 +280,7 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
161 u32 hash = jhash2((u32*)&key->both.word, 280 u32 hash = jhash2((u32*)&key->both.word,
162 (sizeof(key->both.word)+sizeof(key->both.ptr))/4, 281 (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
163 key->both.offset); 282 key->both.offset);
164 return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)]; 283 return &futex_queues[hash & (futex_hashsize - 1)];
165} 284}
166 285
167/* 286/*
@@ -187,10 +306,10 @@ static void get_futex_key_refs(union futex_key *key)
187 306
188 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { 307 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
189 case FUT_OFF_INODE: 308 case FUT_OFF_INODE:
190 ihold(key->shared.inode); 309 ihold(key->shared.inode); /* implies MB (B) */
191 break; 310 break;
192 case FUT_OFF_MMSHARED: 311 case FUT_OFF_MMSHARED:
193 atomic_inc(&key->private.mm->mm_count); 312 futex_get_mm(key); /* implies MB (B) */
194 break; 313 break;
195 } 314 }
196} 315}
@@ -264,7 +383,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
264 if (!fshared) { 383 if (!fshared) {
265 key->private.mm = mm; 384 key->private.mm = mm;
266 key->private.address = address; 385 key->private.address = address;
267 get_futex_key_refs(key); 386 get_futex_key_refs(key); /* implies MB (B) */
268 return 0; 387 return 0;
269 } 388 }
270 389
@@ -371,7 +490,7 @@ again:
371 key->shared.pgoff = basepage_index(page); 490 key->shared.pgoff = basepage_index(page);
372 } 491 }
373 492
374 get_futex_key_refs(key); 493 get_futex_key_refs(key); /* implies MB (B) */
375 494
376out: 495out:
377 unlock_page(page_head); 496 unlock_page(page_head);
@@ -598,13 +717,10 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
598{ 717{
599 struct futex_pi_state *pi_state = NULL; 718 struct futex_pi_state *pi_state = NULL;
600 struct futex_q *this, *next; 719 struct futex_q *this, *next;
601 struct plist_head *head;
602 struct task_struct *p; 720 struct task_struct *p;
603 pid_t pid = uval & FUTEX_TID_MASK; 721 pid_t pid = uval & FUTEX_TID_MASK;
604 722
605 head = &hb->chain; 723 plist_for_each_entry_safe(this, next, &hb->chain, list) {
606
607 plist_for_each_entry_safe(this, next, head, list) {
608 if (match_futex(&this->key, key)) { 724 if (match_futex(&this->key, key)) {
609 /* 725 /*
610 * Another waiter already exists - bump up 726 * Another waiter already exists - bump up
@@ -986,7 +1102,6 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
986{ 1102{
987 struct futex_hash_bucket *hb; 1103 struct futex_hash_bucket *hb;
988 struct futex_q *this, *next; 1104 struct futex_q *this, *next;
989 struct plist_head *head;
990 union futex_key key = FUTEX_KEY_INIT; 1105 union futex_key key = FUTEX_KEY_INIT;
991 int ret; 1106 int ret;
992 1107
@@ -998,10 +1113,14 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
998 goto out; 1113 goto out;
999 1114
1000 hb = hash_futex(&key); 1115 hb = hash_futex(&key);
1116
 1117	/* Make sure we really have tasks to wake up */
1118 if (!hb_waiters_pending(hb))
1119 goto out_put_key;
1120
1001 spin_lock(&hb->lock); 1121 spin_lock(&hb->lock);
1002 head = &hb->chain;
1003 1122
1004 plist_for_each_entry_safe(this, next, head, list) { 1123 plist_for_each_entry_safe(this, next, &hb->chain, list) {
1005 if (match_futex (&this->key, &key)) { 1124 if (match_futex (&this->key, &key)) {
1006 if (this->pi_state || this->rt_waiter) { 1125 if (this->pi_state || this->rt_waiter) {
1007 ret = -EINVAL; 1126 ret = -EINVAL;
@@ -1019,6 +1138,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
1019 } 1138 }
1020 1139
1021 spin_unlock(&hb->lock); 1140 spin_unlock(&hb->lock);
1141out_put_key:
1022 put_futex_key(&key); 1142 put_futex_key(&key);
1023out: 1143out:
1024 return ret; 1144 return ret;
@@ -1034,7 +1154,6 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
1034{ 1154{
1035 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; 1155 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1036 struct futex_hash_bucket *hb1, *hb2; 1156 struct futex_hash_bucket *hb1, *hb2;
1037 struct plist_head *head;
1038 struct futex_q *this, *next; 1157 struct futex_q *this, *next;
1039 int ret, op_ret; 1158 int ret, op_ret;
1040 1159
@@ -1082,9 +1201,7 @@ retry_private:
1082 goto retry; 1201 goto retry;
1083 } 1202 }
1084 1203
1085 head = &hb1->chain; 1204 plist_for_each_entry_safe(this, next, &hb1->chain, list) {
1086
1087 plist_for_each_entry_safe(this, next, head, list) {
1088 if (match_futex (&this->key, &key1)) { 1205 if (match_futex (&this->key, &key1)) {
1089 if (this->pi_state || this->rt_waiter) { 1206 if (this->pi_state || this->rt_waiter) {
1090 ret = -EINVAL; 1207 ret = -EINVAL;
@@ -1097,10 +1214,8 @@ retry_private:
1097 } 1214 }
1098 1215
1099 if (op_ret > 0) { 1216 if (op_ret > 0) {
1100 head = &hb2->chain;
1101
1102 op_ret = 0; 1217 op_ret = 0;
1103 plist_for_each_entry_safe(this, next, head, list) { 1218 plist_for_each_entry_safe(this, next, &hb2->chain, list) {
1104 if (match_futex (&this->key, &key2)) { 1219 if (match_futex (&this->key, &key2)) {
1105 if (this->pi_state || this->rt_waiter) { 1220 if (this->pi_state || this->rt_waiter) {
1106 ret = -EINVAL; 1221 ret = -EINVAL;
@@ -1270,7 +1385,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1270 int drop_count = 0, task_count = 0, ret; 1385 int drop_count = 0, task_count = 0, ret;
1271 struct futex_pi_state *pi_state = NULL; 1386 struct futex_pi_state *pi_state = NULL;
1272 struct futex_hash_bucket *hb1, *hb2; 1387 struct futex_hash_bucket *hb1, *hb2;
1273 struct plist_head *head1;
1274 struct futex_q *this, *next; 1388 struct futex_q *this, *next;
1275 u32 curval2; 1389 u32 curval2;
1276 1390
@@ -1393,8 +1507,7 @@ retry_private:
1393 } 1507 }
1394 } 1508 }
1395 1509
1396 head1 = &hb1->chain; 1510 plist_for_each_entry_safe(this, next, &hb1->chain, list) {
1397 plist_for_each_entry_safe(this, next, head1, list) {
1398 if (task_count - nr_wake >= nr_requeue) 1511 if (task_count - nr_wake >= nr_requeue)
1399 break; 1512 break;
1400 1513
@@ -1489,12 +1602,12 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1489 hb = hash_futex(&q->key); 1602 hb = hash_futex(&q->key);
1490 q->lock_ptr = &hb->lock; 1603 q->lock_ptr = &hb->lock;
1491 1604
1492 spin_lock(&hb->lock); 1605 spin_lock(&hb->lock); /* implies MB (A) */
1493 return hb; 1606 return hb;
1494} 1607}
1495 1608
1496static inline void 1609static inline void
1497queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) 1610queue_unlock(struct futex_hash_bucket *hb)
1498 __releases(&hb->lock) 1611 __releases(&hb->lock)
1499{ 1612{
1500 spin_unlock(&hb->lock); 1613 spin_unlock(&hb->lock);
@@ -1867,7 +1980,7 @@ retry_private:
1867 ret = get_futex_value_locked(&uval, uaddr); 1980 ret = get_futex_value_locked(&uval, uaddr);
1868 1981
1869 if (ret) { 1982 if (ret) {
1870 queue_unlock(q, *hb); 1983 queue_unlock(*hb);
1871 1984
1872 ret = get_user(uval, uaddr); 1985 ret = get_user(uval, uaddr);
1873 if (ret) 1986 if (ret)
@@ -1881,7 +1994,7 @@ retry_private:
1881 } 1994 }
1882 1995
1883 if (uval != val) { 1996 if (uval != val) {
1884 queue_unlock(q, *hb); 1997 queue_unlock(*hb);
1885 ret = -EWOULDBLOCK; 1998 ret = -EWOULDBLOCK;
1886 } 1999 }
1887 2000
@@ -2029,7 +2142,7 @@ retry_private:
2029 * Task is exiting and we just wait for the 2142 * Task is exiting and we just wait for the
2030 * exit to complete. 2143 * exit to complete.
2031 */ 2144 */
2032 queue_unlock(&q, hb); 2145 queue_unlock(hb);
2033 put_futex_key(&q.key); 2146 put_futex_key(&q.key);
2034 cond_resched(); 2147 cond_resched();
2035 goto retry; 2148 goto retry;
@@ -2081,7 +2194,7 @@ retry_private:
2081 goto out_put_key; 2194 goto out_put_key;
2082 2195
2083out_unlock_put_key: 2196out_unlock_put_key:
2084 queue_unlock(&q, hb); 2197 queue_unlock(hb);
2085 2198
2086out_put_key: 2199out_put_key:
2087 put_futex_key(&q.key); 2200 put_futex_key(&q.key);
@@ -2091,7 +2204,7 @@ out:
2091 return ret != -EINTR ? ret : -ERESTARTNOINTR; 2204 return ret != -EINTR ? ret : -ERESTARTNOINTR;
2092 2205
2093uaddr_faulted: 2206uaddr_faulted:
2094 queue_unlock(&q, hb); 2207 queue_unlock(hb);
2095 2208
2096 ret = fault_in_user_writeable(uaddr); 2209 ret = fault_in_user_writeable(uaddr);
2097 if (ret) 2210 if (ret)
@@ -2113,7 +2226,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2113{ 2226{
2114 struct futex_hash_bucket *hb; 2227 struct futex_hash_bucket *hb;
2115 struct futex_q *this, *next; 2228 struct futex_q *this, *next;
2116 struct plist_head *head;
2117 union futex_key key = FUTEX_KEY_INIT; 2229 union futex_key key = FUTEX_KEY_INIT;
2118 u32 uval, vpid = task_pid_vnr(current); 2230 u32 uval, vpid = task_pid_vnr(current);
2119 int ret; 2231 int ret;
@@ -2153,9 +2265,7 @@ retry:
2153 * Ok, other tasks may need to be woken up - check waiters 2265 * Ok, other tasks may need to be woken up - check waiters
2154 * and do the wakeup if necessary: 2266 * and do the wakeup if necessary:
2155 */ 2267 */
2156 head = &hb->chain; 2268 plist_for_each_entry_safe(this, next, &hb->chain, list) {
2157
2158 plist_for_each_entry_safe(this, next, head, list) {
2159 if (!match_futex (&this->key, &key)) 2269 if (!match_futex (&this->key, &key))
2160 continue; 2270 continue;
2161 ret = wake_futex_pi(uaddr, uval, this); 2271 ret = wake_futex_pi(uaddr, uval, this);
@@ -2734,8 +2844,21 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2734static int __init futex_init(void) 2844static int __init futex_init(void)
2735{ 2845{
2736 u32 curval; 2846 u32 curval;
2737 int i; 2847 unsigned int futex_shift;
2848 unsigned long i;
2849
2850#if CONFIG_BASE_SMALL
2851 futex_hashsize = 16;
2852#else
2853 futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
2854#endif
2738 2855
2856 futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
2857 futex_hashsize, 0,
2858 futex_hashsize < 256 ? HASH_SMALL : 0,
2859 &futex_shift, NULL,
2860 futex_hashsize, futex_hashsize);
2861 futex_hashsize = 1UL << futex_shift;
2739 /* 2862 /*
2740 * This will fail and we want it. Some arch implementations do 2863 * This will fail and we want it. Some arch implementations do
2741 * runtime detection of the futex_atomic_cmpxchg_inatomic() 2864 * runtime detection of the futex_atomic_cmpxchg_inatomic()
@@ -2749,7 +2872,7 @@ static int __init futex_init(void)
2749 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) 2872 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2750 futex_cmpxchg_enabled = 1; 2873 futex_cmpxchg_enabled = 1;
2751 2874
2752 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { 2875 for (i = 0; i < futex_hashsize; i++) {
2753 plist_head_init(&futex_queues[i].chain); 2876 plist_head_init(&futex_queues[i].chain);
2754 spin_lock_init(&futex_queues[i].lock); 2877 spin_lock_init(&futex_queues[i].lock);
2755 } 2878 }
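hash_futex() above indexes the table with hash & (futex_hashsize - 1), so the allocated size has to be a power of two; alloc_large_system_hash() reports the size it actually allocated as a shift count, which is why futex_hashsize is recomputed as 1UL << futex_shift. The stand-alone sketch below (editor's illustration, not part of this patch; the round-up helper is spelled out by hand) shows that sizing/masking relationship:

/* Illustration only: a power-of-two table size lets "hash % size" become a mask. */
#include <stdio.h>

static unsigned long roundup_pow_of_two(unsigned long n)
{
        unsigned long p = 1;

        while (p < n)
                p <<= 1;
        return p;
}

int main(void)
{
        unsigned long cpus = 64;                        /* example CPU count        */
        unsigned long hashsize = roundup_pow_of_two(256 * cpus);
        unsigned int shift = 0;
        unsigned int hash = 0xdeadbeef;                 /* example jhash2() result  */

        while ((1UL << shift) < hashsize)
                shift++;

        printf("hashsize=%lu (1 << %u), index=%lu\n",
               hashsize, shift, (unsigned long)(hash & (hashsize - 1)));
        return 0;
}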
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 576ba756a32d..eb8a54783fa0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -590,6 +590,7 @@ static int very_verbose(struct lock_class *class)
590/* 590/*
591 * Is this the address of a static object: 591 * Is this the address of a static object:
592 */ 592 */
593#ifdef __KERNEL__
593static int static_obj(void *obj) 594static int static_obj(void *obj)
594{ 595{
595 unsigned long start = (unsigned long) &_stext, 596 unsigned long start = (unsigned long) &_stext,
@@ -616,6 +617,7 @@ static int static_obj(void *obj)
616 */ 617 */
617 return is_module_address(addr) || is_module_percpu_address(addr); 618 return is_module_address(addr) || is_module_percpu_address(addr);
618} 619}
620#endif
619 621
620/* 622/*
621 * To make lock name printouts unique, we calculate a unique 623 * To make lock name printouts unique, we calculate a unique
@@ -4115,6 +4117,7 @@ void debug_check_no_locks_held(void)
4115} 4117}
4116EXPORT_SYMBOL_GPL(debug_check_no_locks_held); 4118EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
4117 4119
4120#ifdef __KERNEL__
4118void debug_show_all_locks(void) 4121void debug_show_all_locks(void)
4119{ 4122{
4120 struct task_struct *g, *p; 4123 struct task_struct *g, *p;
@@ -4172,6 +4175,7 @@ retry:
4172 read_unlock(&tasklist_lock); 4175 read_unlock(&tasklist_lock);
4173} 4176}
4174EXPORT_SYMBOL_GPL(debug_show_all_locks); 4177EXPORT_SYMBOL_GPL(debug_show_all_locks);
4178#endif
4175 4179
4176/* 4180/*
4177 * Careful: only use this function if you are sure that 4181 * Careful: only use this function if you are sure that
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 7e3443fe1f48..faf6f5b53e77 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -75,7 +75,12 @@ void debug_mutex_unlock(struct mutex *lock)
75 return; 75 return;
76 76
77 DEBUG_LOCKS_WARN_ON(lock->magic != lock); 77 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
78 DEBUG_LOCKS_WARN_ON(lock->owner != current); 78
79 if (!lock->owner)
80 DEBUG_LOCKS_WARN_ON(!lock->owner);
81 else
82 DEBUG_LOCKS_WARN_ON(lock->owner != current);
83
79 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 84 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
80 mutex_clear_owner(lock); 85 mutex_clear_owner(lock);
81} 86}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index dd081987a8ec..a6205a05b5e4 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1133,8 +1133,10 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1133 * hold it, acquire the root rcu_node structure's lock in order to 1133 * hold it, acquire the root rcu_node structure's lock in order to
1134 * start one (if needed). 1134 * start one (if needed).
1135 */ 1135 */
1136 if (rnp != rnp_root) 1136 if (rnp != rnp_root) {
1137 raw_spin_lock(&rnp_root->lock); 1137 raw_spin_lock(&rnp_root->lock);
1138 smp_mb__after_unlock_lock();
1139 }
1138 1140
1139 /* 1141 /*
1140 * Get a new grace-period number. If there really is no grace 1142 * Get a new grace-period number. If there really is no grace
@@ -1354,6 +1356,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1354 local_irq_restore(flags); 1356 local_irq_restore(flags);
1355 return; 1357 return;
1356 } 1358 }
1359 smp_mb__after_unlock_lock();
1357 __note_gp_changes(rsp, rnp, rdp); 1360 __note_gp_changes(rsp, rnp, rdp);
1358 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1361 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1359} 1362}
@@ -1368,6 +1371,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1368 1371
1369 rcu_bind_gp_kthread(); 1372 rcu_bind_gp_kthread();
1370 raw_spin_lock_irq(&rnp->lock); 1373 raw_spin_lock_irq(&rnp->lock);
1374 smp_mb__after_unlock_lock();
1371 if (rsp->gp_flags == 0) { 1375 if (rsp->gp_flags == 0) {
1372 /* Spurious wakeup, tell caller to go back to sleep. */ 1376 /* Spurious wakeup, tell caller to go back to sleep. */
1373 raw_spin_unlock_irq(&rnp->lock); 1377 raw_spin_unlock_irq(&rnp->lock);
@@ -1409,6 +1413,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1409 */ 1413 */
1410 rcu_for_each_node_breadth_first(rsp, rnp) { 1414 rcu_for_each_node_breadth_first(rsp, rnp) {
1411 raw_spin_lock_irq(&rnp->lock); 1415 raw_spin_lock_irq(&rnp->lock);
1416 smp_mb__after_unlock_lock();
1412 rdp = this_cpu_ptr(rsp->rda); 1417 rdp = this_cpu_ptr(rsp->rda);
1413 rcu_preempt_check_blocked_tasks(rnp); 1418 rcu_preempt_check_blocked_tasks(rnp);
1414 rnp->qsmask = rnp->qsmaskinit; 1419 rnp->qsmask = rnp->qsmaskinit;
@@ -1463,6 +1468,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1463 /* Clear flag to prevent immediate re-entry. */ 1468 /* Clear flag to prevent immediate re-entry. */
1464 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1469 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1465 raw_spin_lock_irq(&rnp->lock); 1470 raw_spin_lock_irq(&rnp->lock);
1471 smp_mb__after_unlock_lock();
1466 rsp->gp_flags &= ~RCU_GP_FLAG_FQS; 1472 rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
1467 raw_spin_unlock_irq(&rnp->lock); 1473 raw_spin_unlock_irq(&rnp->lock);
1468 } 1474 }
@@ -1480,6 +1486,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1480 struct rcu_node *rnp = rcu_get_root(rsp); 1486 struct rcu_node *rnp = rcu_get_root(rsp);
1481 1487
1482 raw_spin_lock_irq(&rnp->lock); 1488 raw_spin_lock_irq(&rnp->lock);
1489 smp_mb__after_unlock_lock();
1483 gp_duration = jiffies - rsp->gp_start; 1490 gp_duration = jiffies - rsp->gp_start;
1484 if (gp_duration > rsp->gp_max) 1491 if (gp_duration > rsp->gp_max)
1485 rsp->gp_max = gp_duration; 1492 rsp->gp_max = gp_duration;
@@ -1505,6 +1512,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1505 */ 1512 */
1506 rcu_for_each_node_breadth_first(rsp, rnp) { 1513 rcu_for_each_node_breadth_first(rsp, rnp) {
1507 raw_spin_lock_irq(&rnp->lock); 1514 raw_spin_lock_irq(&rnp->lock);
1515 smp_mb__after_unlock_lock();
1508 ACCESS_ONCE(rnp->completed) = rsp->gpnum; 1516 ACCESS_ONCE(rnp->completed) = rsp->gpnum;
1509 rdp = this_cpu_ptr(rsp->rda); 1517 rdp = this_cpu_ptr(rsp->rda);
1510 if (rnp == rdp->mynode) 1518 if (rnp == rdp->mynode)
@@ -1515,6 +1523,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1515 } 1523 }
1516 rnp = rcu_get_root(rsp); 1524 rnp = rcu_get_root(rsp);
1517 raw_spin_lock_irq(&rnp->lock); 1525 raw_spin_lock_irq(&rnp->lock);
1526 smp_mb__after_unlock_lock();
1518 rcu_nocb_gp_set(rnp, nocb); 1527 rcu_nocb_gp_set(rnp, nocb);
1519 1528
1520 rsp->completed = rsp->gpnum; /* Declare grace period done. */ 1529 rsp->completed = rsp->gpnum; /* Declare grace period done. */
@@ -1749,6 +1758,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
1749 rnp_c = rnp; 1758 rnp_c = rnp;
1750 rnp = rnp->parent; 1759 rnp = rnp->parent;
1751 raw_spin_lock_irqsave(&rnp->lock, flags); 1760 raw_spin_lock_irqsave(&rnp->lock, flags);
1761 smp_mb__after_unlock_lock();
1752 WARN_ON_ONCE(rnp_c->qsmask); 1762 WARN_ON_ONCE(rnp_c->qsmask);
1753 } 1763 }
1754 1764
@@ -1778,6 +1788,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1778 1788
1779 rnp = rdp->mynode; 1789 rnp = rdp->mynode;
1780 raw_spin_lock_irqsave(&rnp->lock, flags); 1790 raw_spin_lock_irqsave(&rnp->lock, flags);
1791 smp_mb__after_unlock_lock();
1781 if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || 1792 if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
1782 rnp->completed == rnp->gpnum) { 1793 rnp->completed == rnp->gpnum) {
1783 1794
@@ -1992,6 +2003,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1992 mask = rdp->grpmask; /* rnp->grplo is constant. */ 2003 mask = rdp->grpmask; /* rnp->grplo is constant. */
1993 do { 2004 do {
1994 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 2005 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
2006 smp_mb__after_unlock_lock();
1995 rnp->qsmaskinit &= ~mask; 2007 rnp->qsmaskinit &= ~mask;
1996 if (rnp->qsmaskinit != 0) { 2008 if (rnp->qsmaskinit != 0) {
1997 if (rnp != rdp->mynode) 2009 if (rnp != rdp->mynode)
@@ -2202,6 +2214,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2202 cond_resched(); 2214 cond_resched();
2203 mask = 0; 2215 mask = 0;
2204 raw_spin_lock_irqsave(&rnp->lock, flags); 2216 raw_spin_lock_irqsave(&rnp->lock, flags);
2217 smp_mb__after_unlock_lock();
2205 if (!rcu_gp_in_progress(rsp)) { 2218 if (!rcu_gp_in_progress(rsp)) {
2206 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2219 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2207 return; 2220 return;
@@ -2231,6 +2244,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2231 rnp = rcu_get_root(rsp); 2244 rnp = rcu_get_root(rsp);
2232 if (rnp->qsmask == 0) { 2245 if (rnp->qsmask == 0) {
2233 raw_spin_lock_irqsave(&rnp->lock, flags); 2246 raw_spin_lock_irqsave(&rnp->lock, flags);
2247 smp_mb__after_unlock_lock();
2234 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ 2248 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
2235 } 2249 }
2236} 2250}
@@ -2263,6 +2277,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
2263 2277
2264 /* Reached the root of the rcu_node tree, acquire lock. */ 2278 /* Reached the root of the rcu_node tree, acquire lock. */
2265 raw_spin_lock_irqsave(&rnp_old->lock, flags); 2279 raw_spin_lock_irqsave(&rnp_old->lock, flags);
2280 smp_mb__after_unlock_lock();
2266 raw_spin_unlock(&rnp_old->fqslock); 2281 raw_spin_unlock(&rnp_old->fqslock);
2267 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 2282 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
2268 rsp->n_force_qs_lh++; 2283 rsp->n_force_qs_lh++;
@@ -2378,6 +2393,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2378 struct rcu_node *rnp_root = rcu_get_root(rsp); 2393 struct rcu_node *rnp_root = rcu_get_root(rsp);
2379 2394
2380 raw_spin_lock(&rnp_root->lock); 2395 raw_spin_lock(&rnp_root->lock);
2396 smp_mb__after_unlock_lock();
2381 rcu_start_gp(rsp); 2397 rcu_start_gp(rsp);
2382 raw_spin_unlock(&rnp_root->lock); 2398 raw_spin_unlock(&rnp_root->lock);
2383 } else { 2399 } else {
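Each smp_mb__after_unlock_lock() added above strengthens the raw_spin_lock() immediately before it, so the UNLOCK of the previously held rcu_node lock plus this LOCK together behave as a full memory barrier. A loose user-space analogue (editor's illustration, not part of this patch; pthread locks only give acquire/release ordering, so an explicit seq_cst fence stands in for the kernel primitive):

/* Illustration only: "lock, then promote to a full barrier" in user space. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static int protected_state;

static void lock_with_full_barrier(pthread_mutex_t *lock)
{
        pthread_mutex_lock(lock);               /* acquire ordering only */
        atomic_thread_fence(memory_order_seq_cst);
        /* From here on, accesses from the previous critical section and
         * this one are ordered as if a full barrier sat between them,
         * which is what RCU relies on while walking the rcu_node tree. */
}

int main(void)
{
        lock_with_full_barrier(&node_lock);
        protected_state++;
        pthread_mutex_unlock(&node_lock);
        printf("state=%d\n", protected_state);
        return 0;
}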
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 08a765232432..506a7a97a2e2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -204,6 +204,7 @@ static void rcu_preempt_note_context_switch(int cpu)
204 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 204 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
205 rnp = rdp->mynode; 205 rnp = rdp->mynode;
206 raw_spin_lock_irqsave(&rnp->lock, flags); 206 raw_spin_lock_irqsave(&rnp->lock, flags);
207 smp_mb__after_unlock_lock();
207 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 208 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
208 t->rcu_blocked_node = rnp; 209 t->rcu_blocked_node = rnp;
209 210
@@ -312,6 +313,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
312 mask = rnp->grpmask; 313 mask = rnp->grpmask;
313 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 314 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
314 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ 315 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
316 smp_mb__after_unlock_lock();
315 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); 317 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
316} 318}
317 319
@@ -381,6 +383,7 @@ void rcu_read_unlock_special(struct task_struct *t)
381 for (;;) { 383 for (;;) {
382 rnp = t->rcu_blocked_node; 384 rnp = t->rcu_blocked_node;
383 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 385 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
386 smp_mb__after_unlock_lock();
384 if (rnp == t->rcu_blocked_node) 387 if (rnp == t->rcu_blocked_node)
385 break; 388 break;
386 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 389 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
@@ -605,6 +608,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
605 while (!list_empty(lp)) { 608 while (!list_empty(lp)) {
606 t = list_entry(lp->next, typeof(*t), rcu_node_entry); 609 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
607 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 610 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
611 smp_mb__after_unlock_lock();
608 list_del(&t->rcu_node_entry); 612 list_del(&t->rcu_node_entry);
609 t->rcu_blocked_node = rnp_root; 613 t->rcu_blocked_node = rnp_root;
610 list_add(&t->rcu_node_entry, lp_root); 614 list_add(&t->rcu_node_entry, lp_root);
@@ -629,6 +633,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
629 * in this case. 633 * in this case.
630 */ 634 */
631 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 635 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
636 smp_mb__after_unlock_lock();
632 if (rnp_root->boost_tasks != NULL && 637 if (rnp_root->boost_tasks != NULL &&
633 rnp_root->boost_tasks != rnp_root->gp_tasks && 638 rnp_root->boost_tasks != rnp_root->gp_tasks &&
634 rnp_root->boost_tasks != rnp_root->exp_tasks) 639 rnp_root->boost_tasks != rnp_root->exp_tasks)
@@ -772,6 +777,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
772 unsigned long mask; 777 unsigned long mask;
773 778
774 raw_spin_lock_irqsave(&rnp->lock, flags); 779 raw_spin_lock_irqsave(&rnp->lock, flags);
780 smp_mb__after_unlock_lock();
775 for (;;) { 781 for (;;) {
776 if (!sync_rcu_preempt_exp_done(rnp)) { 782 if (!sync_rcu_preempt_exp_done(rnp)) {
777 raw_spin_unlock_irqrestore(&rnp->lock, flags); 783 raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -787,6 +793,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
787 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 793 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
788 rnp = rnp->parent; 794 rnp = rnp->parent;
789 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 795 raw_spin_lock(&rnp->lock); /* irqs already disabled */
796 smp_mb__after_unlock_lock();
790 rnp->expmask &= ~mask; 797 rnp->expmask &= ~mask;
791 } 798 }
792} 799}
@@ -806,6 +813,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
806 int must_wait = 0; 813 int must_wait = 0;
807 814
808 raw_spin_lock_irqsave(&rnp->lock, flags); 815 raw_spin_lock_irqsave(&rnp->lock, flags);
816 smp_mb__after_unlock_lock();
809 if (list_empty(&rnp->blkd_tasks)) { 817 if (list_empty(&rnp->blkd_tasks)) {
810 raw_spin_unlock_irqrestore(&rnp->lock, flags); 818 raw_spin_unlock_irqrestore(&rnp->lock, flags);
811 } else { 819 } else {
@@ -886,6 +894,7 @@ void synchronize_rcu_expedited(void)
886 /* Initialize ->expmask for all non-leaf rcu_node structures. */ 894 /* Initialize ->expmask for all non-leaf rcu_node structures. */
887 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { 895 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
888 raw_spin_lock_irqsave(&rnp->lock, flags); 896 raw_spin_lock_irqsave(&rnp->lock, flags);
897 smp_mb__after_unlock_lock();
889 rnp->expmask = rnp->qsmaskinit; 898 rnp->expmask = rnp->qsmaskinit;
890 raw_spin_unlock_irqrestore(&rnp->lock, flags); 899 raw_spin_unlock_irqrestore(&rnp->lock, flags);
891 } 900 }
@@ -1191,6 +1200,7 @@ static int rcu_boost(struct rcu_node *rnp)
1191 return 0; /* Nothing left to boost. */ 1200 return 0; /* Nothing left to boost. */
1192 1201
1193 raw_spin_lock_irqsave(&rnp->lock, flags); 1202 raw_spin_lock_irqsave(&rnp->lock, flags);
1203 smp_mb__after_unlock_lock();
1194 1204
1195 /* 1205 /*
1196 * Recheck under the lock: all tasks in need of boosting 1206 * Recheck under the lock: all tasks in need of boosting
@@ -1377,6 +1387,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1377 if (IS_ERR(t)) 1387 if (IS_ERR(t))
1378 return PTR_ERR(t); 1388 return PTR_ERR(t);
1379 raw_spin_lock_irqsave(&rnp->lock, flags); 1389 raw_spin_lock_irqsave(&rnp->lock, flags);
1390 smp_mb__after_unlock_lock();
1380 rnp->boost_kthread_task = t; 1391 rnp->boost_kthread_task = t;
1381 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1392 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1382 sp.sched_priority = RCU_BOOST_PRIO; 1393 sp.sched_priority = RCU_BOOST_PRIO;
@@ -1769,6 +1780,7 @@ static void rcu_prepare_for_idle(int cpu)
1769 continue; 1780 continue;
1770 rnp = rdp->mynode; 1781 rnp = rdp->mynode;
1771 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1782 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1783 smp_mb__after_unlock_lock();
1772 rcu_accelerate_cbs(rsp, rnp, rdp); 1784 rcu_accelerate_cbs(rsp, rnp, rdp);
1773 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1785 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1774 } 1786 }
@@ -2209,6 +2221,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2209 struct rcu_node *rnp = rdp->mynode; 2221 struct rcu_node *rnp = rdp->mynode;
2210 2222
2211 raw_spin_lock_irqsave(&rnp->lock, flags); 2223 raw_spin_lock_irqsave(&rnp->lock, flags);
2224 smp_mb__after_unlock_lock();
2212 c = rcu_start_future_gp(rnp, rdp); 2225 c = rcu_start_future_gp(rnp, rdp);
2213 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2226 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2214 2227
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 11025ccc06dd..9a4500e4c189 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -211,14 +211,48 @@ EXPORT_SYMBOL(local_bh_enable_ip);
211#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2) 211#define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
212#define MAX_SOFTIRQ_RESTART 10 212#define MAX_SOFTIRQ_RESTART 10
213 213
214#ifdef CONFIG_TRACE_IRQFLAGS
215/*
216 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
217 * to keep the lockdep irq context tracking as tight as possible in order to
 218 * not mis-qualify lock contexts and miss possible deadlocks.
219 */
220
221static inline bool lockdep_softirq_start(void)
222{
223 bool in_hardirq = false;
224
225 if (trace_hardirq_context(current)) {
226 in_hardirq = true;
227 trace_hardirq_exit();
228 }
229
230 lockdep_softirq_enter();
231
232 return in_hardirq;
233}
234
235static inline void lockdep_softirq_end(bool in_hardirq)
236{
237 lockdep_softirq_exit();
238
239 if (in_hardirq)
240 trace_hardirq_enter();
241}
242#else
243static inline bool lockdep_softirq_start(void) { return false; }
244static inline void lockdep_softirq_end(bool in_hardirq) { }
245#endif
246
214asmlinkage void __do_softirq(void) 247asmlinkage void __do_softirq(void)
215{ 248{
216 struct softirq_action *h;
217 __u32 pending;
218 unsigned long end = jiffies + MAX_SOFTIRQ_TIME; 249 unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
219 int cpu;
220 unsigned long old_flags = current->flags; 250 unsigned long old_flags = current->flags;
221 int max_restart = MAX_SOFTIRQ_RESTART; 251 int max_restart = MAX_SOFTIRQ_RESTART;
252 struct softirq_action *h;
253 bool in_hardirq;
254 __u32 pending;
255 int cpu;
222 256
223 /* 257 /*
 224	 * Mask out PF_MEMALLOC as current task context is borrowed for the 258
@@ -231,7 +265,7 @@ asmlinkage void __do_softirq(void)
231 account_irq_enter_time(current); 265 account_irq_enter_time(current);
232 266
233 __local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET); 267 __local_bh_disable(_RET_IP_, SOFTIRQ_OFFSET);
234 lockdep_softirq_enter(); 268 in_hardirq = lockdep_softirq_start();
235 269
236 cpu = smp_processor_id(); 270 cpu = smp_processor_id();
237restart: 271restart:
@@ -278,16 +312,13 @@ restart:
278 wakeup_softirqd(); 312 wakeup_softirqd();
279 } 313 }
280 314
281 lockdep_softirq_exit(); 315 lockdep_softirq_end(in_hardirq);
282
283 account_irq_exit_time(current); 316 account_irq_exit_time(current);
284 __local_bh_enable(SOFTIRQ_OFFSET); 317 __local_bh_enable(SOFTIRQ_OFFSET);
285 WARN_ON_ONCE(in_interrupt()); 318 WARN_ON_ONCE(in_interrupt());
286 tsk_restore_flags(current, old_flags, PF_MEMALLOC); 319 tsk_restore_flags(current, old_flags, PF_MEMALLOC);
287} 320}
288 321
289
290
291asmlinkage void do_softirq(void) 322asmlinkage void do_softirq(void)
292{ 323{
293 __u32 pending; 324 __u32 pending;
@@ -375,13 +406,13 @@ void irq_exit(void)
375#endif 406#endif
376 407
377 account_irq_exit_time(current); 408 account_irq_exit_time(current);
378 trace_hardirq_exit();
379 preempt_count_sub(HARDIRQ_OFFSET); 409 preempt_count_sub(HARDIRQ_OFFSET);
380 if (!in_interrupt() && local_softirq_pending()) 410 if (!in_interrupt() && local_softirq_pending())
381 invoke_softirq(); 411 invoke_softirq();
382 412
383 tick_irq_exit(); 413 tick_irq_exit();
384 rcu_irq_exit(); 414 rcu_irq_exit();
415 trace_hardirq_exit(); /* must be last! */
385} 416}
386 417
387/* 418/*
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
new file mode 100644
index 000000000000..da8b7aa3d351
--- /dev/null
+++ b/tools/lib/lockdep/Makefile
@@ -0,0 +1,251 @@
1# liblockdep version
2LL_VERSION = 0
3LL_PATCHLEVEL = 0
4LL_EXTRAVERSION = 1
5
6# file format version
7FILE_VERSION = 1
8
9MAKEFLAGS += --no-print-directory
10
11
12# Makefiles suck: This macro sets a default value of $(2) for the
13# variable named by $(1), unless the variable has been set by
14# environment or command line. This is necessary for CC and AR
15# because make sets default values, so the simpler ?= approach
16# won't work as expected.
17define allow-override
18 $(if $(or $(findstring environment,$(origin $(1))),\
19 $(findstring command line,$(origin $(1)))),,\
20 $(eval $(1) = $(2)))
21endef
22
23# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
24$(call allow-override,CC,$(CROSS_COMPILE)gcc)
25$(call allow-override,AR,$(CROSS_COMPILE)ar)
26
27INSTALL = install
28
29# Use DESTDIR for installing into a different root directory.
30# This is useful for building a package. The program will be
31# installed in this directory as if it was the root directory.
32# Then the build tool can move it later.
33DESTDIR ?=
34DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
35
36prefix ?= /usr/local
37libdir_relative = lib
38libdir = $(prefix)/$(libdir_relative)
39bindir_relative = bin
40bindir = $(prefix)/$(bindir_relative)
41
42export DESTDIR DESTDIR_SQ INSTALL
43
44# copy a bit from Linux kbuild
45
46ifeq ("$(origin V)", "command line")
47 VERBOSE = $(V)
48endif
49ifndef VERBOSE
50 VERBOSE = 0
51endif
52
53ifeq ("$(origin O)", "command line")
54 BUILD_OUTPUT := $(O)
55endif
56
57ifeq ($(BUILD_SRC),)
58ifneq ($(BUILD_OUTPUT),)
59
60define build_output
61 $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \
62 BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1
63endef
64
65saved-output := $(BUILD_OUTPUT)
66BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd)
67$(if $(BUILD_OUTPUT),, \
68 $(error output directory "$(saved-output)" does not exist))
69
70all: sub-make
71
72gui: force
73 $(call build_output, all_cmd)
74
75$(filter-out gui,$(MAKECMDGOALS)): sub-make
76
77sub-make: force
78 $(call build_output, $(MAKECMDGOALS))
79
80
81# Leave processing to above invocation of make
82skip-makefile := 1
83
84endif # BUILD_OUTPUT
85endif # BUILD_SRC
86
87# We process the rest of the Makefile if this is the final invocation of make
88ifeq ($(skip-makefile),)
89
90srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))
91objtree := $(CURDIR)
92src := $(srctree)
93obj := $(objtree)
94
95export prefix libdir bindir src obj
96
97# Shell quotes
98libdir_SQ = $(subst ','\'',$(libdir))
99bindir_SQ = $(subst ','\'',$(bindir))
100
101LIB_FILE = liblockdep.a liblockdep.so
102BIN_FILE = lockdep
103
104CONFIG_INCLUDES =
105CONFIG_LIBS =
106CONFIG_FLAGS =
107
108OBJ = $@
109N =
110
111export Q VERBOSE
112
113LIBLOCKDEP_VERSION = $(LL_VERSION).$(LL_PATCHLEVEL).$(LL_EXTRAVERSION)
114
115INCLUDES = -I. -I/usr/local/include -I./uinclude $(CONFIG_INCLUDES)
116
117# Set compile option CFLAGS if not set elsewhere
118CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
119
120override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
121
122ifeq ($(VERBOSE),1)
123 Q =
124 print_compile =
125 print_app_build =
126 print_fpic_compile =
127 print_shared_lib_compile =
128 print_install =
129else
130 Q = @
131 print_compile = echo ' CC '$(OBJ);
132 print_app_build = echo ' BUILD '$(OBJ);
133 print_fpic_compile = echo ' CC FPIC '$(OBJ);
134 print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ);
135 print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ);
136 print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2';
137endif
138
139do_fpic_compile = \
140 ($(print_fpic_compile) \
141 $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@)
142
143do_app_build = \
144 ($(print_app_build) \
145 $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS))
146
147do_compile_shared_library = \
148 ($(print_shared_lib_compile) \
149 $(CC) --shared $^ -o $@ -lpthread -ldl)
150
151do_build_static_lib = \
152 ($(print_static_lib_build) \
153 $(RM) $@; $(AR) rcs $@ $^)
154
155
156define do_compile
157 $(print_compile) \
158 $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@;
159endef
160
161$(obj)/%.o: $(src)/%.c
162 $(Q)$(call do_compile)
163
164%.o: $(src)/%.c
165 $(Q)$(call do_compile)
166
167PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o
168
169ALL_OBJS = $(PEVENT_LIB_OBJS)
170
171CMD_TARGETS = $(LIB_FILE)
172
173TARGETS = $(CMD_TARGETS)
174
175
176all: all_cmd
177
178all_cmd: $(CMD_TARGETS)
179
180liblockdep.so: $(PEVENT_LIB_OBJS)
181 $(Q)$(do_compile_shared_library)
182
183liblockdep.a: $(PEVENT_LIB_OBJS)
184 $(Q)$(do_build_static_lib)
185
186$(PEVENT_LIB_OBJS): %.o: $(src)/%.c
187 $(Q)$(do_fpic_compile)
188
189## make deps
190
191all_objs := $(sort $(ALL_OBJS))
192all_deps := $(all_objs:%.o=.%.d)
193
 194# let .d file also depend on the source and header files
195define check_deps
196 @set -e; $(RM) $@; \
197 $(CC) -MM $(CFLAGS) $< > $@.$$$$; \
198 sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
199 $(RM) $@.$$$$
200endef
201
202$(all_deps): .%.d: $(src)/%.c
203 $(Q)$(call check_deps)
204
205$(all_objs) : %.o : .%.d
206
207dep_includes := $(wildcard $(all_deps))
208
209ifneq ($(dep_includes),)
210 include $(dep_includes)
211endif
212
213### Detect environment changes
214TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE)
215
216tags: force
217 $(RM) tags
218 find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
219 --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'
220
221TAGS: force
222 $(RM) TAGS
223 find . -name '*.[ch]' | xargs etags \
224 --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'
225
226define do_install
227 $(print_install) \
228 if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
229 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
230 fi; \
231 $(INSTALL) $1 '$(DESTDIR_SQ)$2'
232endef
233
234install_lib: all_cmd
235 $(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ))
236 $(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ))
237
238install: install_lib
239
240clean:
241 $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d
242 $(RM) tags TAGS
243
244endif # skip-makefile
245
246PHONY += force
247force:
248
249# Declare the contents of the .PHONY variable as phony. We keep that
250# information in a variable so we can use it in if_changed and friends.
251.PHONY: $(PHONY)
diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c
new file mode 100644
index 000000000000..8ef602f18a32
--- /dev/null
+++ b/tools/lib/lockdep/common.c
@@ -0,0 +1,33 @@
1#include <stddef.h>
2#include <stdbool.h>
3#include <linux/compiler.h>
4#include <linux/lockdep.h>
5#include <unistd.h>
 6#include <sys/syscall.h>
#include <sys/prctl.h>
7
8static __thread struct task_struct current_obj;
9
10/* lockdep wants these */
11bool debug_locks = true;
12bool debug_locks_silent;
13
14__attribute__((constructor)) static void liblockdep_init(void)
15{
16 lockdep_init();
17}
18
19__attribute__((destructor)) static void liblockdep_exit(void)
20{
 21	debug_check_no_locks_held();
22}
23
24struct task_struct *__curr(void)
25{
26 if (current_obj.pid == 0) {
27 /* Makes lockdep output pretty */
28 prctl(PR_GET_NAME, current_obj.comm);
29 current_obj.pid = syscall(__NR_gettid);
30 }
31
32 return &current_obj;
33}
diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
new file mode 100644
index 000000000000..0bda630027c3
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/common.h
@@ -0,0 +1,50 @@
1#ifndef _LIBLOCKDEP_COMMON_H
2#define _LIBLOCKDEP_COMMON_H
3
4#include <pthread.h>
5
6#define NR_LOCKDEP_CACHING_CLASSES 2
7#define MAX_LOCKDEP_SUBCLASSES 8UL
8
9#ifndef CALLER_ADDR0
10#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
11#endif
12
13#ifndef _RET_IP_
14#define _RET_IP_ CALLER_ADDR0
15#endif
16
17#ifndef _THIS_IP_
18#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
19#endif
20
21struct lockdep_subclass_key {
22 char __one_byte;
23};
24
25struct lock_class_key {
26 struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
27};
28
29struct lockdep_map {
30 struct lock_class_key *key;
31 struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
32 const char *name;
33#ifdef CONFIG_LOCK_STAT
34 int cpu;
35 unsigned long ip;
36#endif
37};
38
39void lockdep_init_map(struct lockdep_map *lock, const char *name,
40 struct lock_class_key *key, int subclass);
41void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
42 int trylock, int read, int check,
43 struct lockdep_map *nest_lock, unsigned long ip);
44void lock_release(struct lockdep_map *lock, int nested,
45 unsigned long ip);
46
47#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
48 { .name = (_name), .key = (void *)(_key), }
49
50#endif
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h
new file mode 100644
index 000000000000..c342f7087147
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/mutex.h
@@ -0,0 +1,70 @@
1#ifndef _LIBLOCKDEP_MUTEX_H
2#define _LIBLOCKDEP_MUTEX_H
3
4#include <pthread.h>
5#include "common.h"
6
7struct liblockdep_pthread_mutex {
8 pthread_mutex_t mutex;
9 struct lockdep_map dep_map;
10};
11
12typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t;
13
14#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx) \
15 (const struct liblockdep_pthread_mutex) { \
16 .mutex = PTHREAD_MUTEX_INITIALIZER, \
17 .dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)), \
18}
19
20static inline int __mutex_init(liblockdep_pthread_mutex_t *lock,
21 const char *name,
22 struct lock_class_key *key,
23 const pthread_mutexattr_t *__mutexattr)
24{
25 lockdep_init_map(&lock->dep_map, name, key, 0);
26 return pthread_mutex_init(&lock->mutex, __mutexattr);
27}
28
29#define liblockdep_pthread_mutex_init(mutex, mutexattr) \
30({ \
31 static struct lock_class_key __key; \
32 \
33 __mutex_init((mutex), #mutex, &__key, (mutexattr)); \
34})
35
36static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock)
37{
38 lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_);
39 return pthread_mutex_lock(&lock->mutex);
40}
41
42static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock)
43{
44 lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_);
45 return pthread_mutex_unlock(&lock->mutex);
46}
47
48static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock)
49{
50 lock_acquire(&lock->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_);
51 return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0;
52}
53
54static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock)
55{
56 return pthread_mutex_destroy(&lock->mutex);
57}
58
59#ifdef __USE_LIBLOCKDEP
60
61#define pthread_mutex_t liblockdep_pthread_mutex_t
62#define pthread_mutex_init liblockdep_pthread_mutex_init
63#define pthread_mutex_lock liblockdep_pthread_mutex_lock
64#define pthread_mutex_unlock liblockdep_pthread_mutex_unlock
65#define pthread_mutex_trylock liblockdep_pthread_mutex_trylock
66#define pthread_mutex_destroy liblockdep_pthread_mutex_destroy
67
68#endif
69
70#endif
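As a usage illustration (editor's example, not part of this patch): with __USE_LIBLOCKDEP defined before including this header, plain pthread_mutex_* calls are routed through the wrappers above, and lockdep can report a lock-order inversion even when the run happens not to deadlock. Compiling with -I tools/lib/lockdep/include and linking against the liblockdep library built by the Makefile above is assumed:

/* Illustration only: single-threaded ABBA ordering violation. */
#define __USE_LIBLOCKDEP
#include <liblockdep/mutex.h>

pthread_mutex_t a, b;

int main(void)
{
        pthread_mutex_init(&a, NULL);
        pthread_mutex_init(&b, NULL);

        pthread_mutex_lock(&a);         /* establishes the order a -> b   */
        pthread_mutex_lock(&b);
        pthread_mutex_unlock(&b);
        pthread_mutex_unlock(&a);

        pthread_mutex_lock(&b);         /* now b -> a: lockdep flags the  */
        pthread_mutex_lock(&a);         /* potential ABBA deadlock here   */
        pthread_mutex_unlock(&a);
        pthread_mutex_unlock(&b);

        return 0;
}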
diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h
new file mode 100644
index 000000000000..a680ab8c2e36
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/rwlock.h
@@ -0,0 +1,86 @@
1#ifndef _LIBLOCKDEP_RWLOCK_H
2#define _LIBLOCKDEP_RWLOCK_H
3
4#include <pthread.h>
5#include "common.h"
6
7struct liblockdep_pthread_rwlock {
8 pthread_rwlock_t rwlock;
9 struct lockdep_map dep_map;
10};
11
12typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t;
13
14#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl) \
15 (struct liblockdep_pthread_rwlock) { \
16 .rwlock = PTHREAD_RWLOCK_INITIALIZER, \
17 .dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)), \
18}
19
20static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock,
21 const char *name,
22 struct lock_class_key *key,
23 const pthread_rwlockattr_t *attr)
24{
25 lockdep_init_map(&lock->dep_map, name, key, 0);
26
27 return pthread_rwlock_init(&lock->rwlock, attr);
28}
29
30#define liblockdep_pthread_rwlock_init(lock, attr) \
31({ \
32 static struct lock_class_key __key; \
33 \
34 __rwlock_init((lock), #lock, &__key, (attr)); \
35})
36
37static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock)
38{
39 lock_acquire(&lock->dep_map, 0, 0, 2, 2, NULL, (unsigned long)_RET_IP_);
40 return pthread_rwlock_rdlock(&lock->rwlock);
41
42}
43
44static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock)
45{
46 lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_);
47 return pthread_rwlock_unlock(&lock->rwlock);
48}
49
50static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock)
51{
52 lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_);
53 return pthread_rwlock_wrlock(&lock->rwlock);
54}
55
56static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock)
57{
58 lock_acquire(&lock->dep_map, 0, 1, 2, 2, NULL, (unsigned long)_RET_IP_);
59 return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0;
60}
61
62static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock)
63{
64 lock_acquire(&lock->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_);
 65	return pthread_rwlock_trywrlock(&lock->rwlock) == 0 ? 1 : 0;
66}
67
68static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
69{
70 return pthread_rwlock_destroy(&lock->rwlock);
71}
72
73#ifdef __USE_LIBLOCKDEP
74
75#define pthread_rwlock_t liblockdep_pthread_rwlock_t
76#define pthread_rwlock_init liblockdep_pthread_rwlock_init
77#define pthread_rwlock_rdlock liblockdep_pthread_rwlock_rdlock
78#define pthread_rwlock_unlock liblockdep_pthread_rwlock_unlock
79#define pthread_rwlock_wrlock liblockdep_pthread_rwlock_wrlock
80#define pthread_rwlock_tryrdlock liblockdep_pthread_rwlock_tryrdlock
 81#define pthread_rwlock_trywrlock liblockdep_pthread_rwlock_trywlock
82#define pthread_rwlock_destroy liblockdep_rwlock_destroy
83
84#endif
85
86#endif
diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep
new file mode 100755
index 000000000000..49af9fe19f5b
--- /dev/null
+++ b/tools/lib/lockdep/lockdep
@@ -0,0 +1,3 @@
1#!/bin/bash
2
3LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@"
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
new file mode 100644
index 000000000000..f42b7e9aa48f
--- /dev/null
+++ b/tools/lib/lockdep/lockdep.c
@@ -0,0 +1,2 @@
1#include <linux/lockdep.h>
2#include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h
new file mode 100644
index 000000000000..29d0c954cc24
--- /dev/null
+++ b/tools/lib/lockdep/lockdep_internals.h
@@ -0,0 +1 @@
#include "../../../kernel/locking/lockdep_internals.h"
diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h
new file mode 100644
index 000000000000..248d235efda9
--- /dev/null
+++ b/tools/lib/lockdep/lockdep_states.h
@@ -0,0 +1 @@
#include "../../../kernel/locking/lockdep_states.h"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
new file mode 100644
index 000000000000..f8465a811aa5
--- /dev/null
+++ b/tools/lib/lockdep/preload.c
@@ -0,0 +1,447 @@
1#define _GNU_SOURCE
2#include <pthread.h>
3#include <stdio.h>
4#include <dlfcn.h>
5#include <stdlib.h>
6#include <sysexits.h>
7#include "include/liblockdep/mutex.h"
8#include "../../../include/linux/rbtree.h"
9
10/**
11 * struct lock_lookup - liblockdep's view of a single unique lock
12 * @orig: pointer to the original pthread lock, used for lookups
13 * @dep_map: lockdep's dep_map structure
14 * @key: lockdep's key structure
15 * @node: rb-tree node used to store the lock in a global tree
16 * @name: a unique name for the lock
17 */
18struct lock_lookup {
19 void *orig; /* Original pthread lock, used for lookups */
20 struct lockdep_map dep_map; /* Since all locks are dynamic, we need
21 * a dep_map and a key for each lock */
22 /*
23 * Wait, there's no support for key classes? Yup :(
24 * Most big projects wrap the pthread api with their own calls to
25 * be compatible with different locking methods. This means that
26 * "classes" will be brokes since the function that creates all
27 * locks will point to a generic locking function instead of the
28 * actual code that wants to do the locking.
29 */
30 struct lock_class_key key;
31 struct rb_node node;
32#define LIBLOCKDEP_MAX_LOCK_NAME 22
33 char name[LIBLOCKDEP_MAX_LOCK_NAME];
34};
35
36/* This is where we store our locks */
37static struct rb_root locks = RB_ROOT;
38static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER;
39
40/* pthread mutex API */
41
42#ifdef __GLIBC__
43extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr);
44extern int __pthread_mutex_lock(pthread_mutex_t *mutex);
45extern int __pthread_mutex_trylock(pthread_mutex_t *mutex);
46extern int __pthread_mutex_unlock(pthread_mutex_t *mutex);
47extern int __pthread_mutex_destroy(pthread_mutex_t *mutex);
48#else
49#define __pthread_mutex_init NULL
50#define __pthread_mutex_lock NULL
51#define __pthread_mutex_trylock NULL
52#define __pthread_mutex_unlock NULL
53#define __pthread_mutex_destroy NULL
54#endif
55static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex,
56 const pthread_mutexattr_t *attr) = __pthread_mutex_init;
57static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex) = __pthread_mutex_lock;
58static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex) = __pthread_mutex_trylock;
59static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex) = __pthread_mutex_unlock;
60static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex) = __pthread_mutex_destroy;
61
62/* pthread rwlock API */
63
64#ifdef __GLIBC__
65extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr);
66extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock);
67extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);
68extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock);
69extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);
70extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);
71extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock);
72#else
73#define __pthread_rwlock_init NULL
74#define __pthread_rwlock_destroy NULL
75#define __pthread_rwlock_wrlock NULL
76#define __pthread_rwlock_trywrlock NULL
77#define __pthread_rwlock_rdlock NULL
78#define __pthread_rwlock_tryrdlock NULL
79#define __pthread_rwlock_unlock NULL
80#endif
81
82static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock,
83 const pthread_rwlockattr_t *attr) = __pthread_rwlock_init;
84static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock) = __pthread_rwlock_destroy;
85static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_rdlock;
86static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_tryrdlock;
87static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_trywrlock;
88static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_wrlock;
89static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_unlock;
90
91enum { none, prepare, done, } __init_state;
92static void init_preload(void);
93static void try_init_preload(void)
94{
 95	if (__init_state != done)
96 init_preload();
97}
98
99static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent)
100{
101 struct rb_node **node = &locks.rb_node;
102 struct lock_lookup *l;
103
104 *parent = NULL;
105
106 while (*node) {
107 l = rb_entry(*node, struct lock_lookup, node);
108
109 *parent = *node;
110 if (lock < l->orig)
111 node = &l->node.rb_left;
112 else if (lock > l->orig)
113 node = &l->node.rb_right;
114 else
115 return node;
116 }
117
118 return node;
119}
120
121#ifndef LIBLOCKDEP_STATIC_ENTRIES
122#define LIBLOCKDEP_STATIC_ENTRIES 1024
123#endif
124
125#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
126
127static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES];
128static int __locks_nr;
129
130static inline bool is_static_lock(struct lock_lookup *lock)
131{
132 return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks);
133}
134
135static struct lock_lookup *alloc_lock(void)
136{
137 if (__init_state != done) {
138 /*
139 * Some programs attempt to initialize and use locks in their
140 * allocation path. This means that a call to malloc() would
141 * result in locks being initialized and locked.
142 *
143 * Why is it an issue for us? dlsym() below will try allocating
144 * to give us the original function. Since this allocation will
 145	 * result in a locking operation, we have to let pthread deal
 146	 * with it, but we can't! We don't have the pointer to the
 147	 * original API since we're inside dlsym() trying to get it.
148 */
149
150 int idx = __locks_nr++;
151 if (idx >= ARRAY_SIZE(__locks)) {
152 fprintf(stderr,
153 "LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n");
154 exit(EX_UNAVAILABLE);
155 }
156 return __locks + idx;
157 }
158
159 return malloc(sizeof(struct lock_lookup));
160}
161
162static inline void free_lock(struct lock_lookup *lock)
163{
164 if (likely(!is_static_lock(lock)))
165 free(lock);
166}
167
168/**
169 * __get_lock - find or create a lock instance
170 * @lock: pointer to a pthread lock function
171 *
172 * Try to find an existing lock in the rbtree using the provided pointer. If
173 * one wasn't found - create it.
174 */
175static struct lock_lookup *__get_lock(void *lock)
176{
177 struct rb_node **node, *parent;
178 struct lock_lookup *l;
179
180 ll_pthread_rwlock_rdlock(&locks_rwlock);
181 node = __get_lock_node(lock, &parent);
182 ll_pthread_rwlock_unlock(&locks_rwlock);
183 if (*node) {
184 return rb_entry(*node, struct lock_lookup, node);
185 }
186
187 /* We didn't find the lock, let's create it */
188 l = alloc_lock();
189 if (l == NULL)
190 return NULL;
191
192 l->orig = lock;
193 /*
194 * Currently the name of the lock is the ptr value of the pthread lock,
195 * while not optimal, it makes debugging a bit easier.
196 *
197 * TODO: Get the real name of the lock using libdwarf
198 */
199 sprintf(l->name, "%p", lock);
200 lockdep_init_map(&l->dep_map, l->name, &l->key, 0);
201
202 ll_pthread_rwlock_wrlock(&locks_rwlock);
203 /* This might have changed since the last time we fetched it */
204 node = __get_lock_node(lock, &parent);
205 rb_link_node(&l->node, parent, node);
206 rb_insert_color(&l->node, &locks);
207 ll_pthread_rwlock_unlock(&locks_rwlock);
208
209 return l;
210}
211
212static void __del_lock(struct lock_lookup *lock)
213{
214 ll_pthread_rwlock_wrlock(&locks_rwlock);
215 rb_erase(&lock->node, &locks);
216 ll_pthread_rwlock_unlock(&locks_rwlock);
217 free_lock(lock);
218}
219
220int pthread_mutex_init(pthread_mutex_t *mutex,
221 const pthread_mutexattr_t *attr)
222{
223 int r;
224
225 /*
226 * We keep trying to init our preload module because there might be
227 * code in init sections that tries to touch locks before we are
228 * initialized, in that case we'll need to manually call preload
229 * to get us going.
230 *
231 * Funny enough, kernel's lockdep had the same issue, and used
232 * (almost) the same solution. See look_up_lock_class() in
233 * kernel/locking/lockdep.c for details.
234 */
235 try_init_preload();
236
237 r = ll_pthread_mutex_init(mutex, attr);
238 if (r == 0)
239 /*
240 * We do a dummy initialization here so that lockdep could
241 * warn us if something fishy is going on - such as
242 * initializing a held lock.
243 */
244 __get_lock(mutex);
245
246 return r;
247}
248
249int pthread_mutex_lock(pthread_mutex_t *mutex)
250{
251 int r;
252
253 try_init_preload();
254
255 lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 2, NULL,
256 (unsigned long)_RET_IP_);
257 /*
258 * Here's the thing with pthread mutexes: unlike the kernel variant,
259 * they can fail.
260 *
261 * This means that the behaviour here is a bit different from what's
262 * going on in the kernel: there we just tell lockdep that we took the
263 * lock before actually taking it, but here we must deal with the case
264 * that locking failed.
265 *
266 * To do that we'll "release" the lock if locking failed - this way
267 * we'll get lockdep doing the correct checks when we try to take
268 * the lock, and if that fails - we'll be back to the correct
269 * state by releasing it.
270 */
271 r = ll_pthread_mutex_lock(mutex);
272 if (r)
273 lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
274
275 return r;
276}
277
278int pthread_mutex_trylock(pthread_mutex_t *mutex)
279{
280 int r;
281
282 try_init_preload();
283
284 lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_);
285 r = ll_pthread_mutex_trylock(mutex);
286 if (r)
287 lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
288
289 return r;
290}
291
292int pthread_mutex_unlock(pthread_mutex_t *mutex)
293{
294 int r;
295
296 try_init_preload();
297
298 lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
299 /*
300 * Just like taking a lock, only in reverse!
301 *
302 * If we fail releasing the lock, tell lockdep we're holding it again.
303 */
304 r = ll_pthread_mutex_unlock(mutex);
305 if (r)
306 lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_);
307
308 return r;
309}
310
311int pthread_mutex_destroy(pthread_mutex_t *mutex)
312{
313 try_init_preload();
314
315 /*
316 * Let's see if we're releasing a lock that's held.
317 *
318 * TODO: Hook into free() and add that check there as well.
319 */
 320	debug_check_no_locks_freed(mutex, sizeof(*mutex));
321 __del_lock(__get_lock(mutex));
322 return ll_pthread_mutex_destroy(mutex);
323}
324
325/* This is the rwlock part, very similar to the mutex wrappers above */
326int pthread_rwlock_init(pthread_rwlock_t *rwlock,
327 const pthread_rwlockattr_t *attr)
328{
329 int r;
330
331 try_init_preload();
332
333 r = ll_pthread_rwlock_init(rwlock, attr);
334 if (r == 0)
335 __get_lock(rwlock);
336
337 return r;
338}
339
340int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
341{
342 try_init_preload();
343
344 debug_check_no_locks_freed(rwlock, sizeof(*rwlock));
345 __del_lock(__get_lock(rwlock));
346 return ll_pthread_rwlock_destroy(rwlock);
347}
348
349int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
350{
351 int r;
352
353 init_preload();
354
355 lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 2, NULL, (unsigned long)_RET_IP_);
356 r = ll_pthread_rwlock_rdlock(rwlock);
357 if (r)
358 lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
359
360 return r;
361}
362
363int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
364{
365 int r;
366
367 init_preload();
368
369 lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 2, NULL, (unsigned long)_RET_IP_);
370 r = ll_pthread_rwlock_tryrdlock(rwlock);
371 if (r)
372 lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
373
374 return r;
375}
376
377int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
378{
379 int r;
380
381 init_preload();
382
383 lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_);
384 r = ll_pthread_rwlock_trywrlock(rwlock);
385 if (r)
386 lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
387
388 return r;
389}
390
391int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
392{
393 int r;
394
395 init_preload();
396
397 lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_);
398 r = ll_pthread_rwlock_wrlock(rwlock);
399 if (r)
400 lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
401
402 return r;
403}
404
405int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
406{
407 int r;
408
409 init_preload();
410
411 lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
412 r = ll_pthread_rwlock_unlock(rwlock);
413 if (r)
414 lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_);
415
416 return r;
417}
418
419__attribute__((constructor)) static void init_preload(void)
420{
421 if (__init_state == done)
422 return;
423
424#ifndef __GLIBC__
425 __init_state = prepare;
426
427 ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init");
428 ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
429 ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
430 ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
431 ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy");
432
433 ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init");
434 ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy");
435 ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock");
436 ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock");
437 ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock");
438 ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock");
439 ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
440#endif
441
442
443
444 lockdep_init();
445
446 __init_state = done;
447}
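The acquire-then-roll-back pattern in the pthread_mutex_lock() and pthread_mutex_unlock() wrappers above only exists because, unlike the kernel variants, the underlying pthread calls can legitimately fail. A minimal sketch of a caller that actually hits that error path, assuming the wrappers above are preloaded (the program below is illustrative and not part of this patch): with an error-checking mutex, a relock returns EDEADLK instead of deadlocking, and the wrapper's lock_release() keeps lockdep's held-lock state consistent with what the program really holds.

	#include <pthread.h>
	#include <errno.h>
	#include <stdio.h>

	int main(void)
	{
		pthread_mutexattr_t attr;
		pthread_mutex_t m;

		pthread_mutexattr_init(&attr);
		pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
		pthread_mutex_init(&m, &attr);

		pthread_mutex_lock(&m);

		/*
		 * Relocking an error-checking mutex fails with EDEADLK rather
		 * than deadlocking; the preloaded wrapper then calls
		 * lock_release(), so lockdep still sees the mutex as held
		 * exactly once.  (lockdep is also expected to complain about
		 * the recursive acquire itself, much like tests/AA.c does.)
		 */
		if (pthread_mutex_lock(&m) == EDEADLK)
			printf("relock failed as expected\n");

		pthread_mutex_unlock(&m);
		pthread_mutex_destroy(&m);
		return 0;
	}

Built with gcc -pthread and run through the ./lockdep wrapper script (the same way run_tests.sh runs the preload tests), the failed relock leaves the wrappers' bookkeeping in step with the single lock the program still holds.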
diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c
new file mode 100644
index 000000000000..f7f43033c8b7
--- /dev/null
+++ b/tools/lib/lockdep/rbtree.c
@@ -0,0 +1 @@
#include "../../../lib/rbtree.c"
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
new file mode 100644
index 000000000000..5334ad9d39b7
--- /dev/null
+++ b/tools/lib/lockdep/run_tests.sh
@@ -0,0 +1,27 @@
1#! /bin/bash
2
3make &> /dev/null
4
5for i in `ls tests/*.c`; do
6 testname=$(basename -s .c "$i")
7 gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
8 echo -ne "$testname... "
9 if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then
10 echo "PASSED!"
11 else
12 echo "FAILED!"
13 fi
14 rm tests/$testname
15done
16
17for i in `ls tests/*.c`; do
18 testname=$(basename -s .c "$i")
19 gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null
20 echo -ne "(PRELOAD) $testname... "
21 if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then
22 echo "PASSED!"
23 else
24 echo "FAILED!"
25 fi
26 rm tests/$testname
27done
diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c
new file mode 100644
index 000000000000..0f782ff404ac
--- /dev/null
+++ b/tools/lib/lockdep/tests/AA.c
@@ -0,0 +1,13 @@
1#include <liblockdep/mutex.h>
2
3void main(void)
4{
5 pthread_mutex_t a, b;
6
7 pthread_mutex_init(&a, NULL);
8 pthread_mutex_init(&b, NULL);
9
10 pthread_mutex_lock(&a);
11 pthread_mutex_lock(&b);
12 pthread_mutex_lock(&a);
13}
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
new file mode 100644
index 000000000000..07f0e29d5485
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA.c
@@ -0,0 +1,13 @@
1#include <liblockdep/mutex.h>
2#include "common.h"
3
4void main(void)
5{
6 pthread_mutex_t a, b;
7
8 pthread_mutex_init(&a, NULL);
9 pthread_mutex_init(&b, NULL);
10
11 LOCK_UNLOCK_2(a, b);
12 LOCK_UNLOCK_2(b, a);
13}
diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c
new file mode 100644
index 000000000000..843db09ac666
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCA.c
@@ -0,0 +1,15 @@
1#include <liblockdep/mutex.h>
2#include "common.h"
3
4void main(void)
5{
6 pthread_mutex_t a, b, c;
7
8 pthread_mutex_init(&a, NULL);
9 pthread_mutex_init(&b, NULL);
10 pthread_mutex_init(&c, NULL);
11
12 LOCK_UNLOCK_2(a, b);
13 LOCK_UNLOCK_2(b, c);
14 LOCK_UNLOCK_2(c, a);
15}
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c
new file mode 100644
index 000000000000..33620e268f85
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCDDA.c
@@ -0,0 +1,17 @@
1#include <liblockdep/mutex.h>
2#include "common.h"
3
4void main(void)
5{
6 pthread_mutex_t a, b, c, d;
7
8 pthread_mutex_init(&a, NULL);
9 pthread_mutex_init(&b, NULL);
10 pthread_mutex_init(&c, NULL);
11 pthread_mutex_init(&d, NULL);
12
13 LOCK_UNLOCK_2(a, b);
14 LOCK_UNLOCK_2(b, c);
15 LOCK_UNLOCK_2(c, d);
16 LOCK_UNLOCK_2(d, a);
17}
diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c
new file mode 100644
index 000000000000..3fee51e3a68a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCABC.c
@@ -0,0 +1,15 @@
1#include <liblockdep/mutex.h>
2#include "common.h"
3
4void main(void)
5{
6 pthread_mutex_t a, b, c;
7
8 pthread_mutex_init(&a, NULL);
9 pthread_mutex_init(&b, NULL);
10 pthread_mutex_init(&c, NULL);
11
12 LOCK_UNLOCK_2(a, b);
13 LOCK_UNLOCK_2(c, a);
14 LOCK_UNLOCK_2(b, c);
15}
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c
new file mode 100644
index 000000000000..427ba562c75b
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBCDA.c
@@ -0,0 +1,17 @@
1#include <liblockdep/mutex.h>
2#include "common.h"
3
4void main(void)
5{
6 pthread_mutex_t a, b, c, d;
7
8 pthread_mutex_init(&a, NULL);
9 pthread_mutex_init(&b, NULL);
10 pthread_mutex_init(&c, NULL);
11 pthread_mutex_init(&d, NULL);
12
13 LOCK_UNLOCK_2(a, b);
14 LOCK_UNLOCK_2(c, d);
15 LOCK_UNLOCK_2(b, c);
16 LOCK_UNLOCK_2(d, a);
17}
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c
new file mode 100644
index 000000000000..680c6cf3e919
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBDDA.c
@@ -0,0 +1,17 @@
1#include <liblockdep/mutex.h>
2#include "common.h"
3
4void main(void)
5{
6 pthread_mutex_t a, b, c, d;
7
8 pthread_mutex_init(&a, NULL);
9 pthread_mutex_init(&b, NULL);
10 pthread_mutex_init(&c, NULL);
11 pthread_mutex_init(&d, NULL);
12
13 LOCK_UNLOCK_2(a, b);
14 LOCK_UNLOCK_2(c, d);
15 LOCK_UNLOCK_2(b, d);
16 LOCK_UNLOCK_2(d, a);
17}
diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c
new file mode 100644
index 000000000000..d44f77d71029
--- /dev/null
+++ b/tools/lib/lockdep/tests/WW.c
@@ -0,0 +1,13 @@
1#include <liblockdep/rwlock.h>
2
3void main(void)
4{
5 pthread_rwlock_t a, b;
6
7 pthread_rwlock_init(&a, NULL);
8 pthread_rwlock_init(&b, NULL);
9
10 pthread_rwlock_wrlock(&a);
11 pthread_rwlock_rdlock(&b);
12 pthread_rwlock_wrlock(&a);
13}
diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h
new file mode 100644
index 000000000000..d89e94d47d86
--- /dev/null
+++ b/tools/lib/lockdep/tests/common.h
@@ -0,0 +1,12 @@
1#ifndef _LIBLOCKDEP_TEST_COMMON_H
2#define _LIBLOCKDEP_TEST_COMMON_H
3
4#define LOCK_UNLOCK_2(a, b) \
5 do { \
6 pthread_mutex_lock(&(a)); \
7 pthread_mutex_lock(&(b)); \
8 pthread_mutex_unlock(&(b)); \
9 pthread_mutex_unlock(&(a)); \
10 } while(0)
11
12#endif
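For reference, the two LOCK_UNLOCK_2() invocations in tests/ABBA.c above expand (ignoring the do/while wrapper) to the plain nested lock/unlock pairs below. This hand-expanded sketch is illustrative only: it includes <pthread.h> directly and assumes it runs with the wrappers preloaded rather than being built against <liblockdep/mutex.h>. The second pair reverses the ordering of the first, which is exactly the AB/BA inversion lockdep is expected to report.

	#include <pthread.h>

	/* tests/ABBA.c with LOCK_UNLOCK_2() expanded by hand */
	int main(void)
	{
		pthread_mutex_t a, b;

		pthread_mutex_init(&a, NULL);
		pthread_mutex_init(&b, NULL);

		/* LOCK_UNLOCK_2(a, b): take a, then b, release in reverse order */
		pthread_mutex_lock(&a);
		pthread_mutex_lock(&b);
		pthread_mutex_unlock(&b);
		pthread_mutex_unlock(&a);

		/* LOCK_UNLOCK_2(b, a): the reversed ordering */
		pthread_mutex_lock(&b);
		pthread_mutex_lock(&a);
		pthread_mutex_unlock(&a);
		pthread_mutex_unlock(&b);

		return 0;
	}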
diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c
new file mode 100644
index 000000000000..0bc62de686f7
--- /dev/null
+++ b/tools/lib/lockdep/tests/unlock_balance.c
@@ -0,0 +1,12 @@
1#include <liblockdep/mutex.h>
2
3void main(void)
4{
5 pthread_mutex_t a;
6
7 pthread_mutex_init(&a, NULL);
8
9 pthread_mutex_lock(&a);
10 pthread_mutex_unlock(&a);
11 pthread_mutex_unlock(&a);
12}
diff --git a/tools/lib/lockdep/uinclude/asm/hweight.h b/tools/lib/lockdep/uinclude/asm/hweight.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/asm/hweight.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/asm/sections.h b/tools/lib/lockdep/uinclude/asm/sections.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/asm/sections.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/bitops.h b/tools/lib/lockdep/uinclude/linux/bitops.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/bitops.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h
new file mode 100644
index 000000000000..7ac838a1f196
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/compiler.h
@@ -0,0 +1,7 @@
1#ifndef _LIBLOCKDEP_LINUX_COMPILER_H_
2#define _LIBLOCKDEP_LINUX_COMPILER_H_
3
4#define __used __attribute__((__unused__))
5#define unlikely
6
7#endif
diff --git a/tools/lib/lockdep/uinclude/linux/debug_locks.h b/tools/lib/lockdep/uinclude/linux/debug_locks.h
new file mode 100644
index 000000000000..f38eb64df794
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/debug_locks.h
@@ -0,0 +1,12 @@
1#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_
2#define _LIBLOCKDEP_DEBUG_LOCKS_H_
3
4#include <stddef.h>
5#include <linux/compiler.h>
6
7#define DEBUG_LOCKS_WARN_ON(x) (x)
8
9extern bool debug_locks;
10extern bool debug_locks_silent;
11
12#endif
diff --git a/tools/lib/lockdep/uinclude/linux/delay.h b/tools/lib/lockdep/uinclude/linux/delay.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/delay.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/export.h b/tools/lib/lockdep/uinclude/linux/export.h
new file mode 100644
index 000000000000..6bdf3492c535
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/export.h
@@ -0,0 +1,7 @@
1#ifndef _LIBLOCKDEP_LINUX_EXPORT_H_
2#define _LIBLOCKDEP_LINUX_EXPORT_H_
3
4#define EXPORT_SYMBOL(sym)
5#define EXPORT_SYMBOL_GPL(sym)
6
7#endif
diff --git a/tools/lib/lockdep/uinclude/linux/ftrace.h b/tools/lib/lockdep/uinclude/linux/ftrace.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/ftrace.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/gfp.h b/tools/lib/lockdep/uinclude/linux/gfp.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/gfp.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/hardirq.h b/tools/lib/lockdep/uinclude/linux/hardirq.h
new file mode 100644
index 000000000000..c8f3f8f58729
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/hardirq.h
@@ -0,0 +1,11 @@
1#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_
2#define _LIBLOCKDEP_LINUX_HARDIRQ_H_
3
4#define SOFTIRQ_BITS 0UL
5#define HARDIRQ_BITS 0UL
6#define SOFTIRQ_SHIFT 0UL
7#define HARDIRQ_SHIFT 0UL
8#define hardirq_count() 0UL
9#define softirq_count() 0UL
10
11#endif
diff --git a/tools/lib/lockdep/uinclude/linux/hash.h b/tools/lib/lockdep/uinclude/linux/hash.h
new file mode 100644
index 000000000000..0f8479858dc0
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/hash.h
@@ -0,0 +1 @@
#include "../../../include/linux/hash.h"
diff --git a/tools/lib/lockdep/uinclude/linux/interrupt.h b/tools/lib/lockdep/uinclude/linux/interrupt.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/interrupt.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/irqflags.h b/tools/lib/lockdep/uinclude/linux/irqflags.h
new file mode 100644
index 000000000000..6cc296f0fad0
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/irqflags.h
@@ -0,0 +1,38 @@
1#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
2#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
3
4# define trace_hardirq_context(p) 0
5# define trace_softirq_context(p) 0
6# define trace_hardirqs_enabled(p) 0
7# define trace_softirqs_enabled(p) 0
8# define trace_hardirq_enter() do { } while (0)
9# define trace_hardirq_exit() do { } while (0)
10# define lockdep_softirq_enter() do { } while (0)
11# define lockdep_softirq_exit() do { } while (0)
12# define INIT_TRACE_IRQFLAGS
13
14# define stop_critical_timings() do { } while (0)
15# define start_critical_timings() do { } while (0)
16
17#define raw_local_irq_disable() do { } while (0)
18#define raw_local_irq_enable() do { } while (0)
19#define raw_local_irq_save(flags) ((flags) = 0)
20#define raw_local_irq_restore(flags) do { } while (0)
21#define raw_local_save_flags(flags) ((flags) = 0)
22#define raw_irqs_disabled_flags(flags) do { } while (0)
23#define raw_irqs_disabled() 0
24#define raw_safe_halt()
25
26#define local_irq_enable() do { } while (0)
27#define local_irq_disable() do { } while (0)
28#define local_irq_save(flags) ((flags) = 0)
29#define local_irq_restore(flags) do { } while (0)
30#define local_save_flags(flags) ((flags) = 0)
31#define irqs_disabled() (1)
32#define irqs_disabled_flags(flags) (0)
33#define safe_halt() do { } while (0)
34
35#define trace_lock_release(x, y)
36#define trace_lock_acquire(a, b, c, d, e, f, g)
37
38#endif
diff --git a/tools/lib/lockdep/uinclude/linux/kallsyms.h b/tools/lib/lockdep/uinclude/linux/kallsyms.h
new file mode 100644
index 000000000000..b0f2dbdf1a15
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/kallsyms.h
@@ -0,0 +1,32 @@
1#ifndef _LIBLOCKDEP_LINUX_KALLSYMS_H_
2#define _LIBLOCKDEP_LINUX_KALLSYMS_H_
3
4#include <linux/kernel.h>
5#include <stdio.h>
6
7#define KSYM_NAME_LEN 128
8
9struct module;
10
11static inline const char *kallsyms_lookup(unsigned long addr,
12 unsigned long *symbolsize,
13 unsigned long *offset,
14 char **modname, char *namebuf)
15{
16 return NULL;
17}
18
19#include <execinfo.h>
20#include <stdlib.h>
21static inline void print_ip_sym(unsigned long ip)
22{
23 char **name;
24
25 name = backtrace_symbols((void **)&ip, 1);
26
27 printf("%s\n", *name);
28
29 free(name);
30}
31
32#endif
diff --git a/tools/lib/lockdep/uinclude/linux/kern_levels.h b/tools/lib/lockdep/uinclude/linux/kern_levels.h
new file mode 100644
index 000000000000..3b9bade28698
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/kern_levels.h
@@ -0,0 +1,25 @@
1#ifndef __KERN_LEVELS_H__
2#define __KERN_LEVELS_H__
3
4#define KERN_SOH "" /* ASCII Start Of Header */
5#define KERN_SOH_ASCII ''
6
7#define KERN_EMERG KERN_SOH "" /* system is unusable */
8#define KERN_ALERT KERN_SOH "" /* action must be taken immediately */
9#define KERN_CRIT KERN_SOH "" /* critical conditions */
10#define KERN_ERR KERN_SOH "" /* error conditions */
11#define KERN_WARNING KERN_SOH "" /* warning conditions */
12#define KERN_NOTICE KERN_SOH "" /* normal but significant condition */
13#define KERN_INFO KERN_SOH "" /* informational */
14#define KERN_DEBUG KERN_SOH "" /* debug-level messages */
15
16#define KERN_DEFAULT KERN_SOH "" /* the default kernel loglevel */
17
18/*
19 * Annotation for a "continued" line of log printout (only done after a
20 * line that had no enclosing \n). Only to be used by core/arch code
21 * during early bootup (a continued line is not SMP-safe otherwise).
22 */
23#define KERN_CONT ""
24
25#endif
diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h
new file mode 100644
index 000000000000..a11e3c357be7
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/kernel.h
@@ -0,0 +1,44 @@
1#ifndef _LIBLOCKDEP_LINUX_KERNEL_H_
2#define _LIBLOCKDEP_LINUX_KERNEL_H_
3
4#include <linux/export.h>
5#include <linux/types.h>
6#include <linux/rcu.h>
7#include <linux/hardirq.h>
8#include <linux/kern_levels.h>
9
10#ifndef container_of
11#define container_of(ptr, type, member) ({ \
12 const typeof(((type *)0)->member) * __mptr = (ptr); \
13 (type *)((char *)__mptr - offsetof(type, member)); })
14#endif
15
16#define max(x, y) ({ \
17 typeof(x) _max1 = (x); \
18 typeof(y) _max2 = (y); \
19 (void) (&_max1 == &_max2); \
20 _max1 > _max2 ? _max1 : _max2; })
21
22#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
23#define WARN_ON(x) (x)
24#define WARN_ON_ONCE(x) (x)
25#define likely(x) (x)
26#define WARN(x, y, z) (x)
27#define uninitialized_var(x) x
28#define __init
29#define noinline
30#define list_add_tail_rcu list_add_tail
31
32#ifndef CALLER_ADDR0
33#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
34#endif
35
36#ifndef _RET_IP_
37#define _RET_IP_ CALLER_ADDR0
38#endif
39
40#ifndef _THIS_IP_
41#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
42#endif
43
44#endif
diff --git a/tools/lib/lockdep/uinclude/linux/kmemcheck.h b/tools/lib/lockdep/uinclude/linux/kmemcheck.h
new file mode 100644
index 000000000000..94d598bc6abe
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/kmemcheck.h
@@ -0,0 +1,8 @@
1#ifndef _LIBLOCKDEP_LINUX_KMEMCHECK_H_
2#define _LIBLOCKDEP_LINUX_KMEMCHECK_H_
3
4static inline void kmemcheck_mark_initialized(void *address, unsigned int n)
5{
6}
7
8#endif
diff --git a/tools/lib/lockdep/uinclude/linux/linkage.h b/tools/lib/lockdep/uinclude/linux/linkage.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/linkage.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/list.h b/tools/lib/lockdep/uinclude/linux/list.h
new file mode 100644
index 000000000000..6e9ef31ed82e
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/list.h
@@ -0,0 +1 @@
#include "../../../include/linux/list.h"
diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/lib/lockdep/uinclude/linux/lockdep.h
new file mode 100644
index 000000000000..d0f5d6e50214
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/lockdep.h
@@ -0,0 +1,55 @@
1#ifndef _LIBLOCKDEP_LOCKDEP_H_
2#define _LIBLOCKDEP_LOCKDEP_H_
3
4#include <sys/prctl.h>
5#include <sys/syscall.h>
6#include <string.h>
7#include <limits.h>
8#include <linux/utsname.h>
9
10
11#define MAX_LOCK_DEPTH 2000UL
12
13#include "../../../include/linux/lockdep.h"
14
15struct task_struct {
16 u64 curr_chain_key;
17 int lockdep_depth;
18 unsigned int lockdep_recursion;
19 struct held_lock held_locks[MAX_LOCK_DEPTH];
20 gfp_t lockdep_reclaim_gfp;
21 int pid;
22 char comm[17];
23};
24
25extern struct task_struct *__curr(void);
26
27#define current (__curr())
28
29#define debug_locks_off() 1
30#define task_pid_nr(tsk) ((tsk)->pid)
31
32#define KSYM_NAME_LEN 128
33#define printk printf
34
35#define list_del_rcu list_del
36
37#define atomic_t unsigned long
38#define atomic_inc(x) ((*(x))++)
39
40static struct new_utsname *init_utsname(void)
41{
42 static struct new_utsname n = (struct new_utsname) {
43 .release = "liblockdep",
44 .version = LIBLOCKDEP_VERSION,
45 };
46
47 return &n;
48}
49
50#define print_tainted() ""
51#define static_obj(x) 1
52
53#define debug_show_all_locks()
54
55#endif
diff --git a/tools/lib/lockdep/uinclude/linux/module.h b/tools/lib/lockdep/uinclude/linux/module.h
new file mode 100644
index 000000000000..09c7a7be8ccc
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/module.h
@@ -0,0 +1,6 @@
1#ifndef _LIBLOCKDEP_LINUX_MODULE_H_
2#define _LIBLOCKDEP_LINUX_MODULE_H_
3
4#define module_param(name, type, perm)
5
6#endif
diff --git a/tools/lib/lockdep/uinclude/linux/mutex.h b/tools/lib/lockdep/uinclude/linux/mutex.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/mutex.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/poison.h b/tools/lib/lockdep/uinclude/linux/poison.h
new file mode 100644
index 000000000000..0c27bdf14233
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/poison.h
@@ -0,0 +1 @@
#include "../../../include/linux/poison.h"
diff --git a/tools/lib/lockdep/uinclude/linux/prefetch.h b/tools/lib/lockdep/uinclude/linux/prefetch.h
new file mode 100644
index 000000000000..d73fe6f850ac
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/prefetch.h
@@ -0,0 +1,6 @@
1#ifndef _LIBLOCKDEP_LINUX_PREFETCH_H_
2#define _LIBLOCKDEP_LINUX_PREFETCH_H_
3
4static inline void prefetch(void *a __attribute__((unused))) { }
5
6#endif
diff --git a/tools/lib/lockdep/uinclude/linux/proc_fs.h b/tools/lib/lockdep/uinclude/linux/proc_fs.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/proc_fs.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/rbtree.h b/tools/lib/lockdep/uinclude/linux/rbtree.h
new file mode 100644
index 000000000000..965901db4862
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/rbtree.h
@@ -0,0 +1 @@
#include "../../../include/linux/rbtree.h"
diff --git a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h
new file mode 100644
index 000000000000..c3759477379c
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h
@@ -0,0 +1,2 @@
1#define __always_inline
2#include "../../../include/linux/rbtree_augmented.h"
diff --git a/tools/lib/lockdep/uinclude/linux/rcu.h b/tools/lib/lockdep/uinclude/linux/rcu.h
new file mode 100644
index 000000000000..4c99fcb5da27
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/rcu.h
@@ -0,0 +1,16 @@
1#ifndef _LIBLOCKDEP_RCU_H_
2#define _LIBLOCKDEP_RCU_H_
3
4int rcu_scheduler_active;
5
6static inline int rcu_lockdep_current_cpu_online(void)
7{
8 return 1;
9}
10
11static inline int rcu_is_cpu_idle(void)
12{
13 return 1;
14}
15
16#endif
diff --git a/tools/lib/lockdep/uinclude/linux/seq_file.h b/tools/lib/lockdep/uinclude/linux/seq_file.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/seq_file.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3
diff --git a/tools/lib/lockdep/uinclude/linux/spinlock.h b/tools/lib/lockdep/uinclude/linux/spinlock.h
new file mode 100644
index 000000000000..68c1aa2bcba5
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/spinlock.h
@@ -0,0 +1,25 @@
1#ifndef _LIBLOCKDEP_SPINLOCK_H_
2#define _LIBLOCKDEP_SPINLOCK_H_
3
4#include <pthread.h>
5#include <stdbool.h>
6
7#define arch_spinlock_t pthread_mutex_t
8#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
9
10static inline void arch_spin_lock(arch_spinlock_t *mutex)
11{
12 pthread_mutex_lock(mutex);
13}
14
15static inline void arch_spin_unlock(arch_spinlock_t *mutex)
16{
17 pthread_mutex_unlock(mutex);
18}
19
20static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
21{
22 return true;
23}
24
25#endif
diff --git a/tools/lib/lockdep/uinclude/linux/stacktrace.h b/tools/lib/lockdep/uinclude/linux/stacktrace.h
new file mode 100644
index 000000000000..39aecc6b19d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/stacktrace.h
@@ -0,0 +1,32 @@
1#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_
2#define _LIBLOCKDEP_LINUX_STACKTRACE_H_
3
4#include <execinfo.h>
5
6struct stack_trace {
7 unsigned int nr_entries, max_entries;
8 unsigned long *entries;
9 int skip;
10};
11
12static inline void print_stack_trace(struct stack_trace *trace, int spaces)
13{
14 backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1);
15}
16
17#define save_stack_trace(trace) \
18 ((trace)->nr_entries = \
19 backtrace((void **)(trace)->entries, (trace)->max_entries))
20
21static inline int dump_stack(void)
22{
23 void *array[64];
24 size_t size;
25
26 size = backtrace(array, 64);
27 backtrace_symbols_fd(array, size, 1);
28
29 return 0;
30}
31
32#endif
diff --git a/tools/lib/lockdep/uinclude/linux/stringify.h b/tools/lib/lockdep/uinclude/linux/stringify.h
new file mode 100644
index 000000000000..05dfcd1ac118
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/stringify.h
@@ -0,0 +1,7 @@
1#ifndef _LIBLOCKDEP_LINUX_STRINGIFY_H_
2#define _LIBLOCKDEP_LINUX_STRINGIFY_H_
3
4#define __stringify_1(x...) #x
5#define __stringify(x...) __stringify_1(x)
6
7#endif
diff --git a/tools/lib/lockdep/uinclude/linux/types.h b/tools/lib/lockdep/uinclude/linux/types.h
new file mode 100644
index 000000000000..929938f426de
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/types.h
@@ -0,0 +1,58 @@
1#ifndef _LIBLOCKDEP_LINUX_TYPES_H_
2#define _LIBLOCKDEP_LINUX_TYPES_H_
3
4#include <stdbool.h>
5#include <stddef.h>
6
7#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
8#include <asm/types.h>
9
10struct page;
11struct kmem_cache;
12
13typedef unsigned gfp_t;
14
15typedef __u64 u64;
16typedef __s64 s64;
17
18typedef __u32 u32;
19typedef __s32 s32;
20
21typedef __u16 u16;
22typedef __s16 s16;
23
24typedef __u8 u8;
25typedef __s8 s8;
26
27#ifdef __CHECKER__
28#define __bitwise__ __attribute__((bitwise))
29#else
30#define __bitwise__
31#endif
32#ifdef __CHECK_ENDIAN__
33#define __bitwise __bitwise__
34#else
35#define __bitwise
36#endif
37
38
39typedef __u16 __bitwise __le16;
40typedef __u16 __bitwise __be16;
41typedef __u32 __bitwise __le32;
42typedef __u32 __bitwise __be32;
43typedef __u64 __bitwise __le64;
44typedef __u64 __bitwise __be64;
45
46struct list_head {
47 struct list_head *next, *prev;
48};
49
50struct hlist_head {
51 struct hlist_node *first;
52};
53
54struct hlist_node {
55 struct hlist_node *next, **pprev;
56};
57
58#endif
diff --git a/tools/lib/lockdep/uinclude/trace/events/lock.h b/tools/lib/lockdep/uinclude/trace/events/lock.h
new file mode 100644
index 000000000000..fab00ff936d1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/trace/events/lock.h
@@ -0,0 +1,3 @@
1
2/* empty file */
3