 40 files changed, 875 insertions(+), 485 deletions(-)
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst
index fce929144ccd..2e7165f86f55 100644
--- a/Documentation/core-api/atomic_ops.rst
+++ b/Documentation/core-api/atomic_ops.rst
@@ -111,7 +111,6 @@ If the compiler can prove that do_something() does not store to the
 variable a, then the compiler is within its rights transforming this to
 the following::
 
-	tmp = a;
 	if (a > 0)
 		for (;;)
 			do_something();
@@ -119,7 +118,7 @@ the following::
 If you don't want the compiler to do this (and you probably don't), then
 you should use something like the following::
 
-	while (READ_ONCE(a) < 0)
+	while (READ_ONCE(a) > 0)
 		do_something();
 
 Alternatively, you could place a barrier() call in the loop.
@@ -467,10 +466,12 @@ Like the above, except that these routines return a boolean which
 indicates whether the changed bit was set _BEFORE_ the atomic bit
 operation.
 
-WARNING! It is incredibly important that the value be a boolean,
-ie. "0" or "1". Do not try to be fancy and save a few instructions by
-declaring the above to return "long" and just returning something like
-"old_val & mask" because that will not work.
+
+.. warning::
+	It is incredibly important that the value be a boolean, ie. "0" or "1".
+	Do not try to be fancy and save a few instructions by declaring the
+	above to return "long" and just returning something like "old_val &
+	mask" because that will not work.
 
 For one thing, this return value gets truncated to int in many code
 paths using these interfaces, so on 64-bit if the bit is set in the
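
The corrected example above is easy to reproduce outside the kernel. The sketch below is a minimal user-space model of READ_ONCE() -- READ_ONCE_MODEL(), the variable a and do_something() are illustrative names, not part of the patch -- showing why the volatile access forces the loop condition to be re-evaluated on every iteration:

	#include <stdio.h>

	/* Hypothetical user-space model of READ_ONCE(): the volatile cast
	 * forces the compiler to reload the variable on every iteration. */
	#define READ_ONCE_MODEL(x) (*(volatile __typeof__(x) *)&(x))

	static int a = 3;

	static void do_something(void)
	{
		a--;			/* something eventually changes 'a' */
		printf("a = %d\n", a);
	}

	int main(void)
	{
		/* Mirrors the corrected documentation example. */
		while (READ_ONCE_MODEL(a) > 0)
			do_something();
		return 0;
	}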
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 33b8bc9573f8..a02d6bbfc9d0 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1920,9 +1920,6 @@ There are some more advanced barrier functions:
 		/* assign ownership */
 		desc->status = DEVICE_OWN;
 
-		/* force memory to sync before notifying device via MMIO */
-		wmb();
-
 		/* notify device of new descriptors */
 		writel(DESC_NOTIFY, doorbell);
 	}
@@ -1930,11 +1927,15 @@ There are some more advanced barrier functions:
 The dma_rmb() allows us guarantee the device has released ownership
 before we read the data from the descriptor, and the dma_wmb() allows
 us to guarantee the data is written to the descriptor before the device
-can see it now has ownership. The wmb() is needed to guarantee that the
-cache coherent memory writes have completed before attempting a write to
-the cache incoherent MMIO region.
-
-See Documentation/DMA-API.txt for more information on consistent memory.
+can see it now has ownership. Note that, when using writel(), a prior
+wmb() is not needed to guarantee that the cache coherent memory writes
+have completed before writing to the MMIO region. The cheaper
+writel_relaxed() does not provide this guarantee and must not be used
+here.
+
+See the subsection "Kernel I/O barrier effects" for more information on
+relaxed I/O accessors and the Documentation/DMA-API.txt file for more
+information on consistent memory.
 
 
 MMIO WRITE BARRIER
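
The ordering argument above can be modelled, loosely, in user space with C11 release/acquire operations. The sketch below is only an analogy under that assumption -- there is no MMIO here, and the descriptor/doorbell names are made up -- but it shows why a notify operation that itself carries release-style ordering needs no separate barrier in front of it, whereas a fully relaxed store would:

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	struct desc {
		int data;
		int status;
	};

	static struct desc ring;
	static atomic_int doorbell;

	static void *producer(void *arg)
	{
		(void)arg;
		ring.data = 42;				/* fill the descriptor */
		ring.status = 1;			/* assign ownership */
		atomic_store_explicit(&doorbell, 1,	/* "notify the device" */
				      memory_order_release);
		return NULL;
	}

	static void *consumer(void *arg)
	{
		(void)arg;
		while (!atomic_load_explicit(&doorbell, memory_order_acquire))
			;				/* wait for the doorbell */
		printf("data=%d status=%d\n", ring.data, ring.status);
		return NULL;
	}

	int main(void)
	{
		pthread_t p, c;

		pthread_create(&p, NULL, producer, NULL);
		pthread_create(&c, NULL, consumer, NULL);
		pthread_join(p, NULL);
		pthread_join(c, NULL);
		return 0;
	}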
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 2ec5fe0c9cf4..921739d00f69 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -36,6 +36,9 @@ Documentation/memory-barriers.txt
 부분도 있고, 의도하진 않았지만 사람에 의해 쓰였다보니 불완전한 부분도 있습니다.
 이 문서는 리눅스에서 제공하는 다양한 메모리 배리어들을 사용하기 위한
 안내서입니다만, 뭔가 이상하다 싶으면 (그런게 많을 겁니다) 질문을 부탁드립니다.
+일부 이상한 점들은 공식적인 메모리 일관성 모델과 tools/memory-model/ 에 있는
+관련 문서를 참고해서 해결될 수 있을 겁니다.  그러나, 이 메모리 모델조차도 그
+관리자들의 의견의 집합으로 봐야지, 절대 옳은 예언자로 신봉해선 안될 겁니다.
 
 다시 말하지만, 이 문서는 리눅스가 하드웨어에 기대하는 사항에 대한 명세서가
 아닙니다.
@@ -77,7 +80,7 @@ Documentation/memory-barriers.txt
 
      - 메모리 배리어의 종류.
      - 메모리 배리어에 대해 가정해선 안될 것.
-     - 데이터 의존성 배리어.
+     - 데이터 의존성 배리어 (역사적).
      - 컨트롤 의존성.
      - SMP 배리어 짝맞추기.
      - 메모리 배리어 시퀀스의 예.
@@ -255,17 +258,20 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니다:
 (*) 어떤 CPU 든, 의존성이 존재하는 메모리 액세스들은 해당 CPU 자신에게
     있어서는 순서대로 메모리 시스템에 수행 요청됩니다. 즉, 다음에 대해서:
 
-	Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
+	Q = READ_ONCE(P); D = READ_ONCE(*Q);
 
     CPU 는 다음과 같은 메모리 오퍼레이션 시퀀스를 수행 요청합니다:
 
 	Q = LOAD P, D = LOAD *Q
 
-    그리고 그 시퀀스 내에서의 순서는 항상 지켜집니다.  대부분의 시스템에서
-    smp_read_barrier_depends() 는 아무일도 안하지만 DEC Alpha 에서는
-    명시적으로 사용되어야 합니다.  보통의 경우에는 smp_read_barrier_depends()
-    를 직접 사용하는 대신 rcu_dereference() 같은 것들을 사용해야 함을
-    알아두세요.
+    그리고 그 시퀀스 내에서의 순서는 항상 지켜집니다.  하지만, DEC Alpha 에서
+    READ_ONCE() 는 메모리 배리어 명령도 내게 되어 있어서, DEC Alpha CPU 는
+    다음과 같은 메모리 오퍼레이션들을 내놓게 됩니다:
+
+	Q = LOAD P, MEMORY_BARRIER, D = LOAD *Q, MEMORY_BARRIER
+
+    DEC Alpha 에서 수행되든 아니든, READ_ONCE() 는 컴파일러로부터의 악영향
+    또한 제거합니다.
 
 (*) 특정 CPU 내에서 겹치는 영역의 메모리에 행해지는 로드와 스토어 들은 해당
     CPU 안에서는 순서가 바뀌지 않은 것으로 보여집니다. 즉, 다음에 대해서:
@@ -421,8 +427,8 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니다:
     데이터 의존성 배리어는 읽기 배리어의 보다 완화된 형태입니다.  두개의 로드
     오퍼레이션이 있고 두번째 것이 첫번째 것의 결과에 의존하고 있을 때(예:
     두번째 로드가 참조할 주소를 첫번째 로드가 읽는 경우), 두번째 로드가 읽어올
-    데이터는 첫번째 로드에 의해 그 주소가 얻어지기 전에 업데이트 되어 있음을
-    보장하기 위해서 데이터 의존성 배리어가 필요할 수 있습니다.
+    데이터는 첫번째 로드에 의해 그 주소가 얻어진 뒤에 업데이트 됨을 보장하기
+    위해서 데이터 의존성 배리어가 필요할 수 있습니다.
 
     데이터 의존성 배리어는 상호 의존적인 로드 오퍼레이션들 사이의 부분적 순서
     세우기입니다; 스토어 오퍼레이션들이나 독립적인 로드들, 또는 중복되는
@@ -570,8 +576,14 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE
 	Documentation/DMA-API.txt
 
 
-데이터 의존성 배리어
---------------------
+데이터 의존성 배리어 (역사적)
+-----------------------------
+
+리눅스 커널 v4.15 기준으로, smp_read_barrier_depends() 가 READ_ONCE() 에
+추가되었는데, 이는 이 섹션에 주의를 기울여야 하는 사람들은 DEC Alpha 아키텍쳐
+전용 코드를 만드는 사람들과 READ_ONCE() 자체를 만드는 사람들 뿐임을 의미합니다.
+그런 분들을 위해, 그리고 역사에 관심 있는 분들을 위해, 여기 데이터 의존성
+배리어에 대한 이야기를 적습니다.
 
 데이터 의존성 배리어의 사용에 있어 지켜야 하는 사항들은 약간 미묘하고, 데이터
 의존성 배리어가 사용되어야 하는 상황도 항상 명백하지는 않습니다.  설명을 위해
@@ -1787,7 +1799,7 @@ CPU 메모리 배리어
 	범용		mb()			smp_mb()
 	쓰기		wmb()			smp_wmb()
 	읽기		rmb()			smp_rmb()
-	데이터 의존성	read_barrier_depends()	smp_read_barrier_depends()
+	데이터 의존성				READ_ONCE()
 
 
 데이터 의존성 배리어를 제외한 모든 메모리 배리어는 컴파일러 배리어를
@@ -2796,8 +2808,9 @@ CPU 2 는 C/D 를 갖습니다)가 병렬로 연결되어 있는 시스템을
 
 
 여기에 개입하기 위해선, 데이터 의존성 배리어나 읽기 배리어를 로드 오퍼레이션들
-사이에 넣어야 합니다.  이렇게 함으로써 캐시가 다음 요청을 처리하기 전에 일관성
-큐를 처리하도록 강제하게 됩니다.
+사이에 넣어야 합니다 (v4.15 부터는 READ_ONCE() 매크로에 의해 무조건적으로
+그렇게 됩니다).  이렇게 함으로써 캐시가 다음 요청을 처리하기 전에 일관성 큐를
+처리하도록 강제하게 됩니다.
 
 	CPU 1		CPU 2		COMMENT
 	=============== =============== =======================================
@@ -2826,7 +2839,10 @@ CPU 2 는 C/D 를 갖습니다)가 병렬로 연결되어 있는 시스템을
 다른 CPU 들도 분할된 캐시를 가지고 있을 수 있지만, 그런 CPU 들은 평범한 메모리
 액세스를 위해서도 이 분할된 캐시들 사이의 조정을 해야만 합니다.  Alpha 는 가장
 약한 메모리 순서 시맨틱 (semantic) 을 선택함으로써 메모리 배리어가 명시적으로
-사용되지 않았을 때에는 그런 조정이 필요하지 않게 했습니다.
+사용되지 않았을 때에는 그런 조정이 필요하지 않게 했으며, 이는 Alpha 가 당시에
+더 높은 CPU 클락 속도를 가질 수 있게 했습니다.  하지만, (다시 말하건대, v4.15
+이후부터는) Alpha 아키텍쳐 전용 코드와 READ_ONCE() 매크로 내부에서를 제외하고는
+smp_read_barrier_depends() 가 사용되지 않아야 함을 알아두시기 바랍니다.
 
 
 캐시 일관성 VS DMA
@@ -2988,7 +3004,9 @@ Alpha CPU 의 일부 버전은 분할된 데이터 캐시를 가지고 있어서
 메모리 일관성 시스템과 함께 두개의 캐시를 동기화 시켜서, 포인터 변경과 새로운
 데이터의 발견을 올바른 순서로 일어나게 하기 때문입니다.
 
-리눅스 커널의 메모리 배리어 모델은 Alpha 에 기초해서 정의되었습니다.
+리눅스 커널의 메모리 배리어 모델은 Alpha 에 기초해서 정의되었습니다만, v4.15
+부터는 리눅스 커널이 READ_ONCE() 내에 smp_read_barrier_depends() 를 추가해서
+Alpha 의 메모리 모델로의 영향력이 크게 줄어들긴 했습니다.
 
 위의 "캐시 일관성" 서브섹션을 참고하세요.
 
diff --git a/MAINTAINERS b/MAINTAINERS
index fdf15f3de473..aa635837a6af 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8212,7 +8212,7 @@ F:	drivers/misc/lkdtm/*
 
 LINUX KERNEL MEMORY CONSISTENCY MODEL (LKMM)
 M:	Alan Stern <stern@rowland.harvard.edu>
-M:	Andrea Parri <parri.andrea@gmail.com>
+M:	Andrea Parri <andrea.parri@amarulasolutions.com>
 M:	Will Deacon <will.deacon@arm.com>
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Boqun Feng <boqun.feng@gmail.com>
@@ -8319,6 +8319,7 @@ F:	Documentation/admin-guide/LSM/LoadPin.rst
 LOCKING PRIMITIVES
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>
+M:	Will Deacon <will.deacon@arm.com>
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
 S:	Maintained
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index ebdae15d665d..26c5bd7d88d8 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -122,11 +122,6 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	/*
-	 * Ensure prior spin_lock operations to other locks have completed
-	 * on this CPU before we test whether "lock" is locked.
-	 */
-	smp_mb(); /* ^^^ */
 	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 5e16b5d40d32..3e70bed8a978 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -7,6 +7,14 @@
 #include <asm-generic/qspinlock_types.h>
 #include <asm/paravirt.h>
 
+#define _Q_PENDING_LOOPS	(1 << 9)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
 #define	queued_spin_unlock queued_spin_unlock
 /**
  * queued_spin_unlock - release a queued spinlock
@@ -16,15 +24,9 @@
  */
 static inline void native_queued_spin_unlock(struct qspinlock *lock)
 {
-	smp_store_release((u8 *)lock, 0);
+	smp_store_release(&lock->locked, 0);
 }
 
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_init_lock_hash(void);
-extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
-
 static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
 	pv_queued_spin_lock_slowpath(lock, val);
@@ -40,11 +42,6 @@ static inline bool vcpu_is_preempted(long cpu)
 {
 	return pv_vcpu_is_preempted(cpu);
 }
-#else
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
-	native_queued_spin_unlock(lock);
-}
 #endif
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 923307ea11c7..9ef5ee03d2d7 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
 *
 * void __pv_queued_spin_unlock(struct qspinlock *lock)
 * {
-*	struct __qspinlock *l = (void *)lock;
-*	u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+*	u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
 *
 *	if (likely(lockval == _Q_LOCKED_VAL))
 *		return;
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 34a028a7bcc5..87d14476edc2 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -25,6 +25,7 @@ typedef atomic64_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
 #define ATOMIC_LONG_PFX(x)	atomic64 ## x
+#define ATOMIC_LONG_TYPE	s64
 
 #else
 
@@ -32,6 +33,7 @@ typedef atomic_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
 #define ATOMIC_LONG_PFX(x)	atomic ## x
+#define ATOMIC_LONG_TYPE	int
 
 #endif
 
@@ -90,6 +92,21 @@ ATOMIC_LONG_ADD_SUB_OP(sub, _release)
 #define atomic_long_cmpxchg(l, old, new) \
 	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
 
+
+#define atomic_long_try_cmpxchg_relaxed(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
+					(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_acquire(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
+					(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_release(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
+					(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), \
+					(ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+
+
 #define atomic_long_xchg_relaxed(v, new) \
 	(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 #define atomic_long_xchg_acquire(v, new) \
@@ -244,6 +261,8 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 #define atomic_long_inc_not_zero(l) \
 	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
 
+#define atomic_long_cond_read_relaxed(v, c) \
+	ATOMIC_LONG_PFX(_cond_read_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (c))
 #define atomic_long_cond_read_acquire(v, c) \
 	ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c))
 
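
The new atomic_long_try_cmpxchg*() wrappers follow the try_cmpxchg() convention: they return a boolean and, on failure, write the observed value back through the old pointer. A hedged user-space analogy using C11 atomic_compare_exchange_strong(), which has the same shape, is sketched below; try_take() and owner are hypothetical names. The __mutex_trylock_fast() change later in this series uses the acquire variant in exactly this way:

	#include <stdatomic.h>
	#include <stdio.h>

	/* On failure the current value is written back into 'expected',
	 * so retry loops need no extra load -- same shape as the kernel macros. */
	static atomic_long owner;

	static int try_take(long self)
	{
		long expected = 0;

		return atomic_compare_exchange_strong(&owner, &expected, self);
	}

	int main(void)
	{
		printf("first take:  %d\n", try_take(1));	/* succeeds */
		printf("second take: %d\n", try_take(2));	/* fails, owner is 1 */
		printf("owner: %ld\n", atomic_load(&owner));
		return 0;
	}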
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 29458bbb2fa0..2cafdbb9ae4c 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -221,18 +221,17 @@ do {								\
 #endif
 
 /**
- * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarantees
  * @ptr: pointer to the variable to wait on
  * @cond: boolean expression to wait for
  *
- * Equivalent to using smp_load_acquire() on the condition variable but employs
- * the control dependency of the wait to reduce the barrier on many platforms.
+ * Equivalent to using READ_ONCE() on the condition variable.
  *
  * Due to C lacking lambda expressions we load the value of *ptr into a
  * pre-named variable @VAL to be used in @cond.
  */
-#ifndef smp_cond_load_acquire
-#define smp_cond_load_acquire(ptr, cond_expr) ({		\
+#ifndef smp_cond_load_relaxed
+#define smp_cond_load_relaxed(ptr, cond_expr) ({		\
 	typeof(ptr) __PTR = (ptr);				\
 	typeof(*ptr) VAL;					\
 	for (;;) {						\
@@ -241,10 +240,26 @@ do {								\
 			break;					\
 		cpu_relax();					\
 	}							\
-	smp_acquire__after_ctrl_dep();				\
 	VAL;							\
 })
 #endif
 
+/**
+ * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * @ptr: pointer to the variable to wait on
+ * @cond: boolean expression to wait for
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable but employs
+ * the control dependency of the wait to reduce the barrier on many platforms.
+ */
+#ifndef smp_cond_load_acquire
+#define smp_cond_load_acquire(ptr, cond_expr) ({		\
+	typeof(*ptr) _val;					\
+	_val = smp_cond_load_relaxed(ptr, cond_expr);		\
+	smp_acquire__after_ctrl_dep();				\
+	_val;							\
+})
+#endif
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
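
After this change, smp_cond_load_relaxed() is the plain spin-wait and smp_cond_load_acquire() layers acquire ordering on top of it. The following user-space sketch models that split with C11 atomics and GNU C statement expressions; cond_load_relaxed()/cond_load_acquire() are stand-ins for the kernel macros, and the acquire fence stands in for smp_acquire__after_ctrl_dep():

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	/* Relaxed spin-wait: reload *ptr until cond_expr (which may use VAL)
	 * becomes true, then return the observed value. */
	#define cond_load_relaxed(ptr, cond_expr) ({				\
		int VAL;							\
		for (;;) {							\
			VAL = atomic_load_explicit(ptr, memory_order_relaxed);	\
			if (cond_expr)						\
				break;						\
		}								\
		VAL;								\
	})

	/* Acquire variant: same wait, plus an acquire fence afterwards. */
	#define cond_load_acquire(ptr, cond_expr) ({				\
		int _val = cond_load_relaxed(ptr, cond_expr);			\
		atomic_thread_fence(memory_order_acquire);			\
		_val;								\
	})

	static atomic_int flag;

	static void *setter(void *arg)
	{
		(void)arg;
		atomic_store_explicit(&flag, 2, memory_order_release);
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, setter, NULL);
		printf("saw %d\n", cond_load_acquire(&flag, VAL != 0));
		pthread_join(t, NULL);
		return 0;
	}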
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index b37b4ad7eb94..9cc457597ddf 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -26,7 +26,6 @@
  * @lock: Pointer to queued spinlock structure
  * Return: 1 if it is locked, 0 otherwise
  */
-#ifndef queued_spin_is_locked
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
@@ -35,7 +34,6 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 	 */
 	return atomic_read(&lock->val);
 }
-#endif
 
 /**
  * queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -100,7 +98,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 	/*
 	 * unlock() needs release semantics:
 	 */
-	(void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val);
+	smp_store_release(&lock->locked, 0);
 }
 #endif
 
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 034acd0c4956..0763f065b975 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -29,13 +29,41 @@
 #endif
 
 typedef struct qspinlock {
-	atomic_t	val;
+	union {
+		atomic_t val;
+
+		/*
+		 * By using the whole 2nd least significant byte for the
+		 * pending bit, we can allow better optimization of the lock
+		 * acquisition for the pending bit holder.
+		 */
+#ifdef __LITTLE_ENDIAN
+		struct {
+			u8	locked;
+			u8	pending;
+		};
+		struct {
+			u16	locked_pending;
+			u16	tail;
+		};
+#else
+		struct {
+			u16	tail;
+			u16	locked_pending;
+		};
+		struct {
+			u8	reserved[2];
+			u8	pending;
+			u8	locked;
+		};
+#endif
+	};
 } arch_spinlock_t;
 
 /*
 * Initializier
 */
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ .val = ATOMIC_INIT(0) }
 
 /*
 * Bitfields in the atomic value:
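
The union above lets the slowpath address the locked byte, the pending byte, the locked_pending halfword or the whole lock word. A plain-C mock (atomic_t replaced by a bare uint32_t, everything else dropped) illustrates the aliasing on a little-endian host; on big-endian machines the ordering flips, as the #else branch shows:

	#include <stdint.h>
	#include <stdio.h>

	/* Purely illustrative mock of the new qspinlock layout. */
	struct qspinlock_mock {
		union {
			uint32_t val;
			struct {
				uint8_t locked;
				uint8_t pending;
			};
			struct {
				uint16_t locked_pending;
				uint16_t tail;
			};
		};
	};

	int main(void)
	{
		struct qspinlock_mock lock = { .val = 0 };

		lock.locked = 1;	/* lock held */
		lock.pending = 1;	/* pending bit = whole second byte */
		lock.tail = 0x0042;	/* tail CPU/index encoding */

		printf("val            = 0x%08x\n", (unsigned int)lock.val);
		printf("locked_pending = 0x%04x\n", (unsigned int)lock.locked_pending);
		return 0;
	}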
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8b276fd9a127..01ce3997cb42 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -654,6 +654,7 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
+#define atomic_cond_read_relaxed(v, c)	smp_cond_load_relaxed(&(v)->counter, (c))
 #define atomic_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))
 
 #ifdef CONFIG_GENERIC_ATOMIC64
@@ -1075,6 +1076,7 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v)
 }
 #endif
 
+#define atomic64_cond_read_relaxed(v, c)	smp_cond_load_relaxed(&(v)->counter, (c))
 #define atomic64_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))
 
 #include <asm-generic/atomic-long.h>
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 5e335b6203f4..e6c0448ebcc7 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -29,7 +29,7 @@
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 struct task_delay_info {
-	spinlock_t	lock;
+	raw_spinlock_t	lock;
 	unsigned int	flags;	/* Private per-task flags */
 
 	/* For each stat XXX, add following, aligned appropriately
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 14bc0d5d0ee5..3093dd162424 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -146,9 +146,6 @@ extern void __mutex_init(struct mutex *lock, const char *name,
  */
 static inline bool mutex_is_locked(struct mutex *lock)
 {
-	/*
-	 * XXX think about spin_is_locked
-	 */
 	return __mutex_owner(lock) != NULL;
 }
 
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 4894d322d258..1e8a46435838 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -380,6 +380,24 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
 	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
 })
 
+/**
+ * spin_is_locked() - Check whether a spinlock is locked.
+ * @lock: Pointer to the spinlock.
+ *
+ * This function is NOT required to provide any memory ordering
+ * guarantees; it could be used for debugging purposes or, when
+ * additional synchronization is needed, accompanied with other
+ * constructs (memory barriers) enforcing the synchronization.
+ *
+ * Returns: 1 if @lock is locked, 0 otherwise.
+ *
+ * Note that the function only tells you that the spinlock is
+ * seen to be locked, not that it is locked on your CPU.
+ *
+ * Further, on CONFIG_SMP=n builds with CONFIG_DEBUG_SPINLOCK=n,
+ * the return value is always 0 (see include/linux/spinlock_up.h).
+ * Therefore you should not rely heavily on the return value.
+ */
 static __always_inline int spin_is_locked(spinlock_t *lock)
 {
 	return raw_spin_is_locked(&lock->rlock);
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index e2764d767f18..ca8ac2824f0b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -44,23 +44,24 @@ void __delayacct_tsk_init(struct task_struct *tsk)
 {
 	tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
 	if (tsk->delays)
-		spin_lock_init(&tsk->delays->lock);
+		raw_spin_lock_init(&tsk->delays->lock);
 }
 
 /*
  * Finish delay accounting for a statistic using its timestamps (@start),
  * accumalator (@total) and @count
  */
-static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count)
+static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total,
+			  u32 *count)
 {
 	s64 ns = ktime_get_ns() - *start;
 	unsigned long flags;
 
 	if (ns > 0) {
-		spin_lock_irqsave(lock, flags);
+		raw_spin_lock_irqsave(lock, flags);
 		*total += ns;
 		(*count)++;
-		spin_unlock_irqrestore(lock, flags);
+		raw_spin_unlock_irqrestore(lock, flags);
 	}
 }
 
@@ -127,7 +128,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 
 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
 
-	spin_lock_irqsave(&tsk->delays->lock, flags);
+	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
 	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
 	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
 	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
@@ -137,7 +138,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	d->blkio_count += tsk->delays->blkio_count;
 	d->swapin_count += tsk->delays->swapin_count;
 	d->freepages_count += tsk->delays->freepages_count;
-	spin_unlock_irqrestore(&tsk->delays->lock, flags);
+	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
 	return 0;
 }
@@ -147,10 +148,10 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
 	__u64 ret;
 	unsigned long flags;
 
-	spin_lock_irqsave(&tsk->delays->lock, flags);
+	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
 	ret = nsec_to_clock_t(tsk->delays->blkio_delay +
 				tsk->delays->swapin_delay);
-	spin_unlock_irqrestore(&tsk->delays->lock, flags);
+	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 	return ret;
 }
 
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 023386338269..edcac5de7ebc 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -561,20 +561,24 @@ static void print_lock(struct held_lock *hlock)
 	printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip);
 }
 
-static void lockdep_print_held_locks(struct task_struct *curr)
+static void lockdep_print_held_locks(struct task_struct *p)
 {
-	int i, depth = curr->lockdep_depth;
+	int i, depth = READ_ONCE(p->lockdep_depth);
 
-	if (!depth) {
-		printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
+	if (!depth)
+		printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p));
+	else
+		printk("%d lock%s held by %s/%d:\n", depth,
+		       depth > 1 ? "s" : "", p->comm, task_pid_nr(p));
+	/*
+	 * It's not reliable to print a task's held locks if it's not sleeping
+	 * and it's not the current task.
+	 */
+	if (p->state == TASK_RUNNING && p != current)
 		return;
-	}
-	printk("%d lock%s held by %s/%d:\n",
-	       depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
-
 	for (i = 0; i < depth; i++) {
 		printk(" #%d: ", i);
-		print_lock(curr->held_locks + i);
+		print_lock(p->held_locks + i);
 	}
 }
 
@@ -4451,8 +4455,6 @@ EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
 void debug_show_all_locks(void)
 {
 	struct task_struct *g, *p;
-	int count = 10;
-	int unlock = 1;
 
 	if (unlikely(!debug_locks)) {
 		pr_warn("INFO: lockdep is turned off.\n");
@@ -4460,50 +4462,18 @@ void debug_show_all_locks(void)
 	}
 	pr_warn("\nShowing all locks held in the system:\n");
 
-	/*
-	 * Here we try to get the tasklist_lock as hard as possible,
-	 * if not successful after 2 seconds we ignore it (but keep
-	 * trying). This is to enable a debug printout even if a
-	 * tasklist_lock-holding task deadlocks or crashes.
-	 */
-retry:
-	if (!read_trylock(&tasklist_lock)) {
-		if (count == 10)
-			pr_warn("hm, tasklist_lock locked, retrying... ");
-		if (count) {
-			count--;
-			pr_cont(" #%d", 10-count);
-			mdelay(200);
-			goto retry;
-		}
-		pr_cont(" ignoring it.\n");
-		unlock = 0;
-	} else {
-		if (count != 10)
-			pr_cont(" locked it.\n");
-	}
-
-	do_each_thread(g, p) {
-		/*
-		 * It's not reliable to print a task's held locks
-		 * if it's not sleeping (or if it's not the current
-		 * task):
-		 */
-		if (p->state == TASK_RUNNING && p != current)
+	rcu_read_lock();
+	for_each_process_thread(g, p) {
+		if (!p->lockdep_depth)
 			continue;
-		if (p->lockdep_depth)
-			lockdep_print_held_locks(p);
-		if (!unlock)
-			if (read_trylock(&tasklist_lock))
-				unlock = 1;
+		lockdep_print_held_locks(p);
 		touch_nmi_watchdog();
-	} while_each_thread(g, p);
+		touch_all_softlockup_watchdogs();
+	}
+	rcu_read_unlock();
 
 	pr_warn("\n");
 	pr_warn("=============================================\n\n");
-
-	if (unlock)
-		read_unlock(&tasklist_lock);
 }
 EXPORT_SYMBOL_GPL(debug_show_all_locks);
 #endif
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index f046b7ce9dd6..5e10153b4d3c 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -23,13 +23,15 @@ struct mcs_spinlock {
 
 #ifndef arch_mcs_spin_lock_contended
 /*
- * Using smp_load_acquire() provides a memory barrier that ensures
- * subsequent operations happen after the lock is acquired.
+ * Using smp_cond_load_acquire() provides the acquire semantics
+ * required so that subsequent operations happen after the
+ * lock is acquired. Additionally, some architectures such as
+ * ARM64 would like to do spin-waiting instead of purely
+ * spinning, and smp_cond_load_acquire() provides that behavior.
 */
 #define arch_mcs_spin_lock_contended(l)					\
 do {									\
-	while (!(smp_load_acquire(l)))					\
-		cpu_relax();						\
+	smp_cond_load_acquire(l, VAL);					\
 } while (0)
 #endif
 
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2048359f33d2..f44f658ae629 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -139,8 +139,9 @@ static inline bool __mutex_trylock(struct mutex *lock)
 static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
 {
 	unsigned long curr = (unsigned long)current;
+	unsigned long zero = 0UL;
 
-	if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
+	if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
 		return true;
 
 	return false;
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index d880296245c5..bfaeb05123ff 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c | |||
| @@ -12,11 +12,11 @@ | |||
| 12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
| 13 | * | 13 | * |
| 14 | * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. | 14 | * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. |
| 15 | * (C) Copyright 2013-2014 Red Hat, Inc. | 15 | * (C) Copyright 2013-2014,2018 Red Hat, Inc. |
| 16 | * (C) Copyright 2015 Intel Corp. | 16 | * (C) Copyright 2015 Intel Corp. |
| 17 | * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP | 17 | * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP |
| 18 | * | 18 | * |
| 19 | * Authors: Waiman Long <waiman.long@hpe.com> | 19 | * Authors: Waiman Long <longman@redhat.com> |
| 20 | * Peter Zijlstra <peterz@infradead.org> | 20 | * Peter Zijlstra <peterz@infradead.org> |
| 21 | */ | 21 | */ |
| 22 | 22 | ||
| @@ -33,6 +33,11 @@ | |||
| 33 | #include <asm/qspinlock.h> | 33 | #include <asm/qspinlock.h> |
| 34 | 34 | ||
| 35 | /* | 35 | /* |
| 36 | * Include queued spinlock statistics code | ||
| 37 | */ | ||
| 38 | #include "qspinlock_stat.h" | ||
| 39 | |||
| 40 | /* | ||
| 36 | * The basic principle of a queue-based spinlock can best be understood | 41 | * The basic principle of a queue-based spinlock can best be understood |
| 37 | * by studying a classic queue-based spinlock implementation called the | 42 | * by studying a classic queue-based spinlock implementation called the |
| 38 | * MCS lock. The paper below provides a good description for this kind | 43 | * MCS lock. The paper below provides a good description for this kind |
| @@ -77,6 +82,18 @@ | |||
| 77 | #endif | 82 | #endif |
| 78 | 83 | ||
| 79 | /* | 84 | /* |
| 85 | * The pending bit spinning loop count. | ||
| 86 | * This heuristic is used to limit the number of lockword accesses | ||
| 87 | * made by atomic_cond_read_relaxed when waiting for the lock to | ||
| 88 | * transition out of the "== _Q_PENDING_VAL" state. We don't spin | ||
| 89 | * indefinitely because there's no guarantee that we'll make forward | ||
| 90 | * progress. | ||
| 91 | */ | ||
| 92 | #ifndef _Q_PENDING_LOOPS | ||
| 93 | #define _Q_PENDING_LOOPS 1 | ||
| 94 | #endif | ||
| 95 | |||
| 96 | /* | ||
| 80 | * Per-CPU queue node structures; we can never have more than 4 nested | 97 | * Per-CPU queue node structures; we can never have more than 4 nested |
| 81 | * contexts: task, softirq, hardirq, nmi. | 98 | * contexts: task, softirq, hardirq, nmi. |
| 82 | * | 99 | * |
| @@ -114,41 +131,18 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) | |||
| 114 | 131 | ||
| 115 | #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) | 132 | #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) |
| 116 | 133 | ||
| 117 | /* | 134 | #if _Q_PENDING_BITS == 8 |
| 118 | * By using the whole 2nd least significant byte for the pending bit, we | 135 | /** |
| 119 | * can allow better optimization of the lock acquisition for the pending | 136 | * clear_pending - clear the pending bit. |
| 120 | * bit holder. | 137 | * @lock: Pointer to queued spinlock structure |
| 121 | * | 138 | * |
| 122 | * This internal structure is also used by the set_locked function which | 139 | * *,1,* -> *,0,* |
| 123 | * is not restricted to _Q_PENDING_BITS == 8. | ||
| 124 | */ | 140 | */ |
| 125 | struct __qspinlock { | 141 | static __always_inline void clear_pending(struct qspinlock *lock) |
| 126 | union { | 142 | { |
| 127 | atomic_t val; | 143 | WRITE_ONCE(lock->pending, 0); |
| 128 | #ifdef __LITTLE_ENDIAN | 144 | } |
| 129 | struct { | ||
| 130 | u8 locked; | ||
| 131 | u8 pending; | ||
| 132 | }; | ||
| 133 | struct { | ||
| 134 | u16 locked_pending; | ||
| 135 | u16 tail; | ||
| 136 | }; | ||
| 137 | #else | ||
| 138 | struct { | ||
| 139 | u16 tail; | ||
| 140 | u16 locked_pending; | ||
| 141 | }; | ||
| 142 | struct { | ||
| 143 | u8 reserved[2]; | ||
| 144 | u8 pending; | ||
| 145 | u8 locked; | ||
| 146 | }; | ||
| 147 | #endif | ||
| 148 | }; | ||
| 149 | }; | ||
| 150 | 145 | ||
| 151 | #if _Q_PENDING_BITS == 8 | ||
| 152 | /** | 146 | /** |
| 153 | * clear_pending_set_locked - take ownership and clear the pending bit. | 147 | * clear_pending_set_locked - take ownership and clear the pending bit. |
| 154 | * @lock: Pointer to queued spinlock structure | 148 | * @lock: Pointer to queued spinlock structure |
| @@ -159,9 +153,7 @@ struct __qspinlock { | |||
| 159 | */ | 153 | */ |
| 160 | static __always_inline void clear_pending_set_locked(struct qspinlock *lock) | 154 | static __always_inline void clear_pending_set_locked(struct qspinlock *lock) |
| 161 | { | 155 | { |
| 162 | struct __qspinlock *l = (void *)lock; | 156 | WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); |
| 163 | |||
| 164 | WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL); | ||
| 165 | } | 157 | } |
| 166 | 158 | ||
| 167 | /* | 159 | /* |
| @@ -176,19 +168,28 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) | |||
| 176 | */ | 168 | */ |
| 177 | static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) | 169 | static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) |
| 178 | { | 170 | { |
| 179 | struct __qspinlock *l = (void *)lock; | ||
| 180 | |||
| 181 | /* | 171 | /* |
| 182 | * Use release semantics to make sure that the MCS node is properly | 172 | * We can use relaxed semantics since the caller ensures that the |
| 183 | * initialized before changing the tail code. | 173 | * MCS node is properly initialized before updating the tail. |
| 184 | */ | 174 | */ |
| 185 | return (u32)xchg_release(&l->tail, | 175 | return (u32)xchg_relaxed(&lock->tail, |
| 186 | tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; | 176 | tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; |
| 187 | } | 177 | } |
| 188 | 178 | ||
| 189 | #else /* _Q_PENDING_BITS == 8 */ | 179 | #else /* _Q_PENDING_BITS == 8 */ |
| 190 | 180 | ||
| 191 | /** | 181 | /** |
| 182 | * clear_pending - clear the pending bit. | ||
| 183 | * @lock: Pointer to queued spinlock structure | ||
| 184 | * | ||
| 185 | * *,1,* -> *,0,* | ||
| 186 | */ | ||
| 187 | static __always_inline void clear_pending(struct qspinlock *lock) | ||
| 188 | { | ||
| 189 | atomic_andnot(_Q_PENDING_VAL, &lock->val); | ||
| 190 | } | ||
| 191 | |||
| 192 | /** | ||
| 192 | * clear_pending_set_locked - take ownership and clear the pending bit. | 193 | * clear_pending_set_locked - take ownership and clear the pending bit. |
| 193 | * @lock: Pointer to queued spinlock structure | 194 | * @lock: Pointer to queued spinlock structure |
| 194 | * | 195 | * |
| @@ -216,10 +217,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) | |||
| 216 | for (;;) { | 217 | for (;;) { |
| 217 | new = (val & _Q_LOCKED_PENDING_MASK) | tail; | 218 | new = (val & _Q_LOCKED_PENDING_MASK) | tail; |
| 218 | /* | 219 | /* |
| 219 | * Use release semantics to make sure that the MCS node is | 220 | * We can use relaxed semantics since the caller ensures that |
| 220 | * properly initialized before changing the tail code. | 221 | * the MCS node is properly initialized before updating the |
| 222 | * tail. | ||
| 221 | */ | 223 | */ |
| 222 | old = atomic_cmpxchg_release(&lock->val, val, new); | 224 | old = atomic_cmpxchg_relaxed(&lock->val, val, new); |
| 223 | if (old == val) | 225 | if (old == val) |
| 224 | break; | 226 | break; |
| 225 | 227 | ||
| @@ -237,9 +239,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) | |||
| 237 | */ | 239 | */ |
| 238 | static __always_inline void set_locked(struct qspinlock *lock) | 240 | static __always_inline void set_locked(struct qspinlock *lock) |
| 239 | { | 241 | { |
| 240 | struct __qspinlock *l = (void *)lock; | 242 | WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); |
| 241 | |||
| 242 | WRITE_ONCE(l->locked, _Q_LOCKED_VAL); | ||
| 243 | } | 243 | } |
| 244 | 244 | ||
| 245 | 245 | ||
| @@ -294,86 +294,83 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, | |||
| 294 | void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | 294 | void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) |
| 295 | { | 295 | { |
| 296 | struct mcs_spinlock *prev, *next, *node; | 296 | struct mcs_spinlock *prev, *next, *node; |
| 297 | u32 new, old, tail; | 297 | u32 old, tail; |
| 298 | int idx; | 298 | int idx; |
| 299 | 299 | ||
| 300 | BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); | 300 | BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); |
| 301 | 301 | ||
| 302 | if (pv_enabled()) | 302 | if (pv_enabled()) |
| 303 | goto queue; | 303 | goto pv_queue; |
| 304 | 304 | ||
| 305 | if (virt_spin_lock(lock)) | 305 | if (virt_spin_lock(lock)) |
| 306 | return; | 306 | return; |
| 307 | 307 | ||
| 308 | /* | 308 | /* |
| 309 | * wait for in-progress pending->locked hand-overs | 309 | * Wait for in-progress pending->locked hand-overs with a bounded |
| 310 | * number of spins so that we guarantee forward progress. | ||
| 310 | * | 311 | * |
| 311 | * 0,1,0 -> 0,0,1 | 312 | * 0,1,0 -> 0,0,1 |
| 312 | */ | 313 | */ |
| 313 | if (val == _Q_PENDING_VAL) { | 314 | if (val == _Q_PENDING_VAL) { |
| 314 | while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL) | 315 | int cnt = _Q_PENDING_LOOPS; |
| 315 | cpu_relax(); | 316 | val = atomic_cond_read_relaxed(&lock->val, |
| 317 | (VAL != _Q_PENDING_VAL) || !cnt--); | ||
| 316 | } | 318 | } |
| 317 | 319 | ||
| 318 | /* | 320 | /* |
| 321 | * If we observe any contention; queue. | ||
| 322 | */ | ||
| 323 | if (val & ~_Q_LOCKED_MASK) | ||
| 324 | goto queue; | ||
| 325 | |||
| 326 | /* | ||
| 319 | * trylock || pending | 327 | * trylock || pending |
| 320 | * | 328 | * |
| 321 | * 0,0,0 -> 0,0,1 ; trylock | 329 | * 0,0,0 -> 0,0,1 ; trylock |
| 322 | * 0,0,1 -> 0,1,1 ; pending | 330 | * 0,0,1 -> 0,1,1 ; pending |
| 323 | */ | 331 | */ |
| 324 | for (;;) { | 332 | val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); |
| 333 | if (!(val & ~_Q_LOCKED_MASK)) { | ||
| 325 | /* | 334 | /* |
| 326 | * If we observe any contention; queue. | 335 | * We're pending, wait for the owner to go away. |
| 336 | * | ||
| 337 | * *,1,1 -> *,1,0 | ||
| 338 | * | ||
| 339 | * this wait loop must be a load-acquire such that we match the | ||
| 340 | * store-release that clears the locked bit and create lock | ||
| 341 | * sequentiality; this is because not all | ||
| 342 | * clear_pending_set_locked() implementations imply full | ||
| 343 | * barriers. | ||
| 327 | */ | 344 | */ |
| 328 | if (val & ~_Q_LOCKED_MASK) | 345 | if (val & _Q_LOCKED_MASK) { |
| 329 | goto queue; | 346 | atomic_cond_read_acquire(&lock->val, |
| 330 | 347 | !(VAL & _Q_LOCKED_MASK)); | |
| 331 | new = _Q_LOCKED_VAL; | 348 | } |
| 332 | if (val == new) | ||
| 333 | new |= _Q_PENDING_VAL; | ||
| 334 | 349 | ||
| 335 | /* | 350 | /* |
| 336 | * Acquire semantic is required here as the function may | 351 | * take ownership and clear the pending bit. |
| 337 | * return immediately if the lock was free. | 352 | * |
| 353 | * *,1,0 -> *,0,1 | ||
| 338 | */ | 354 | */ |
| 339 | old = atomic_cmpxchg_acquire(&lock->val, val, new); | 355 | clear_pending_set_locked(lock); |
| 340 | if (old == val) | 356 | qstat_inc(qstat_lock_pending, true); |
| 341 | break; | ||
| 342 | |||
| 343 | val = old; | ||
| 344 | } | ||
| 345 | |||
| 346 | /* | ||
| 347 | * we won the trylock | ||
| 348 | */ | ||
| 349 | if (new == _Q_LOCKED_VAL) | ||
| 350 | return; | 357 | return; |
| 358 | } | ||
| 351 | 359 | ||
| 352 | /* | 360 | /* |
| 353 | * we're pending, wait for the owner to go away. | 361 | * If pending was clear but there are waiters in the queue, then |
| 354 | * | 362 | * we need to undo our setting of pending before we queue ourselves. |
| 355 | * *,1,1 -> *,1,0 | ||
| 356 | * | ||
| 357 | * this wait loop must be a load-acquire such that we match the | ||
| 358 | * store-release that clears the locked bit and create lock | ||
| 359 | * sequentiality; this is because not all clear_pending_set_locked() | ||
| 360 | * implementations imply full barriers. | ||
| 361 | */ | ||
| 362 | smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK)); | ||
| 363 | |||
| 364 | /* | ||
| 365 | * take ownership and clear the pending bit. | ||
| 366 | * | ||
| 367 | * *,1,0 -> *,0,1 | ||
| 368 | */ | 363 | */ |
| 369 | clear_pending_set_locked(lock); | 364 | if (!(val & _Q_PENDING_MASK)) |
| 370 | return; | 365 | clear_pending(lock); |
| 371 | 366 | ||
| 372 | /* | 367 | /* |
| 373 | * End of pending bit optimistic spinning and beginning of MCS | 368 | * End of pending bit optimistic spinning and beginning of MCS |
| 374 | * queuing. | 369 | * queuing. |
| 375 | */ | 370 | */ |
| 376 | queue: | 371 | queue: |
| 372 | qstat_inc(qstat_lock_slowpath, true); | ||
| 373 | pv_queue: | ||
| 377 | node = this_cpu_ptr(&mcs_nodes[0]); | 374 | node = this_cpu_ptr(&mcs_nodes[0]); |
| 378 | idx = node->count++; | 375 | idx = node->count++; |
| 379 | tail = encode_tail(smp_processor_id(), idx); | 376 | tail = encode_tail(smp_processor_id(), idx); |
| @@ -400,12 +397,18 @@ queue: | |||
| 400 | goto release; | 397 | goto release; |
| 401 | 398 | ||
| 402 | /* | 399 | /* |
| 400 | * Ensure that the initialisation of @node is complete before we | ||
| 401 | * publish the updated tail via xchg_tail() and potentially link | ||
| 402 | * @node into the waitqueue via WRITE_ONCE(prev->next, node) below. | ||
| 403 | */ | ||
| 404 | smp_wmb(); | ||
| 405 | |||
| 406 | /* | ||
| 407 | * Publish the updated tail. | ||
| 403 | * We have already touched the queueing cacheline; don't bother with | 408 | * We have already touched the queueing cacheline; don't bother with |
| 404 | * pending stuff. | 409 | * pending stuff. |
| 405 | * | 410 | * |
| 406 | * p,*,* -> n,*,* | 411 | * p,*,* -> n,*,* |
| 407 | * | ||
| 408 | * RELEASE, such that the stores to @node must be complete. | ||
| 409 | */ | 412 | */ |
| 410 | old = xchg_tail(lock, tail); | 413 | old = xchg_tail(lock, tail); |
| 411 | next = NULL; | 414 | next = NULL; |
| @@ -417,14 +420,8 @@ queue: | |||
| 417 | if (old & _Q_TAIL_MASK) { | 420 | if (old & _Q_TAIL_MASK) { |
| 418 | prev = decode_tail(old); | 421 | prev = decode_tail(old); |
| 419 | 422 | ||
| 420 | /* | 423 | /* Link @node into the waitqueue. */ |
| 421 | * We must ensure that the stores to @node are observed before | 424 | WRITE_ONCE(prev->next, node); |
| 422 | * the write to prev->next. The address dependency from | ||
| 423 | * xchg_tail is not sufficient to ensure this because the read | ||
| 424 | * component of xchg_tail is unordered with respect to the | ||
| 425 | * initialisation of @node. | ||
| 426 | */ | ||
| 427 | smp_store_release(&prev->next, node); | ||
| 428 | 425 | ||
| 429 | pv_wait_node(node, prev); | 426 | pv_wait_node(node, prev); |
| 430 | arch_mcs_spin_lock_contended(&node->locked); | 427 | arch_mcs_spin_lock_contended(&node->locked); |
| @@ -453,8 +450,8 @@ queue: | |||
| 453 | * | 450 | * |
| 454 | * The PV pv_wait_head_or_lock function, if active, will acquire | 451 | * The PV pv_wait_head_or_lock function, if active, will acquire |
| 455 | * the lock and return a non-zero value. So we have to skip the | 452 | * the lock and return a non-zero value. So we have to skip the |
| 456 | * smp_cond_load_acquire() call. As the next PV queue head hasn't been | 453 | * atomic_cond_read_acquire() call. As the next PV queue head hasn't |
| 457 | * designated yet, there is no way for the locked value to become | 454 | * been designated yet, there is no way for the locked value to become |
| 458 | * _Q_SLOW_VAL. So both the set_locked() and the | 455 | * _Q_SLOW_VAL. So both the set_locked() and the |
| 459 | * atomic_cmpxchg_relaxed() calls will be safe. | 456 | * atomic_cmpxchg_relaxed() calls will be safe. |
| 460 | * | 457 | * |
| @@ -464,44 +461,38 @@ queue: | |||
| 464 | if ((val = pv_wait_head_or_lock(lock, node))) | 461 | if ((val = pv_wait_head_or_lock(lock, node))) |
| 465 | goto locked; | 462 | goto locked; |
| 466 | 463 | ||
| 467 | val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK)); | 464 | val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK)); |
| 468 | 465 | ||
| 469 | locked: | 466 | locked: |
| 470 | /* | 467 | /* |
| 471 | * claim the lock: | 468 | * claim the lock: |
| 472 | * | 469 | * |
| 473 | * n,0,0 -> 0,0,1 : lock, uncontended | 470 | * n,0,0 -> 0,0,1 : lock, uncontended |
| 474 | * *,0,0 -> *,0,1 : lock, contended | 471 | * *,*,0 -> *,*,1 : lock, contended |
| 475 | * | 472 | * |
| 476 | * If the queue head is the only one in the queue (lock value == tail), | 473 | * If the queue head is the only one in the queue (lock value == tail) |
| 477 | * clear the tail code and grab the lock. Otherwise, we only need | 474 | * and nobody is pending, clear the tail code and grab the lock. |
| 478 | * to grab the lock. | 475 | * Otherwise, we only need to grab the lock. |
| 479 | */ | 476 | */ |
| 480 | for (;;) { | ||
| 481 | /* In the PV case we might already have _Q_LOCKED_VAL set */ | ||
| 482 | if ((val & _Q_TAIL_MASK) != tail) { | ||
| 483 | set_locked(lock); | ||
| 484 | break; | ||
| 485 | } | ||
| 486 | /* | ||
| 487 | * The smp_cond_load_acquire() call above has provided the | ||
| 488 | * necessary acquire semantics required for locking. At most | ||
| 489 | * two iterations of this loop may be ran. | ||
| 490 | */ | ||
| 491 | old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL); | ||
| 492 | if (old == val) | ||
| 493 | goto release; /* No contention */ | ||
| 494 | 477 | ||
| 495 | val = old; | 478 | /* |
| 496 | } | 479 | * In the PV case we might already have _Q_LOCKED_VAL set. |
| 480 | * | ||
| 481 | * The atomic_cond_read_acquire() call above has provided the | ||
| 482 | * necessary acquire semantics required for locking. | ||
| 483 | */ | ||
| 484 | if (((val & _Q_TAIL_MASK) == tail) && | ||
| 485 | atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) | ||
| 486 | goto release; /* No contention */ | ||
| 487 | |||
| 488 | /* Either somebody is queued behind us or _Q_PENDING_VAL is set */ | ||
| 489 | set_locked(lock); | ||
| 497 | 490 | ||
| 498 | /* | 491 | /* |
| 499 | * contended path; wait for next if not observed yet, release. | 492 | * contended path; wait for next if not observed yet, release. |
| 500 | */ | 493 | */ |
| 501 | if (!next) { | 494 | if (!next) |
| 502 | while (!(next = READ_ONCE(node->next))) | 495 | next = smp_cond_load_relaxed(&node->next, (VAL)); |
| 503 | cpu_relax(); | ||
| 504 | } | ||
| 505 | 496 | ||
| 506 | arch_mcs_spin_unlock_contended(&next->locked); | 497 | arch_mcs_spin_unlock_contended(&next->locked); |
| 507 | pv_kick_node(lock, next); | 498 | pv_kick_node(lock, next); |
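The qspinlock.c hunks above replace an open-coded cmpxchg() retry loop with atomic_try_cmpxchg_relaxed() and an open-coded READ_ONCE()/cpu_relax() wait with smp_cond_load_relaxed(). The following is a minimal sketch of those two idioms on standalone variables; the names lockword, next_slot and handoff_sketch are illustrative and not part of the patch.

	#include <linux/atomic.h>

	static atomic_t lockword = ATOMIC_INIT(0);
	static void *next_slot;

	static void handoff_sketch(int tail, int locked_val)
	{
		int val = atomic_read(&lockword);

		/*
		 * atomic_try_cmpxchg_relaxed() returns true on success and, on
		 * failure, writes the value it actually found back into @val,
		 * so no separate re-read is needed and a single attempt (as in
		 * the slowpath above) needs no loop at all.
		 */
		if (val == tail &&
		    atomic_try_cmpxchg_relaxed(&lockword, &val, locked_val))
			return;				/* uncontended */

		/*
		 * smp_cond_load_relaxed(ptr, cond) re-loads *ptr into VAL, with
		 * cpu_relax() between iterations, until cond becomes true,
		 * replacing the old "while (!(next = READ_ONCE(node->next)))
		 * cpu_relax();" pattern.
		 */
		smp_cond_load_relaxed(&next_slot, VAL);
	}

The try_cmpxchg form also maps naturally onto hardware compare-and-exchange instructions, which already leave the observed value in a register on failure.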
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 6ee477765e6c..5a0cf5f9008c 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h | |||
| @@ -56,11 +56,6 @@ struct pv_node { | |||
| 56 | }; | 56 | }; |
| 57 | 57 | ||
| 58 | /* | 58 | /* |
| 59 | * Include queued spinlock statistics code | ||
| 60 | */ | ||
| 61 | #include "qspinlock_stat.h" | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Hybrid PV queued/unfair lock | 59 | * Hybrid PV queued/unfair lock |
| 65 | * | 60 | * |
| 66 | * By replacing the regular queued_spin_trylock() with the function below, | 61 | * By replacing the regular queued_spin_trylock() with the function below, |
| @@ -87,8 +82,6 @@ struct pv_node { | |||
| 87 | #define queued_spin_trylock(l) pv_hybrid_queued_unfair_trylock(l) | 82 | #define queued_spin_trylock(l) pv_hybrid_queued_unfair_trylock(l) |
| 88 | static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) | 83 | static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) |
| 89 | { | 84 | { |
| 90 | struct __qspinlock *l = (void *)lock; | ||
| 91 | |||
| 92 | /* | 85 | /* |
| 93 | * Stay in unfair lock mode as long as queued mode waiters are | 86 | * Stay in unfair lock mode as long as queued mode waiters are |
| 94 | * present in the MCS wait queue but the pending bit isn't set. | 87 | * present in the MCS wait queue but the pending bit isn't set. |
| @@ -97,7 +90,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) | |||
| 97 | int val = atomic_read(&lock->val); | 90 | int val = atomic_read(&lock->val); |
| 98 | 91 | ||
| 99 | if (!(val & _Q_LOCKED_PENDING_MASK) && | 92 | if (!(val & _Q_LOCKED_PENDING_MASK) && |
| 100 | (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) { | 93 | (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { |
| 101 | qstat_inc(qstat_pv_lock_stealing, true); | 94 | qstat_inc(qstat_pv_lock_stealing, true); |
| 102 | return true; | 95 | return true; |
| 103 | } | 96 | } |
| @@ -117,16 +110,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) | |||
| 117 | #if _Q_PENDING_BITS == 8 | 110 | #if _Q_PENDING_BITS == 8 |
| 118 | static __always_inline void set_pending(struct qspinlock *lock) | 111 | static __always_inline void set_pending(struct qspinlock *lock) |
| 119 | { | 112 | { |
| 120 | struct __qspinlock *l = (void *)lock; | 113 | WRITE_ONCE(lock->pending, 1); |
| 121 | |||
| 122 | WRITE_ONCE(l->pending, 1); | ||
| 123 | } | ||
| 124 | |||
| 125 | static __always_inline void clear_pending(struct qspinlock *lock) | ||
| 126 | { | ||
| 127 | struct __qspinlock *l = (void *)lock; | ||
| 128 | |||
| 129 | WRITE_ONCE(l->pending, 0); | ||
| 130 | } | 114 | } |
| 131 | 115 | ||
| 132 | /* | 116 | /* |
| @@ -136,10 +120,8 @@ static __always_inline void clear_pending(struct qspinlock *lock) | |||
| 136 | */ | 120 | */ |
| 137 | static __always_inline int trylock_clear_pending(struct qspinlock *lock) | 121 | static __always_inline int trylock_clear_pending(struct qspinlock *lock) |
| 138 | { | 122 | { |
| 139 | struct __qspinlock *l = (void *)lock; | 123 | return !READ_ONCE(lock->locked) && |
| 140 | 124 | (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL, | |
| 141 | return !READ_ONCE(l->locked) && | ||
| 142 | (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL, | ||
| 143 | _Q_LOCKED_VAL) == _Q_PENDING_VAL); | 125 | _Q_LOCKED_VAL) == _Q_PENDING_VAL); |
| 144 | } | 126 | } |
| 145 | #else /* _Q_PENDING_BITS == 8 */ | 127 | #else /* _Q_PENDING_BITS == 8 */ |
| @@ -148,11 +130,6 @@ static __always_inline void set_pending(struct qspinlock *lock) | |||
| 148 | atomic_or(_Q_PENDING_VAL, &lock->val); | 130 | atomic_or(_Q_PENDING_VAL, &lock->val); |
| 149 | } | 131 | } |
| 150 | 132 | ||
| 151 | static __always_inline void clear_pending(struct qspinlock *lock) | ||
| 152 | { | ||
| 153 | atomic_andnot(_Q_PENDING_VAL, &lock->val); | ||
| 154 | } | ||
| 155 | |||
| 156 | static __always_inline int trylock_clear_pending(struct qspinlock *lock) | 133 | static __always_inline int trylock_clear_pending(struct qspinlock *lock) |
| 157 | { | 134 | { |
| 158 | int val = atomic_read(&lock->val); | 135 | int val = atomic_read(&lock->val); |
| @@ -384,7 +361,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) | |||
| 384 | static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) | 361 | static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) |
| 385 | { | 362 | { |
| 386 | struct pv_node *pn = (struct pv_node *)node; | 363 | struct pv_node *pn = (struct pv_node *)node; |
| 387 | struct __qspinlock *l = (void *)lock; | ||
| 388 | 364 | ||
| 389 | /* | 365 | /* |
| 390 | * If the vCPU is indeed halted, advance its state to match that of | 366 | * If the vCPU is indeed halted, advance its state to match that of |
| @@ -413,7 +389,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) | |||
| 413 | * the hash table later on at unlock time, no atomic instruction is | 389 | * the hash table later on at unlock time, no atomic instruction is |
| 414 | * needed. | 390 | * needed. |
| 415 | */ | 391 | */ |
| 416 | WRITE_ONCE(l->locked, _Q_SLOW_VAL); | 392 | WRITE_ONCE(lock->locked, _Q_SLOW_VAL); |
| 417 | (void)pv_hash(lock, pn); | 393 | (void)pv_hash(lock, pn); |
| 418 | } | 394 | } |
| 419 | 395 | ||
| @@ -428,7 +404,6 @@ static u32 | |||
| 428 | pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | 404 | pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) |
| 429 | { | 405 | { |
| 430 | struct pv_node *pn = (struct pv_node *)node; | 406 | struct pv_node *pn = (struct pv_node *)node; |
| 431 | struct __qspinlock *l = (void *)lock; | ||
| 432 | struct qspinlock **lp = NULL; | 407 | struct qspinlock **lp = NULL; |
| 433 | int waitcnt = 0; | 408 | int waitcnt = 0; |
| 434 | int loop; | 409 | int loop; |
| @@ -443,7 +418,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | |||
| 443 | /* | 418 | /* |
| 444 | * Tracking # of slowpath locking operations | 419 | * Tracking # of slowpath locking operations |
| 445 | */ | 420 | */ |
| 446 | qstat_inc(qstat_pv_lock_slowpath, true); | 421 | qstat_inc(qstat_lock_slowpath, true); |
| 447 | 422 | ||
| 448 | for (;; waitcnt++) { | 423 | for (;; waitcnt++) { |
| 449 | /* | 424 | /* |
| @@ -479,13 +454,13 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | |||
| 479 | * | 454 | * |
| 480 | * Matches the smp_rmb() in __pv_queued_spin_unlock(). | 455 | * Matches the smp_rmb() in __pv_queued_spin_unlock(). |
| 481 | */ | 456 | */ |
| 482 | if (xchg(&l->locked, _Q_SLOW_VAL) == 0) { | 457 | if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) { |
| 483 | /* | 458 | /* |
| 484 | * The lock was free and now we own the lock. | 459 | * The lock was free and now we own the lock. |
| 485 | * Change the lock value back to _Q_LOCKED_VAL | 460 | * Change the lock value back to _Q_LOCKED_VAL |
| 486 | * and unhash the table. | 461 | * and unhash the table. |
| 487 | */ | 462 | */ |
| 488 | WRITE_ONCE(l->locked, _Q_LOCKED_VAL); | 463 | WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); |
| 489 | WRITE_ONCE(*lp, NULL); | 464 | WRITE_ONCE(*lp, NULL); |
| 490 | goto gotlock; | 465 | goto gotlock; |
| 491 | } | 466 | } |
| @@ -493,7 +468,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | |||
| 493 | WRITE_ONCE(pn->state, vcpu_hashed); | 468 | WRITE_ONCE(pn->state, vcpu_hashed); |
| 494 | qstat_inc(qstat_pv_wait_head, true); | 469 | qstat_inc(qstat_pv_wait_head, true); |
| 495 | qstat_inc(qstat_pv_wait_again, waitcnt); | 470 | qstat_inc(qstat_pv_wait_again, waitcnt); |
| 496 | pv_wait(&l->locked, _Q_SLOW_VAL); | 471 | pv_wait(&lock->locked, _Q_SLOW_VAL); |
| 497 | 472 | ||
| 498 | /* | 473 | /* |
| 499 | * Because of lock stealing, the queue head vCPU may not be | 474 | * Because of lock stealing, the queue head vCPU may not be |
| @@ -518,7 +493,6 @@ gotlock: | |||
| 518 | __visible void | 493 | __visible void |
| 519 | __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) | 494 | __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) |
| 520 | { | 495 | { |
| 521 | struct __qspinlock *l = (void *)lock; | ||
| 522 | struct pv_node *node; | 496 | struct pv_node *node; |
| 523 | 497 | ||
| 524 | if (unlikely(locked != _Q_SLOW_VAL)) { | 498 | if (unlikely(locked != _Q_SLOW_VAL)) { |
| @@ -547,7 +521,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) | |||
| 547 | * Now that we have a reference to the (likely) blocked pv_node, | 521 | * Now that we have a reference to the (likely) blocked pv_node, |
| 548 | * release the lock. | 522 | * release the lock. |
| 549 | */ | 523 | */ |
| 550 | smp_store_release(&l->locked, 0); | 524 | smp_store_release(&lock->locked, 0); |
| 551 | 525 | ||
| 552 | /* | 526 | /* |
| 553 | * At this point the memory pointed at by lock can be freed/reused, | 527 | * At this point the memory pointed at by lock can be freed/reused, |
| @@ -573,7 +547,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) | |||
| 573 | #ifndef __pv_queued_spin_unlock | 547 | #ifndef __pv_queued_spin_unlock |
| 574 | __visible void __pv_queued_spin_unlock(struct qspinlock *lock) | 548 | __visible void __pv_queued_spin_unlock(struct qspinlock *lock) |
| 575 | { | 549 | { |
| 576 | struct __qspinlock *l = (void *)lock; | ||
| 577 | u8 locked; | 550 | u8 locked; |
| 578 | 551 | ||
| 579 | /* | 552 | /* |
| @@ -581,7 +554,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) | |||
| 581 | * unhash. Otherwise it would be possible to have multiple @lock | 554 | * unhash. Otherwise it would be possible to have multiple @lock |
| 582 | * entries, which would be BAD. | 555 | * entries, which would be BAD. |
| 583 | */ | 556 | */ |
| 584 | locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0); | 557 | locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); |
| 585 | if (likely(locked == _Q_LOCKED_VAL)) | 558 | if (likely(locked == _Q_LOCKED_VAL)) |
| 586 | return; | 559 | return; |
| 587 | 560 | ||
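The paravirt hunks above drop the struct __qspinlock casts and access lock->locked, lock->pending and lock->locked_pending directly, which presumes that earlier in this series those byte and halfword views were folded into struct qspinlock itself as a union. Below is a little-endian-only sketch of such an overlay; it is illustrative only, and the real definition (including the big-endian variant) lives in the generic qspinlock_types.h header.

	#include <linux/types.h>

	/* Little-endian-only sketch; not the kernel's definition. */
	struct qspinlock_sketch {
		union {
			atomic_t val;	/* tail[31:16] pending[8] locked[7:0] */
			struct {
				u8 locked;		/* bits  7..0  */
				u8 pending;		/* bit   8     */
			};
			struct {
				u16 locked_pending;	/* bits 15..0  */
				u16 tail;		/* bits 31..16 */
			};
		};
	};

With such an overlay, WRITE_ONCE(lock->pending, 1) and cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) touch only their own byte or halfword of the lock word, which is what the _Q_PENDING_BITS == 8 paths above rely on.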
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index 4a30ef63c607..6bd78c0740fc 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h | |||
| @@ -22,13 +22,14 @@ | |||
| 22 | * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake | 22 | * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake |
| 23 | * pv_latency_kick - average latency (ns) of vCPU kick operation | 23 | * pv_latency_kick - average latency (ns) of vCPU kick operation |
| 24 | * pv_latency_wake - average latency (ns) from vCPU kick to wakeup | 24 | * pv_latency_wake - average latency (ns) from vCPU kick to wakeup |
| 25 | * pv_lock_slowpath - # of locking operations via the slowpath | ||
| 26 | * pv_lock_stealing - # of lock stealing operations | 25 | * pv_lock_stealing - # of lock stealing operations |
| 27 | * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs | 26 | * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs |
| 28 | * pv_wait_again - # of wait's after a queue head vCPU kick | 27 | * pv_wait_again - # of wait's after a queue head vCPU kick |
| 29 | * pv_wait_early - # of early vCPU wait's | 28 | * pv_wait_early - # of early vCPU wait's |
| 30 | * pv_wait_head - # of vCPU wait's at the queue head | 29 | * pv_wait_head - # of vCPU wait's at the queue head |
| 31 | * pv_wait_node - # of vCPU wait's at a non-head queue node | 30 | * pv_wait_node - # of vCPU wait's at a non-head queue node |
| 31 | * lock_pending - # of locking operations via pending code | ||
| 32 | * lock_slowpath - # of locking operations via MCS lock queue | ||
| 32 | * | 33 | * |
| 33 | * Writing to the "reset_counters" file will reset all the above counter | 34 | * Writing to the "reset_counters" file will reset all the above counter |
| 34 | * values. | 35 | * values. |
| @@ -46,13 +47,14 @@ enum qlock_stats { | |||
| 46 | qstat_pv_kick_wake, | 47 | qstat_pv_kick_wake, |
| 47 | qstat_pv_latency_kick, | 48 | qstat_pv_latency_kick, |
| 48 | qstat_pv_latency_wake, | 49 | qstat_pv_latency_wake, |
| 49 | qstat_pv_lock_slowpath, | ||
| 50 | qstat_pv_lock_stealing, | 50 | qstat_pv_lock_stealing, |
| 51 | qstat_pv_spurious_wakeup, | 51 | qstat_pv_spurious_wakeup, |
| 52 | qstat_pv_wait_again, | 52 | qstat_pv_wait_again, |
| 53 | qstat_pv_wait_early, | 53 | qstat_pv_wait_early, |
| 54 | qstat_pv_wait_head, | 54 | qstat_pv_wait_head, |
| 55 | qstat_pv_wait_node, | 55 | qstat_pv_wait_node, |
| 56 | qstat_lock_pending, | ||
| 57 | qstat_lock_slowpath, | ||
| 56 | qstat_num, /* Total number of statistical counters */ | 58 | qstat_num, /* Total number of statistical counters */ |
| 57 | qstat_reset_cnts = qstat_num, | 59 | qstat_reset_cnts = qstat_num, |
| 58 | }; | 60 | }; |
| @@ -73,12 +75,13 @@ static const char * const qstat_names[qstat_num + 1] = { | |||
| 73 | [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup", | 75 | [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup", |
| 74 | [qstat_pv_latency_kick] = "pv_latency_kick", | 76 | [qstat_pv_latency_kick] = "pv_latency_kick", |
| 75 | [qstat_pv_latency_wake] = "pv_latency_wake", | 77 | [qstat_pv_latency_wake] = "pv_latency_wake", |
| 76 | [qstat_pv_lock_slowpath] = "pv_lock_slowpath", | ||
| 77 | [qstat_pv_lock_stealing] = "pv_lock_stealing", | 78 | [qstat_pv_lock_stealing] = "pv_lock_stealing", |
| 78 | [qstat_pv_wait_again] = "pv_wait_again", | 79 | [qstat_pv_wait_again] = "pv_wait_again", |
| 79 | [qstat_pv_wait_early] = "pv_wait_early", | 80 | [qstat_pv_wait_early] = "pv_wait_early", |
| 80 | [qstat_pv_wait_head] = "pv_wait_head", | 81 | [qstat_pv_wait_head] = "pv_wait_head", |
| 81 | [qstat_pv_wait_node] = "pv_wait_node", | 82 | [qstat_pv_wait_node] = "pv_wait_node", |
| 83 | [qstat_lock_pending] = "lock_pending", | ||
| 84 | [qstat_lock_slowpath] = "lock_slowpath", | ||
| 82 | [qstat_reset_cnts] = "reset_counters", | 85 | [qstat_reset_cnts] = "reset_counters", |
| 83 | }; | 86 | }; |
| 84 | 87 | ||
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index a90336779375..3064c50e181e 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -347,6 +347,15 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) | |||
| 347 | } | 347 | } |
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | static inline bool owner_on_cpu(struct task_struct *owner) | ||
| 351 | { | ||
| 352 | /* | ||
| 353 | * To guard against lock-holder preemption, skip spinning if the | ||
| 354 | * owner task is not running on a CPU or its vCPU has been preempted. | ||
| 355 | */ | ||
| 356 | return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); | ||
| 357 | } | ||
| 358 | |||
| 350 | static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) | 359 | static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) |
| 351 | { | 360 | { |
| 352 | struct task_struct *owner; | 361 | struct task_struct *owner; |
| @@ -359,17 +368,10 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) | |||
| 359 | 368 | ||
| 360 | rcu_read_lock(); | 369 | rcu_read_lock(); |
| 361 | owner = READ_ONCE(sem->owner); | 370 | owner = READ_ONCE(sem->owner); |
| 362 | if (!owner || !is_rwsem_owner_spinnable(owner)) { | 371 | if (owner) { |
| 363 | ret = !owner; /* !owner is spinnable */ | 372 | ret = is_rwsem_owner_spinnable(owner) && |
| 364 | goto done; | 373 | owner_on_cpu(owner); |
| 365 | } | 374 | } |
| 366 | |||
| 367 | /* | ||
| 368 | * As lock holder preemption issue, we both skip spinning if task is not | ||
| 369 | * on cpu or its cpu is preempted | ||
| 370 | */ | ||
| 371 | ret = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); | ||
| 372 | done: | ||
| 373 | rcu_read_unlock(); | 375 | rcu_read_unlock(); |
| 374 | return ret; | 376 | return ret; |
| 375 | } | 377 | } |
| @@ -398,8 +400,7 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) | |||
| 398 | * abort spinning when need_resched or owner is not running or | 400 | * abort spinning when need_resched or owner is not running or |
| 399 | * owner's cpu is preempted. | 401 | * owner's cpu is preempted. |
| 400 | */ | 402 | */ |
| 401 | if (!owner->on_cpu || need_resched() || | 403 | if (need_resched() || !owner_on_cpu(owner)) { |
| 402 | vcpu_is_preempted(task_cpu(owner))) { | ||
| 403 | rcu_read_unlock(); | 404 | rcu_read_unlock(); |
| 404 | return false; | 405 | return false; |
| 405 | } | 406 | } |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 64c0291b579c..f89014a2c238 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -37,7 +37,7 @@ struct cpu_stop_done { | |||
| 37 | struct cpu_stopper { | 37 | struct cpu_stopper { |
| 38 | struct task_struct *thread; | 38 | struct task_struct *thread; |
| 39 | 39 | ||
| 40 | spinlock_t lock; | 40 | raw_spinlock_t lock; |
| 41 | bool enabled; /* is this stopper enabled? */ | 41 | bool enabled; /* is this stopper enabled? */ |
| 42 | struct list_head works; /* list of pending works */ | 42 | struct list_head works; /* list of pending works */ |
| 43 | 43 | ||
| @@ -81,13 +81,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) | |||
| 81 | unsigned long flags; | 81 | unsigned long flags; |
| 82 | bool enabled; | 82 | bool enabled; |
| 83 | 83 | ||
| 84 | spin_lock_irqsave(&stopper->lock, flags); | 84 | raw_spin_lock_irqsave(&stopper->lock, flags); |
| 85 | enabled = stopper->enabled; | 85 | enabled = stopper->enabled; |
| 86 | if (enabled) | 86 | if (enabled) |
| 87 | __cpu_stop_queue_work(stopper, work, &wakeq); | 87 | __cpu_stop_queue_work(stopper, work, &wakeq); |
| 88 | else if (work->done) | 88 | else if (work->done) |
| 89 | cpu_stop_signal_done(work->done); | 89 | cpu_stop_signal_done(work->done); |
| 90 | spin_unlock_irqrestore(&stopper->lock, flags); | 90 | raw_spin_unlock_irqrestore(&stopper->lock, flags); |
| 91 | 91 | ||
| 92 | wake_up_q(&wakeq); | 92 | wake_up_q(&wakeq); |
| 93 | 93 | ||
| @@ -237,8 +237,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, | |||
| 237 | DEFINE_WAKE_Q(wakeq); | 237 | DEFINE_WAKE_Q(wakeq); |
| 238 | int err; | 238 | int err; |
| 239 | retry: | 239 | retry: |
| 240 | spin_lock_irq(&stopper1->lock); | 240 | raw_spin_lock_irq(&stopper1->lock); |
| 241 | spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | 241 | raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); |
| 242 | 242 | ||
| 243 | err = -ENOENT; | 243 | err = -ENOENT; |
| 244 | if (!stopper1->enabled || !stopper2->enabled) | 244 | if (!stopper1->enabled || !stopper2->enabled) |
| @@ -261,8 +261,8 @@ retry: | |||
| 261 | __cpu_stop_queue_work(stopper1, work1, &wakeq); | 261 | __cpu_stop_queue_work(stopper1, work1, &wakeq); |
| 262 | __cpu_stop_queue_work(stopper2, work2, &wakeq); | 262 | __cpu_stop_queue_work(stopper2, work2, &wakeq); |
| 263 | unlock: | 263 | unlock: |
| 264 | spin_unlock(&stopper2->lock); | 264 | raw_spin_unlock(&stopper2->lock); |
| 265 | spin_unlock_irq(&stopper1->lock); | 265 | raw_spin_unlock_irq(&stopper1->lock); |
| 266 | 266 | ||
| 267 | if (unlikely(err == -EDEADLK)) { | 267 | if (unlikely(err == -EDEADLK)) { |
| 268 | while (stop_cpus_in_progress) | 268 | while (stop_cpus_in_progress) |
| @@ -457,9 +457,9 @@ static int cpu_stop_should_run(unsigned int cpu) | |||
| 457 | unsigned long flags; | 457 | unsigned long flags; |
| 458 | int run; | 458 | int run; |
| 459 | 459 | ||
| 460 | spin_lock_irqsave(&stopper->lock, flags); | 460 | raw_spin_lock_irqsave(&stopper->lock, flags); |
| 461 | run = !list_empty(&stopper->works); | 461 | run = !list_empty(&stopper->works); |
| 462 | spin_unlock_irqrestore(&stopper->lock, flags); | 462 | raw_spin_unlock_irqrestore(&stopper->lock, flags); |
| 463 | return run; | 463 | return run; |
| 464 | } | 464 | } |
| 465 | 465 | ||
| @@ -470,13 +470,13 @@ static void cpu_stopper_thread(unsigned int cpu) | |||
| 470 | 470 | ||
| 471 | repeat: | 471 | repeat: |
| 472 | work = NULL; | 472 | work = NULL; |
| 473 | spin_lock_irq(&stopper->lock); | 473 | raw_spin_lock_irq(&stopper->lock); |
| 474 | if (!list_empty(&stopper->works)) { | 474 | if (!list_empty(&stopper->works)) { |
| 475 | work = list_first_entry(&stopper->works, | 475 | work = list_first_entry(&stopper->works, |
| 476 | struct cpu_stop_work, list); | 476 | struct cpu_stop_work, list); |
| 477 | list_del_init(&work->list); | 477 | list_del_init(&work->list); |
| 478 | } | 478 | } |
| 479 | spin_unlock_irq(&stopper->lock); | 479 | raw_spin_unlock_irq(&stopper->lock); |
| 480 | 480 | ||
| 481 | if (work) { | 481 | if (work) { |
| 482 | cpu_stop_fn_t fn = work->fn; | 482 | cpu_stop_fn_t fn = work->fn; |
| @@ -550,7 +550,7 @@ static int __init cpu_stop_init(void) | |||
| 550 | for_each_possible_cpu(cpu) { | 550 | for_each_possible_cpu(cpu) { |
| 551 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | 551 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); |
| 552 | 552 | ||
| 553 | spin_lock_init(&stopper->lock); | 553 | raw_spin_lock_init(&stopper->lock); |
| 554 | INIT_LIST_HEAD(&stopper->works); | 554 | INIT_LIST_HEAD(&stopper->works); |
| 555 | } | 555 | } |
| 556 | 556 | ||
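The stop_machine.c conversion from spinlock_t to raw_spinlock_t matters mostly on PREEMPT_RT, where a spinlock_t becomes a sleeping lock while a raw_spinlock_t keeps spinning with preemption disabled; the stopper lock is taken from contexts that must not sleep. A minimal sketch of the resulting pattern follows; the structure and function names are illustrative, not from stop_machine.c.

	#include <linux/spinlock.h>
	#include <linux/list.h>

	static struct stopper_sketch {
		raw_spinlock_t		lock;
		struct list_head	works;
	} stopper_sketch = {
		.lock  = __RAW_SPIN_LOCK_UNLOCKED(stopper_sketch.lock),
		.works = LIST_HEAD_INIT(stopper_sketch.works),
	};

	static void queue_work_sketch(struct list_head *item)
	{
		unsigned long flags;

		/* Never sleeps, even on PREEMPT_RT; IRQs stay off throughout. */
		raw_spin_lock_irqsave(&stopper_sketch.lock, flags);
		list_add_tail(item, &stopper_sketch.works);
		raw_spin_unlock_irqrestore(&stopper_sketch.lock, flags);
	}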
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt index 956b1ae4aafb..33ba98d72b16 100644 --- a/tools/memory-model/Documentation/cheatsheet.txt +++ b/tools/memory-model/Documentation/cheatsheet.txt | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | Prior Operation Subsequent Operation | 1 | Prior Operation Subsequent Operation |
| 2 | --------------- --------------------------- | 2 | --------------- --------------------------- |
| 3 | C Self R W RWM Self R W DR DW RMW SV | 3 | C Self R W RMW Self R W DR DW RMW SV |
| 4 | -- ---- - - --- ---- - - -- -- --- -- | 4 | -- ---- - - --- ---- - - -- -- --- -- |
| 5 | 5 | ||
| 6 | Store, e.g., WRITE_ONCE() Y Y | 6 | Store, e.g., WRITE_ONCE() Y Y |
| @@ -14,7 +14,7 @@ smp_wmb() Y W Y Y W | |||
| 14 | smp_mb() & synchronize_rcu() CP Y Y Y Y Y Y Y Y | 14 | smp_mb() & synchronize_rcu() CP Y Y Y Y Y Y Y Y |
| 15 | Successful full non-void RMW CP Y Y Y Y Y Y Y Y Y Y Y | 15 | Successful full non-void RMW CP Y Y Y Y Y Y Y Y Y Y Y |
| 16 | smp_mb__before_atomic() CP Y Y Y a a a a Y | 16 | smp_mb__before_atomic() CP Y Y Y a a a a Y |
| 17 | smp_mb__after_atomic() CP a a Y Y Y Y Y | 17 | smp_mb__after_atomic() CP a a Y Y Y Y Y Y |
| 18 | 18 | ||
| 19 | 19 | ||
| 20 | Key: C: Ordering is cumulative | 20 | Key: C: Ordering is cumulative |
| @@ -26,4 +26,5 @@ Key: C: Ordering is cumulative | |||
| 26 | DR: Dependent read (address dependency) | 26 | DR: Dependent read (address dependency) |
| 27 | DW: Dependent write (address, data, or control dependency) | 27 | DW: Dependent write (address, data, or control dependency) |
| 28 | RMW: Atomic read-modify-write operation | 28 | RMW: Atomic read-modify-write operation |
| 29 | SV Same-variable access | 29 | SELF: Orders self, as opposed to accesses before and/or after |
| 30 | SV: Orders later accesses to the same variable | ||
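The corrected cheatsheet rows above describe smp_mb__before_atomic() and smp_mb__after_atomic(), which upgrade a non-value-returning atomic RMW to a fully ordered one. A small hedged example, with illustrative variable names:

	#include <linux/atomic.h>

	static atomic_t nr_pending = ATOMIC_INIT(0);
	static int payload;

	static void publish_sketch(void)
	{
		WRITE_ONCE(payload, 42);
		smp_mb__before_atomic();	/* order the payload store before the RMW */
		atomic_inc(&nr_pending);	/* void RMW: no ordering on its own */
		smp_mb__after_atomic();		/* order later accesses after the RMW */
	}

Value-returning RMWs such as atomic_inc_return() are already fully ordered, per the "Successful full non-void RMW" row above.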
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index a727c82bd434..1b09f3175a1f 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt | |||
| @@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model | |||
| 27 | 19. AND THEN THERE WAS ALPHA | 27 | 19. AND THEN THERE WAS ALPHA |
| 28 | 20. THE HAPPENS-BEFORE RELATION: hb | 28 | 20. THE HAPPENS-BEFORE RELATION: hb |
| 29 | 21. THE PROPAGATES-BEFORE RELATION: pb | 29 | 21. THE PROPAGATES-BEFORE RELATION: pb |
| 30 | 22. RCU RELATIONS: link, gp-link, rscs-link, and rcu-path | 30 | 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb |
| 31 | 23. ODDS AND ENDS | 31 | 23. ODDS AND ENDS |
| 32 | 32 | ||
| 33 | 33 | ||
| @@ -1451,8 +1451,8 @@ they execute means that it cannot have cycles. This requirement is | |||
| 1451 | the content of the LKMM's "propagation" axiom. | 1451 | the content of the LKMM's "propagation" axiom. |
| 1452 | 1452 | ||
| 1453 | 1453 | ||
| 1454 | RCU RELATIONS: link, gp-link, rscs-link, and rcu-path | 1454 | RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb |
| 1455 | ----------------------------------------------------- | 1455 | ---------------------------------------------------- |
| 1456 | 1456 | ||
| 1457 | RCU (Read-Copy-Update) is a powerful synchronization mechanism. It | 1457 | RCU (Read-Copy-Update) is a powerful synchronization mechanism. It |
| 1458 | rests on two concepts: grace periods and read-side critical sections. | 1458 | rests on two concepts: grace periods and read-side critical sections. |
| @@ -1509,8 +1509,8 @@ y, which occurs before the end of the critical section, did not | |||
| 1509 | propagate to P1 before the end of the grace period, violating the | 1509 | propagate to P1 before the end of the grace period, violating the |
| 1510 | Guarantee. | 1510 | Guarantee. |
| 1511 | 1511 | ||
| 1512 | In the kernel's implementations of RCU, the business about stores | 1512 | In the kernel's implementations of RCU, the requirements for stores |
| 1513 | propagating to every CPU is realized by placing strong fences at | 1513 | to propagate to every CPU are fulfilled by placing strong fences at |
| 1514 | suitable places in the RCU-related code. Thus, if a critical section | 1514 | suitable places in the RCU-related code. Thus, if a critical section |
| 1515 | starts before a grace period does then the critical section's CPU will | 1515 | starts before a grace period does then the critical section's CPU will |
| 1516 | execute an smp_mb() fence after the end of the critical section and | 1516 | execute an smp_mb() fence after the end of the critical section and |
| @@ -1523,72 +1523,124 @@ executes. | |||
| 1523 | What exactly do we mean by saying that a critical section "starts | 1523 | What exactly do we mean by saying that a critical section "starts |
| 1524 | before" or "ends after" a grace period? Some aspects of the meaning | 1524 | before" or "ends after" a grace period? Some aspects of the meaning |
| 1525 | are pretty obvious, as in the example above, but the details aren't | 1525 | are pretty obvious, as in the example above, but the details aren't |
| 1526 | entirely clear. The LKMM formalizes this notion by means of a | 1526 | entirely clear. The LKMM formalizes this notion by means of the |
| 1527 | relation with the unfortunately generic name "link". It is a very | 1527 | rcu-link relation. rcu-link encompasses a very general notion of |
| 1528 | general relation; among other things, X ->link Z includes cases where | 1528 | "before": Among other things, X ->rcu-link Z includes cases where X |
| 1529 | X happens-before or is equal to some event Y which is equal to or | 1529 | happens-before or is equal to some event Y which is equal to or comes |
| 1530 | comes before Z in the coherence order. Taking Y = Z, this says that | 1530 | before Z in the coherence order. When Y = Z this says that X ->rfe Z |
| 1531 | X ->rfe Z implies X ->link Z, and taking Y = X, it says that X ->fr Z | 1531 | implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z |
| 1532 | and X ->co Z each imply X ->link Z. | 1532 | and X ->co Z each imply X ->rcu-link Z. |
| 1533 | 1533 | ||
| 1534 | The formal definition of the link relation is more than a little | 1534 | The formal definition of the rcu-link relation is more than a little |
| 1535 | obscure, and we won't give it here. It is closely related to the pb | 1535 | obscure, and we won't give it here. It is closely related to the pb |
| 1536 | relation, and the details don't matter unless you want to comb through | 1536 | relation, and the details don't matter unless you want to comb through |
| 1537 | a somewhat lengthy formal proof. Pretty much all you need to know | 1537 | a somewhat lengthy formal proof. Pretty much all you need to know |
| 1538 | about link is the information in the preceding paragraph. | 1538 | about rcu-link is the information in the preceding paragraph. |
| 1539 | 1539 | ||
| 1540 | The LKMM goes on to define the gp-link and rscs-link relations. They | 1540 | The LKMM also defines the gp and rscs relations. They bring grace |
| 1541 | bring grace periods and read-side critical sections into the picture, | 1541 | periods and read-side critical sections into the picture, in the |
| 1542 | in the following way: | 1542 | following way: |
| 1543 | 1543 | ||
| 1544 | E ->gp-link F means there is a synchronize_rcu() fence event S | 1544 | E ->gp F means there is a synchronize_rcu() fence event S such |
| 1545 | and an event X such that E ->po S, either S ->po X or S = X, | 1545 | that E ->po S and either S ->po F or S = F. In simple terms, |
| 1546 | and X ->link F. In other words, E and F are connected by a | 1546 | there is a grace period po-between E and F. |
| 1547 | grace period followed by an instance of link. | 1547 | |
| 1548 | 1548 | E ->rscs F means there is a critical section delimited by an | |
| 1549 | E ->rscs-link F means there is a critical section delimited by | 1549 | rcu_read_lock() fence L and an rcu_read_unlock() fence U, such |
| 1550 | an rcu_read_lock() fence L and an rcu_read_unlock() fence U, | 1550 | that E ->po U and either L ->po F or L = F. You can think of |
| 1551 | and an event X such that E ->po U, either L ->po X or L = X, | 1551 | this as saying that E and F are in the same critical section |
| 1552 | and X ->link F. Roughly speaking, this says that some event | 1552 | (in fact, it also allows E to be po-before the start of the |
| 1553 | in the same critical section as E is connected by link to F. | 1553 | critical section and F to be po-after the end). |
| 1554 | 1554 | ||
| 1555 | If we think of the link relation as standing for an extended "before", | 1555 | If we think of the rcu-link relation as standing for an extended |
| 1556 | then E ->gp-link F says that E executes before a grace period which | 1556 | "before", then X ->gp Y ->rcu-link Z says that X executes before a |
| 1557 | ends before F executes. (In fact it says more than this, because it | 1557 | grace period which ends before Z executes. (In fact it covers more |
| 1558 | includes cases where E executes before a grace period and some store | 1558 | than this, because it also includes cases where X executes before a |
| 1559 | propagates to F's CPU before F executes and doesn't propagate to some | 1559 | grace period and some store propagates to Z's CPU before Z executes |
| 1560 | other CPU until after the grace period ends.) Similarly, | 1560 | but doesn't propagate to some other CPU until after the grace period |
| 1561 | E ->rscs-link F says that E is part of (or before the start of) a | 1561 | ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or |
| 1562 | critical section which starts before F executes. | 1562 | before the start of) a critical section which starts before Z |
| 1563 | executes. | ||
| 1564 | |||
| 1565 | The LKMM goes on to define the rcu-fence relation as a sequence of gp | ||
| 1566 | and rscs links separated by rcu-link links, in which the number of gp | ||
| 1567 | links is >= the number of rscs links. For example: | ||
| 1568 | |||
| 1569 | X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V | ||
| 1570 | |||
| 1571 | would imply that X ->rcu-fence V, because this sequence contains two | ||
| 1572 | gp links and only one rscs link. (It also implies that X ->rcu-fence T | ||
| 1573 | and Z ->rcu-fence V.) On the other hand: | ||
| 1574 | |||
| 1575 | X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V | ||
| 1576 | |||
| 1577 | does not imply X ->rcu-fence V, because the sequence contains only | ||
| 1578 | one gp link but two rscs links. | ||
| 1579 | |||
| 1580 | The rcu-fence relation is important because the Grace Period Guarantee | ||
| 1581 | means that rcu-fence acts kind of like a strong fence. In particular, | ||
| 1582 | if W is a write and we have W ->rcu-fence Z, the Guarantee says that W | ||
| 1583 | will propagate to every CPU before Z executes. | ||
| 1584 | |||
| 1585 | To prove this in full generality requires some intellectual effort. | ||
| 1586 | We'll consider just a very simple case: | ||
| 1587 | |||
| 1588 | W ->gp X ->rcu-link Y ->rscs Z. | ||
| 1589 | |||
| 1590 | This formula means that there is a grace period G and a critical | ||
| 1591 | section C such that: | ||
| 1592 | |||
| 1593 | 1. W is po-before G; | ||
| 1594 | |||
| 1595 | 2. X is equal to or po-after G; | ||
| 1596 | |||
| 1597 | 3. X comes "before" Y in some sense; | ||
| 1598 | |||
| 1599 | 4. Y is po-before the end of C; | ||
| 1600 | |||
| 1601 | 5. Z is equal to or po-after the start of C. | ||
| 1602 | |||
| 1603 | From 2 - 4 we deduce that the grace period G ends before the critical | ||
| 1604 | section C. Then the second part of the Grace Period Guarantee says | ||
| 1605 | not only that G starts before C does, but also that W (which executes | ||
| 1606 | on G's CPU before G starts) must propagate to every CPU before C | ||
| 1607 | starts. In particular, W propagates to every CPU before Z executes | ||
| 1608 | (or finishes executing, in the case where Z is equal to the | ||
| 1609 | rcu_read_lock() fence event which starts C.) This sort of reasoning | ||
| 1610 | can be expanded to handle all the situations covered by rcu-fence. | ||
| 1611 | |||
| 1612 | Finally, the LKMM defines the RCU-before (rb) relation in terms of | ||
| 1613 | rcu-fence. This is done in essentially the same way as the pb | ||
| 1614 | relation was defined in terms of strong-fence. We will omit the | ||
| 1615 | details; the end result is that E ->rb F implies E must execute before | ||
| 1616 | F, just as E ->pb F does (and for much the same reasons). | ||
| 1563 | 1617 | ||
| 1564 | Putting this all together, the LKMM expresses the Grace Period | 1618 | Putting this all together, the LKMM expresses the Grace Period |
| 1565 | Guarantee by requiring that there are no cycles consisting of gp-link | 1619 | Guarantee by requiring that the rb relation does not contain a cycle. |
| 1566 | and rscs-link connections in which the number of gp-link instances is | 1620 | Equivalently, this "rcu" axiom requires that there are no events E and |
| 1567 | >= the number of rscs-link instances. It does this by defining the | 1621 | F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the |
| 1568 | rcu-path relation to link events E and F whenever it is possible to | 1622 | axiom requires that there are no cycles consisting of gp and rscs |
| 1569 | pass from E to F by a sequence of gp-link and rscs-link connections | 1623 | alternating with rcu-link, where the number of gp links is >= the |
| 1570 | with at least as many of the former as the latter. The LKMM's "rcu" | 1624 | number of rscs links. |
| 1571 | axiom then says that there are no events E such that E ->rcu-path E. | 1625 | |
| 1572 | 1626 | Justifying the axiom isn't easy, but it is in fact a valid | |
| 1573 | Justifying this axiom takes some intellectual effort, but it is in | 1627 | formalization of the Grace Period Guarantee. We won't attempt to go |
| 1574 | fact a valid formalization of the Grace Period Guarantee. We won't | 1628 | through the detailed argument, but the following analysis gives a |
| 1575 | attempt to go through the detailed argument, but the following | 1629 | taste of what is involved. Suppose we have a violation of the first |
| 1576 | analysis gives a taste of what is involved. Suppose we have a | 1630 | part of the Guarantee: A critical section starts before a grace |
| 1577 | violation of the first part of the Guarantee: A critical section | 1631 | period, and some store propagates to the critical section's CPU before |
| 1578 | starts before a grace period, and some store propagates to the | 1632 | the end of the critical section but doesn't propagate to some other |
| 1579 | critical section's CPU before the end of the critical section but | 1633 | CPU until after the end of the grace period. |
| 1580 | doesn't propagate to some other CPU until after the end of the grace | ||
| 1581 | period. | ||
| 1582 | 1634 | ||
| 1583 | Putting symbols to these ideas, let L and U be the rcu_read_lock() and | 1635 | Putting symbols to these ideas, let L and U be the rcu_read_lock() and |
| 1584 | rcu_read_unlock() fence events delimiting the critical section in | 1636 | rcu_read_unlock() fence events delimiting the critical section in |
| 1585 | question, and let S be the synchronize_rcu() fence event for the grace | 1637 | question, and let S be the synchronize_rcu() fence event for the grace |
| 1586 | period. Saying that the critical section starts before S means there | 1638 | period. Saying that the critical section starts before S means there |
| 1587 | are events E and F where E is po-after L (which marks the start of the | 1639 | are events E and F where E is po-after L (which marks the start of the |
| 1588 | critical section), E is "before" F in the sense of the link relation, | 1640 | critical section), E is "before" F in the sense of the rcu-link |
| 1589 | and F is po-before the grace period S: | 1641 | relation, and F is po-before the grace period S: |
| 1590 | 1642 | ||
| 1591 | L ->po E ->link F ->po S. | 1643 | L ->po E ->rcu-link F ->po S. |
| 1592 | 1644 | ||
| 1593 | Let W be the store mentioned above, let Z come before the end of the | 1645 | Let W be the store mentioned above, let Z come before the end of the |
| 1594 | critical section and witness that W propagates to the critical | 1646 | critical section and witness that W propagates to the critical |
| @@ -1600,16 +1652,19 @@ some event X which is po-after S. Symbolically, this amounts to: | |||
| 1600 | 1652 | ||
| 1601 | The fr link from Y to W indicates that W has not propagated to Y's CPU | 1653 | The fr link from Y to W indicates that W has not propagated to Y's CPU |
| 1602 | at the time that Y executes. From this, it can be shown (see the | 1654 | at the time that Y executes. From this, it can be shown (see the |
| 1603 | discussion of the link relation earlier) that X and Z are connected by | 1655 | discussion of the rcu-link relation earlier) that X and Z are related |
| 1604 | link, yielding: | 1656 | by rcu-link, yielding: |
| 1657 | |||
| 1658 | S ->po X ->rcu-link Z ->po U. | ||
| 1659 | |||
| 1660 | The formulas say that S is po-between F and X, hence F ->gp X. They | ||
| 1661 | also say that Z comes before the end of the critical section and E | ||
| 1662 | comes after its start, hence Z ->rscs E. From all this we obtain: | ||
| 1605 | 1663 | ||
| 1606 | S ->po X ->link Z ->po U. | 1664 | F ->gp X ->rcu-link Z ->rscs E ->rcu-link F, |
| 1607 | 1665 | ||
| 1608 | These formulas say that S is po-between F and X, hence F ->gp-link Z | 1666 | a forbidden cycle. Thus the "rcu" axiom rules out this violation of |
| 1609 | via X. They also say that Z comes before the end of the critical | 1667 | the Grace Period Guarantee. |
| 1610 | section and E comes after its start, hence Z ->rscs-link F via E. But | ||
| 1611 | now we have a forbidden cycle: F ->gp-link Z ->rscs-link F. Thus the | ||
| 1612 | "rcu" axiom rules out this violation of the Grace Period Guarantee. | ||
| 1613 | 1668 | ||
| 1614 | For something a little more down-to-earth, let's see how the axiom | 1669 | For something a little more down-to-earth, let's see how the axiom |
| 1615 | works out in practice. Consider the RCU code example from above, this | 1670 | works out in practice. Consider the RCU code example from above, this |
| @@ -1635,18 +1690,18 @@ time with statement labels added to the memory access instructions: | |||
| 1635 | } | 1690 | } |
| 1636 | 1691 | ||
| 1637 | 1692 | ||
| 1638 | If r2 = 0 at the end then P0's store at X overwrites the value | 1693 | If r2 = 0 at the end then P0's store at X overwrites the value that |
| 1639 | that P1's load at Z reads from, so we have Z ->fre X and thus | 1694 | P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X. |
| 1640 | Z ->link X. In addition, there is a synchronize_rcu() between Y and | 1695 | In addition, there is a synchronize_rcu() between Y and Z, so therefore |
| 1641 | Z, so therefore we have Y ->gp-link X. | 1696 | we have Y ->gp Z. |
| 1642 | 1697 | ||
| 1643 | If r1 = 1 at the end then P1's load at Y reads from P0's store at W, | 1698 | If r1 = 1 at the end then P1's load at Y reads from P0's store at W, |
| 1644 | so we have W ->link Y. In addition, W and X are in the same critical | 1699 | so we have W ->rcu-link Y. In addition, W and X are in the same critical |
| 1645 | section, so therefore we have X ->rscs-link Y. | 1700 | section, so therefore we have X ->rscs W. |
| 1646 | 1701 | ||
| 1647 | This gives us a cycle, Y ->gp-link X ->rscs-link Y, with one gp-link | 1702 | Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle, |
| 1648 | and one rscs-link, violating the "rcu" axiom. Hence the outcome is | 1703 | violating the "rcu" axiom. Hence the outcome is not allowed by the |
| 1649 | not allowed by the LKMM, as we would expect. | 1704 | LKMM, as we would expect. |
| 1650 | 1705 | ||
| 1651 | For contrast, let's see what can happen in a more complicated example: | 1706 | For contrast, let's see what can happen in a more complicated example: |
| 1652 | 1707 | ||
| @@ -1682,15 +1737,11 @@ For contrast, let's see what can happen in a more complicated example: | |||
| 1682 | } | 1737 | } |
| 1683 | 1738 | ||
| 1684 | If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows | 1739 | If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows |
| 1685 | that W ->rscs-link Y via X, Y ->gp-link U via Z, and U ->rscs-link W | 1740 | that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W. |
| 1686 | via V. And just as before, this gives a cycle: | 1741 | However this cycle is not forbidden, because the sequence of relations |
| 1687 | 1742 | contains fewer instances of gp (one) than of rscs (two). Consequently | |
| 1688 | W ->rscs-link Y ->gp-link U ->rscs-link W. | 1743 | the outcome is allowed by the LKMM. The following instruction timing |
| 1689 | 1744 | diagram shows how it might actually occur: | |
| 1690 | However, this cycle has fewer gp-link instances than rscs-link | ||
| 1691 | instances, and consequently the outcome is not forbidden by the LKMM. | ||
| 1692 | The following instruction timing diagram shows how it might actually | ||
| 1693 | occur: | ||
| 1694 | 1745 | ||
| 1695 | P0 P1 P2 | 1746 | P0 P1 P2 |
| 1696 | -------------------- -------------------- -------------------- | 1747 | -------------------- -------------------- -------------------- |
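In kernel code, the Grace Period Guarantee that the rcu-fence/rb machinery above formalizes is what makes the usual unpublish-then-free pattern safe: any reader whose critical section began before the grace period must have finished by the time synchronize_rcu() returns, and must have observed every store the updater made before it. The sketch below is a hedged illustration, not taken from explanation.txt; struct foo, gp and the function names are illustrative.

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		int a;
	};

	static struct foo __rcu *gp;

	static int reader_sketch(void)
	{
		struct foo *p;
		int r = -1;

		rcu_read_lock();
		p = rcu_dereference(gp);
		if (p)
			r = READ_ONCE(p->a);
		rcu_read_unlock();
		return r;
	}

	static void updater_sketch(void)
	{
		/* Assume the caller provides mutual exclusion among updaters. */
		struct foo *p = rcu_dereference_protected(gp, 1);

		rcu_assign_pointer(gp, NULL);
		synchronize_rcu();	/* all readers that could see @p have finished */
		kfree(p);
	}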
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt index ba2e34c2ec3f..b177f3e4a614 100644 --- a/tools/memory-model/Documentation/references.txt +++ b/tools/memory-model/Documentation/references.txt | |||
| @@ -63,15 +63,22 @@ o Shaked Flur, Susmit Sarkar, Christopher Pulte, Kyndylan Nienhuis, | |||
| 63 | Principles of Programming Languages (POPL 2017). ACM, New York, | 63 | Principles of Programming Languages (POPL 2017). ACM, New York, |
| 64 | NY, USA, 429–442. | 64 | NY, USA, 429–442. |
| 65 | 65 | ||
| 66 | o Christopher Pulte, Shaked Flur, Will Deacon, Jon French, | ||
| 67 | Susmit Sarkar, and Peter Sewell. 2018. "Simplifying ARM concurrency: | ||
| 68 | multicopy-atomic axiomatic and operational models for ARMv8". In | ||
| 69 | Proceedings of the ACM on Programming Languages, Volume 2, Issue | ||
| 70 | POPL, Article No. 19. ACM, New York, NY, USA. | ||
| 71 | |||
| 66 | 72 | ||
| 67 | Linux-kernel memory model | 73 | Linux-kernel memory model |
| 68 | ========================= | 74 | ========================= |
| 69 | 75 | ||
| 70 | o Andrea Parri, Alan Stern, Luc Maranget, Paul E. McKenney, | 76 | o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and |
| 71 | and Jade Alglave. 2017. "A formal model of | 77 | Alan Stern. 2018. "Frightening small children and disconcerting |
| 72 | Linux-kernel memory ordering - companion webpage". | 78 | grown-ups: Concurrency in the Linux kernel". In Proceedings of |
| 73 | http://moscova.inria.fr/∼maranget/cats7/linux/. (2017). [Online; | 79 | the 23rd International Conference on Architectural Support for |
| 74 | accessed 30-January-2017]. | 80 | Programming Languages and Operating Systems (ASPLOS 2018). ACM, |
| 81 | New York, NY, USA, 405-418. Webpage: http://diy.inria.fr/linux/. | ||
| 75 | 82 | ||
| 76 | o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and | 83 | o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and |
| 77 | Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)" | 84 | Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)" |
diff --git a/tools/memory-model/README b/tools/memory-model/README index 0b3a5f3c9ccd..734f7feaa5dc 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README | |||
| @@ -20,7 +20,7 @@ that litmus test to be exercised within the Linux kernel. | |||
| 20 | REQUIREMENTS | 20 | REQUIREMENTS |
| 21 | ============ | 21 | ============ |
| 22 | 22 | ||
| 23 | Version 7.48 of the "herd7" and "klitmus7" tools must be downloaded | 23 | Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded |
| 24 | separately: | 24 | separately: |
| 25 | 25 | ||
| 26 | https://github.com/herd/herdtools7 | 26 | https://github.com/herd/herdtools7 |
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 432c7cf71b23..64f5740e0e75 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell | |||
| @@ -5,10 +5,10 @@ | |||
| 5 | * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>, | 5 | * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>, |
| 6 | * Andrea Parri <parri.andrea@gmail.com> | 6 | * Andrea Parri <parri.andrea@gmail.com> |
| 7 | * | 7 | * |
| 8 | * An earlier version of this file appears in the companion webpage for | 8 | * An earlier version of this file appeared in the companion webpage for |
| 9 | * "Frightening small children and disconcerting grown-ups: Concurrency | 9 | * "Frightening small children and disconcerting grown-ups: Concurrency |
| 10 | * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, | 10 | * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, |
| 11 | * which is to appear in ASPLOS 2018. | 11 | * which appeared in ASPLOS 2018. |
| 12 | *) | 12 | *) |
| 13 | 13 | ||
| 14 | "Linux-kernel memory consistency model" | 14 | "Linux-kernel memory consistency model" |
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index df97db03b6c2..59b5cbe6b624 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat | |||
| @@ -5,10 +5,10 @@ | |||
| 5 | * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>, | 5 | * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>, |
| 6 | * Andrea Parri <parri.andrea@gmail.com> | 6 | * Andrea Parri <parri.andrea@gmail.com> |
| 7 | * | 7 | * |
| 8 | * An earlier version of this file appears in the companion webpage for | 8 | * An earlier version of this file appeared in the companion webpage for |
| 9 | * "Frightening small children and disconcerting grown-ups: Concurrency | 9 | * "Frightening small children and disconcerting grown-ups: Concurrency |
| 10 | * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, | 10 | * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, |
| 11 | * which is to appear in ASPLOS 2018. | 11 | * which appeared in ASPLOS 2018. |
| 12 | *) | 12 | *) |
| 13 | 13 | ||
| 14 | "Linux-kernel memory consistency model" | 14 | "Linux-kernel memory consistency model" |
| @@ -100,22 +100,29 @@ let rscs = po ; crit^-1 ; po? | |||
| 100 | * one but two non-rf relations, but only in conjunction with an RCU | 100 | * one but two non-rf relations, but only in conjunction with an RCU |
| 101 | * read-side critical section. | 101 | * read-side critical section. |
| 102 | *) | 102 | *) |
| 103 | let link = hb* ; pb* ; prop | 103 | let rcu-link = hb* ; pb* ; prop |
| 104 | 104 | ||
| 105 | (* Chains that affect the RCU grace-period guarantee *) | 105 | (* |
| 106 | let gp-link = gp ; link | 106 | * Any sequence containing at least as many grace periods as RCU read-side |
| 107 | let rscs-link = rscs ; link | 107 | * critical sections (joined by rcu-link) acts as a generalized strong fence. |
| 108 | *) | ||
| 109 | let rec rcu-fence = gp | | ||
| 110 | (gp ; rcu-link ; rscs) | | ||
| 111 | (rscs ; rcu-link ; gp) | | ||
| 112 | (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) | | ||
| 113 | (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) | | ||
| 114 | (rcu-fence ; rcu-link ; rcu-fence) | ||
| 115 | |||
| 116 | (* rb orders instructions just as pb does *) | ||
| 117 | let rb = prop ; rcu-fence ; hb* ; pb* | ||
| 118 | |||
| 119 | irreflexive rb as rcu | ||
| 108 | 120 | ||
| 109 | (* | 121 | (* |
| 110 | * A cycle containing at least as many grace periods as RCU read-side | 122 | * The happens-before, propagation, and rcu constraints are all |
| 111 | * critical sections is forbidden. | 123 | * expressions of temporal ordering. They could be replaced by |
| 124 | * a single constraint on an "executes-before" relation, xb: | ||
| 125 | * | ||
| 126 | * let xb = hb | pb | rb | ||
| 127 | * acyclic xb as executes-before | ||
| 112 | *) | 128 | *) |
| 113 | let rec rcu-path = | ||
| 114 | gp-link | | ||
| 115 | (gp-link ; rscs-link) | | ||
| 116 | (rscs-link ; gp-link) | | ||
| 117 | (rcu-path ; rcu-path) | | ||
| 118 | (gp-link ; rcu-path ; rscs-link) | | ||
| 119 | (rscs-link ; rcu-path ; gp-link) | ||
| 120 | |||
| 121 | irreflexive rcu-path as rcu | ||
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def index 397e4e67e8c8..6fa3eb28d40b 100644 --- a/tools/memory-model/linux-kernel.def +++ b/tools/memory-model/linux-kernel.def | |||
| @@ -1,9 +1,9 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0+ | 1 | // SPDX-License-Identifier: GPL-2.0+ |
| 2 | // | 2 | // |
| 3 | // An earlier version of this file appears in the companion webpage for | 3 | // An earlier version of this file appeared in the companion webpage for |
| 4 | // "Frightening small children and disconcerting grown-ups: Concurrency | 4 | // "Frightening small children and disconcerting grown-ups: Concurrency |
| 5 | // in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, | 5 | // in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, |
| 6 | // which is to appear in ASPLOS 2018. | 6 | // which appeared in ASPLOS 2018. |
| 7 | 7 | ||
| 8 | // ONCE | 8 | // ONCE |
| 9 | READ_ONCE(X) __load{once}(X) | 9 | READ_ONCE(X) __load{once}(X) |
| @@ -14,14 +14,15 @@ smp_store_release(X,V) { __store{release}(*X,V); } | |||
| 14 | smp_load_acquire(X) __load{acquire}(*X) | 14 | smp_load_acquire(X) __load{acquire}(*X) |
| 15 | rcu_assign_pointer(X,V) { __store{release}(X,V); } | 15 | rcu_assign_pointer(X,V) { __store{release}(X,V); } |
| 16 | rcu_dereference(X) __load{once}(X) | 16 | rcu_dereference(X) __load{once}(X) |
| 17 | smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; } | ||
| 17 | 18 | ||
| 18 | // Fences | 19 | // Fences |
| 19 | smp_mb() { __fence{mb} ; } | 20 | smp_mb() { __fence{mb}; } |
| 20 | smp_rmb() { __fence{rmb} ; } | 21 | smp_rmb() { __fence{rmb}; } |
| 21 | smp_wmb() { __fence{wmb} ; } | 22 | smp_wmb() { __fence{wmb}; } |
| 22 | smp_mb__before_atomic() { __fence{before-atomic} ; } | 23 | smp_mb__before_atomic() { __fence{before-atomic}; } |
| 23 | smp_mb__after_atomic() { __fence{after-atomic} ; } | 24 | smp_mb__after_atomic() { __fence{after-atomic}; } |
| 24 | smp_mb__after_spinlock() { __fence{after-spinlock} ; } | 25 | smp_mb__after_spinlock() { __fence{after-spinlock}; } |
| 25 | 26 | ||
| 26 | // Exchange | 27 | // Exchange |
| 27 | xchg(X,V) __xchg{mb}(X,V) | 28 | xchg(X,V) __xchg{mb}(X,V) |
| @@ -34,26 +35,27 @@ cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W) | |||
| 34 | cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W) | 35 | cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W) |
| 35 | 36 | ||
| 36 | // Spinlocks | 37 | // Spinlocks |
| 37 | spin_lock(X) { __lock(X) ; } | 38 | spin_lock(X) { __lock(X); } |
| 38 | spin_unlock(X) { __unlock(X) ; } | 39 | spin_unlock(X) { __unlock(X); } |
| 39 | spin_trylock(X) __trylock(X) | 40 | spin_trylock(X) __trylock(X) |
| 41 | spin_is_locked(X) __islocked(X) | ||
| 40 | 42 | ||
| 41 | // RCU | 43 | // RCU |
| 42 | rcu_read_lock() { __fence{rcu-lock}; } | 44 | rcu_read_lock() { __fence{rcu-lock}; } |
| 43 | rcu_read_unlock() { __fence{rcu-unlock};} | 45 | rcu_read_unlock() { __fence{rcu-unlock}; } |
| 44 | synchronize_rcu() { __fence{sync-rcu}; } | 46 | synchronize_rcu() { __fence{sync-rcu}; } |
| 45 | synchronize_rcu_expedited() { __fence{sync-rcu}; } | 47 | synchronize_rcu_expedited() { __fence{sync-rcu}; } |
| 46 | 48 | ||
| 47 | // Atomic | 49 | // Atomic |
| 48 | atomic_read(X) READ_ONCE(*X) | 50 | atomic_read(X) READ_ONCE(*X) |
| 49 | atomic_set(X,V) { WRITE_ONCE(*X,V) ; } | 51 | atomic_set(X,V) { WRITE_ONCE(*X,V); } |
| 50 | atomic_read_acquire(X) smp_load_acquire(X) | 52 | atomic_read_acquire(X) smp_load_acquire(X) |
| 51 | atomic_set_release(X,V) { smp_store_release(X,V); } | 53 | atomic_set_release(X,V) { smp_store_release(X,V); } |
| 52 | 54 | ||
| 53 | atomic_add(V,X) { __atomic_op(X,+,V) ; } | 55 | atomic_add(V,X) { __atomic_op(X,+,V); } |
| 54 | atomic_sub(V,X) { __atomic_op(X,-,V) ; } | 56 | atomic_sub(V,X) { __atomic_op(X,-,V); } |
| 55 | atomic_inc(X) { __atomic_op(X,+,1) ; } | 57 | atomic_inc(X) { __atomic_op(X,+,1); } |
| 56 | atomic_dec(X) { __atomic_op(X,-,1) ; } | 58 | atomic_dec(X) { __atomic_op(X,-,1); } |
| 57 | 59 | ||
| 58 | atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V) | 60 | atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V) |
| 59 | atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V) | 61 | atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V) |
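For context, the newly added smp_store_mb() definition models the kernel's "store, then full memory barrier" helper as __store{once} followed by __fence{mb}. A minimal store-buffering sketch using it appears below; the test name and register names are hypothetical and not part of this change, and the expectation (which herd7 can confirm) is that the full fence forbids the classic store-buffering outcome.

C SB+storemb+storemb

(*
 * Hypothetical sketch, not part of this patch: smp_store_mb() acts as a
 * WRITE_ONCE() followed by smp_mb(), so the cycle below should be
 * forbidden (Never).
 *)

{}

P0(int *x, int *y)
{
	int r0;

	smp_store_mb(*x, 1);
	r0 = READ_ONCE(*y);
}

P1(int *x, int *y)
{
	int r1;

	smp_store_mb(*y, 1);
	r1 = READ_ONCE(*x);
}

exists (0:r0=0 /\ 1:r1=0)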
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore new file mode 100644 index 000000000000..6e2ddc54152f --- /dev/null +++ b/tools/memory-model/litmus-tests/.gitignore | |||
| @@ -0,0 +1 @@ | |||
| *.litmus.out | |||
diff --git a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus index 50d5db9ea983..98a3716efa37 100644 --- a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus +++ b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus | |||
| @@ -7,7 +7,7 @@ C IRIW+mbonceonces+OnceOnce | |||
| 7 | * between each pair of reads. In other words, is smp_mb() sufficient to | 7 | * between each pair of reads. In other words, is smp_mb() sufficient to |
| 8 | * cause two different reading processes to agree on the order of a pair | 8 | * cause two different reading processes to agree on the order of a pair |
| 9 | * of writes, where each write is to a different variable by a different | 9 | * of writes, where each write is to a different variable by a different |
| 10 | * process? | 10 | * process? This litmus test exercises LKMM's "propagation" rule. |
| 11 | *) | 11 | *) |
| 12 | 12 | ||
| 13 | {} | 13 | {} |
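For readers unfamiliar with the shape being described, a rough sketch of the IRIW pattern follows; this hunk only updates the comment, and the register names below are illustrative rather than quoted from the file. Per the "propagation" rule mentioned above, the smp_mb() calls should force the two readers to agree on the order of the writes, forbidding the listed outcome.

C IRIW-sketch

(* Hypothetical sketch, not part of this patch. *)

{}

P0(int *x)
{
	WRITE_ONCE(*x, 1);
}

P1(int *x, int *y)
{
	int r0;
	int r1;

	r0 = READ_ONCE(*x);
	smp_mb();
	r1 = READ_ONCE(*y);
}

P2(int *y)
{
	WRITE_ONCE(*y, 1);
}

P3(int *x, int *y)
{
	int r2;
	int r3;

	r2 = READ_ONCE(*y);
	smp_mb();
	r3 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0 /\ 3:r2=1 /\ 3:r3=0)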
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus new file mode 100644 index 000000000000..50f4d62bbf0e --- /dev/null +++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | C MP+polockmbonce+poacquiresilsil | ||
| 2 | |||
| 3 | (* | ||
| 4 | * Result: Never | ||
| 5 | * | ||
| 6 | * Do spinlocks combined with smp_mb__after_spinlock() provide order | ||
| 7 | * to outside observers using spin_is_locked() to sense the lock-held | ||
| 8 | * state, ordered by acquire? Note that when the first spin_is_locked() | ||
| 9 | * returns false and the second true, we know that the smp_load_acquire() | ||
| 10 | * executed before the lock was acquired (loosely speaking). | ||
| 11 | *) | ||
| 12 | |||
| 13 | { | ||
| 14 | } | ||
| 15 | |||
| 16 | P0(spinlock_t *lo, int *x) | ||
| 17 | { | ||
| 18 | spin_lock(lo); | ||
| 19 | smp_mb__after_spinlock(); | ||
| 20 | WRITE_ONCE(*x, 1); | ||
| 21 | spin_unlock(lo); | ||
| 22 | } | ||
| 23 | |||
| 24 | P1(spinlock_t *lo, int *x) | ||
| 25 | { | ||
| 26 | int r1; | ||
| 27 | int r2; | ||
| 28 | int r3; | ||
| 29 | |||
| 30 | r1 = smp_load_acquire(x); | ||
| 31 | r2 = spin_is_locked(lo); | ||
| 32 | r3 = spin_is_locked(lo); | ||
| 33 | } | ||
| 34 | |||
| 35 | exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) | ||
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus new file mode 100644 index 000000000000..abf81e7a0895 --- /dev/null +++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | C MP+polockonce+poacquiresilsil | ||
| 2 | |||
| 3 | (* | ||
| 4 | * Result: Sometimes | ||
| 5 | * | ||
| 6 | * Do spinlocks provide order to outside observers using spin_is_locked() | ||
| 7 | * to sense the lock-held state, ordered by acquire? Note that when the | ||
| 8 | * first spin_is_locked() returns false and the second true, we know that | ||
| 9 | * the smp_load_acquire() executed before the lock was acquired (loosely | ||
| 10 | * speaking). | ||
| 11 | *) | ||
| 12 | |||
| 13 | { | ||
| 14 | } | ||
| 15 | |||
| 16 | P0(spinlock_t *lo, int *x) | ||
| 17 | { | ||
| 18 | spin_lock(lo); | ||
| 19 | WRITE_ONCE(*x, 1); | ||
| 20 | spin_unlock(lo); | ||
| 21 | } | ||
| 22 | |||
| 23 | P1(spinlock_t *lo, int *x) | ||
| 24 | { | ||
| 25 | int r1; | ||
| 26 | int r2; | ||
| 27 | int r3; | ||
| 28 | |||
| 29 | r1 = smp_load_acquire(x); | ||
| 30 | r2 = spin_is_locked(lo); | ||
| 31 | r3 = spin_is_locked(lo); | ||
| 32 | } | ||
| 33 | |||
| 34 | exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) | ||
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README index 04096fb8b8d9..17eb9a8c222d 100644 --- a/tools/memory-model/litmus-tests/README +++ b/tools/memory-model/litmus-tests/README | |||
| @@ -23,7 +23,8 @@ IRIW+mbonceonces+OnceOnce.litmus | |||
| 23 | between each pair of reads. In other words, is smp_mb() | 23 | between each pair of reads. In other words, is smp_mb() |
| 24 | sufficient to cause two different reading processes to agree on | 24 | sufficient to cause two different reading processes to agree on |
| 25 | the order of a pair of writes, where each write is to a different | 25 | the order of a pair of writes, where each write is to a different |
| 26 | variable by a different process? | 26 | variable by a different process? This litmus test is forbidden |
| 27 | by LKMM's propagation rule. | ||
| 27 | 28 | ||
| 28 | IRIW+poonceonces+OnceOnce.litmus | 29 | IRIW+poonceonces+OnceOnce.litmus |
| 29 | Test of independent reads from independent writes with nothing | 30 | Test of independent reads from independent writes with nothing |
| @@ -63,6 +64,16 @@ LB+poonceonces.litmus | |||
| 63 | MP+onceassign+derefonce.litmus | 64 | MP+onceassign+derefonce.litmus |
| 64 | As below, but with rcu_assign_pointer() and an rcu_dereference(). | 65 | As below, but with rcu_assign_pointer() and an rcu_dereference(). |
| 65 | 66 | ||
| 67 | MP+polockmbonce+poacquiresilsil.litmus | ||
| 68 | Protect the access with a lock and an smp_mb__after_spinlock() | ||
| 69 | in one process, and use an acquire load followed by a pair of | ||
| 70 | spin_is_locked() calls in the other process. | ||
| 71 | |||
| 72 | MP+polockonce+poacquiresilsil.litmus | ||
| 73 | Protect the access with a lock in one process, and use an | ||
| 74 | acquire load followed by a pair of spin_is_locked() calls | ||
| 75 | in the other process. | ||
| 76 | |||
| 66 | MP+polocks.litmus | 77 | MP+polocks.litmus |
| 67 | As below, but with the second access of the writer process | 78 | As below, but with the second access of the writer process |
| 68 | and the first access of reader process protected by a lock. | 79 | and the first access of reader process protected by a lock. |
| @@ -109,8 +120,10 @@ S+wmbonceonce+poacquireonce.litmus | |||
| 109 | 120 | ||
| 110 | WRC+poonceonces+Once.litmus | 121 | WRC+poonceonces+Once.litmus |
| 111 | WRC+pooncerelease+rmbonceonce+Once.litmus | 122 | WRC+pooncerelease+rmbonceonce+Once.litmus |
| 112 | These two are members of an extension of the MP litmus-test class | 123 | These two are members of an extension of the MP litmus-test |
| 113 | in which the first write is moved to a separate process. | 124 | class in which the first write is moved to a separate process. |
| 125 | The second is forbidden because smp_store_release() is | ||
| 126 | A-cumulative in LKMM. | ||
| 114 | 127 | ||
| 115 | Z6.0+pooncelock+pooncelock+pombonce.litmus | 128 | Z6.0+pooncelock+pooncelock+pombonce.litmus |
| 116 | Is the ordering provided by a spin_unlock() and a subsequent | 129 | Is the ordering provided by a spin_unlock() and a subsequent |
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus index 97fcbffde9a0..ad3448b941e6 100644 --- a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus +++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus | |||
| @@ -5,7 +5,9 @@ C WRC+pooncerelease+rmbonceonce+Once | |||
| 5 | * | 5 | * |
| 6 | * This litmus test is an extension of the message-passing pattern, where | 6 | * This litmus test is an extension of the message-passing pattern, where |
| 7 | * the first write is moved to a separate process. Because it features | 7 | * the first write is moved to a separate process. Because it features |
| 8 | * a release and a read memory barrier, it should be forbidden. | 8 | * a release and a read memory barrier, it should be forbidden. More |
| 9 | * specifically, this litmus test is forbidden because smp_store_release() | ||
| 10 | * is A-cumulative in LKMM. | ||
| 9 | *) | 11 | *) |
| 10 | 12 | ||
| 11 | {} | 13 | {} |
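To make the A-cumulativity point concrete, here is a sketch of the WRC shape the comment describes: the first write sits in its own process, the second process reads it and then performs an smp_store_release(), and the third process reads that release with an smp_rmb() between its two loads. Register names are illustrative. Because the release is A-cumulative, it orders P0's write (already read by P1) before P1's own store, so the listed outcome should be forbidden.

C WRC-sketch

(* Hypothetical sketch, not part of this patch. *)

{}

P0(int *x)
{
	WRITE_ONCE(*x, 1);
}

P1(int *x, int *y)
{
	int r0;

	r0 = READ_ONCE(*x);
	smp_store_release(y, 1);
}

P2(int *x, int *y)
{
	int r1;
	int r2;

	r1 = READ_ONCE(*y);
	smp_rmb();
	r2 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 2:r1=1 /\ 2:r2=0)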
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index ba4a4ec6d313..305ded17e741 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat | |||
| @@ -4,46 +4,72 @@ | |||
| 4 | * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu> | 4 | * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu> |
| 5 | *) | 5 | *) |
| 6 | 6 | ||
| 7 | (* Generate coherence orders and handle lock operations *) | 7 | (* |
| 8 | * Generate coherence orders and handle lock operations | ||
| 9 | * | ||
| 10 | * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48. | ||
| 11 | * spin_is_locked() is functional from herd7 version 7.49. | ||
| 12 | *) | ||
| 8 | 13 | ||
| 9 | include "cross.cat" | 14 | include "cross.cat" |
| 10 | 15 | ||
| 11 | (* From lock reads to their partner lock writes *) | ||
| 12 | let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po) | ||
| 13 | let rmw = rmw | lk-rmw | ||
| 14 | |||
| 15 | (* | 16 | (* |
| 16 | * A paired LKR must always see an unlocked value; spin_lock() calls nested | 17 | * The lock-related events generated by herd are as follows: |
| 17 | * inside a critical section (for the same lock) always deadlock. | 18 | * |
| 19 | * LKR Lock-Read: the read part of a spin_lock() or successful | ||
| 20 | * spin_trylock() read-modify-write event pair | ||
| 21 | * LKW Lock-Write: the write part of a spin_lock() or successful | ||
| 22 | * spin_trylock() RMW event pair | ||
| 23 | * UL Unlock: a spin_unlock() event | ||
| 24 | * LF Lock-Fail: a failed spin_trylock() event | ||
| 25 | * RL Read-Locked: a spin_is_locked() event which returns True | ||
| 26 | * RU Read-Unlocked: a spin_is_locked() event which returns False | ||
| 27 | * | ||
| 28 | * LKR and LKW events always come paired, like all RMW event sequences. | ||
| 29 | * | ||
| 30 | * LKR, LF, RL, and RU are read events; LKR has Acquire ordering. | ||
| 31 | * LKW and UL are write events; UL has Release ordering. | ||
| 32 | * LKW, LF, RL, and RU have no ordering properties. | ||
| 18 | *) | 33 | *) |
| 19 | empty ([LKW] ; po-loc ; [domain(lk-rmw)]) \ (po-loc ; [UL] ; po-loc) | ||
| 20 | as lock-nest | ||
| 21 | 34 | ||
| 22 | (* The litmus test is invalid if an LKW event is not part of an RMW pair *) | 35 | (* Backward compatibility *) |
| 23 | flag ~empty LKW \ range(lk-rmw) as unpaired-LKW | 36 | let RL = try RL with emptyset |
| 37 | let RU = try RU with emptyset | ||
| 24 | 38 | ||
| 25 | (* This will be allowed if we implement spin_is_locked() *) | 39 | (* Treat RL as a kind of LF: a read with no ordering properties *) |
| 26 | flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR | 40 | let LF = LF | RL |
| 27 | 41 | ||
| 28 | (* There should be no R or W accesses to spinlocks *) | 42 | (* There should be no ordinary R or W accesses to spinlocks *) |
| 29 | let ALL-LOCKS = LKR | LKW | UL | LF | 43 | let ALL-LOCKS = LKR | LKW | UL | LF | RU |
| 30 | flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses | 44 | flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses |
| 31 | 45 | ||
| 46 | (* Link Lock-Reads to their RMW-partner Lock-Writes *) | ||
| 47 | let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po) | ||
| 48 | let rmw = rmw | lk-rmw | ||
| 49 | |||
| 50 | (* The litmus test is invalid if an LKR/LKW event is not part of an RMW pair *) | ||
| 51 | flag ~empty LKW \ range(lk-rmw) as unpaired-LKW | ||
| 52 | flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR | ||
| 53 | |||
| 54 | (* | ||
| 55 | * An LKR must always see an unlocked value; spin_lock() calls nested | ||
| 56 | * inside a critical section (for the same lock) always deadlock. | ||
| 57 | *) | ||
| 58 | empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest | ||
| 59 | |||
| 32 | (* The final value of a spinlock should not be tested *) | 60 | (* The final value of a spinlock should not be tested *) |
| 33 | flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final | 61 | flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final |
| 34 | 62 | ||
| 35 | |||
| 36 | (* | 63 | (* |
| 37 | * Put lock operations in their appropriate classes, but leave UL out of W | 64 | * Put lock operations in their appropriate classes, but leave UL out of W |
| 38 | * until after the co relation has been generated. | 65 | * until after the co relation has been generated. |
| 39 | *) | 66 | *) |
| 40 | let R = R | LKR | LF | 67 | let R = R | LKR | LF | RU |
| 41 | let W = W | LKW | 68 | let W = W | LKW |
| 42 | 69 | ||
| 43 | let Release = Release | UL | 70 | let Release = Release | UL |
| 44 | let Acquire = Acquire | LKR | 71 | let Acquire = Acquire | LKR |
| 45 | 72 | ||
| 46 | |||
| 47 | (* Match LKW events to their corresponding UL events *) | 73 | (* Match LKW events to their corresponding UL events *) |
| 48 | let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc) | 74 | let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc) |
| 49 | 75 | ||
| @@ -53,27 +79,48 @@ flag ~empty UL \ range(critical) as unmatched-unlock | |||
| 53 | let UNMATCHED-LKW = LKW \ domain(critical) | 79 | let UNMATCHED-LKW = LKW \ domain(critical) |
| 54 | empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks | 80 | empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks |
| 55 | 81 | ||
| 56 | |||
| 57 | (* rfi for LF events: link each LKW to the LF events in its critical section *) | 82 | (* rfi for LF events: link each LKW to the LF events in its critical section *) |
| 58 | let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc) | 83 | let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc) |
| 59 | 84 | ||
| 60 | (* rfe for LF events *) | 85 | (* rfe for LF events *) |
| 61 | let all-possible-rfe-lf = | 86 | let all-possible-rfe-lf = |
| 62 | (* | 87 | (* |
| 63 | * Given an LF event r, compute the possible rfe edges for that event | 88 | * Given an LF event r, compute the possible rfe edges for that event |
| 64 | * (all those starting from LKW events in other threads), | 89 | * (all those starting from LKW events in other threads), |
| 65 | * and then convert that relation to a set of single-edge relations. | 90 | * and then convert that relation to a set of single-edge relations. |
| 66 | *) | 91 | *) |
| 67 | let possible-rfe-lf r = | 92 | let possible-rfe-lf r = |
| 68 | let pair-to-relation p = p ++ 0 | 93 | let pair-to-relation p = p ++ 0 |
| 69 | in map pair-to-relation ((LKW * {r}) & loc & ext) | 94 | in map pair-to-relation ((LKW * {r}) & loc & ext) |
| 70 | (* Do this for each LF event r that isn't in rfi-lf *) | 95 | (* Do this for each LF event r that isn't in rfi-lf *) |
| 71 | in map possible-rfe-lf (LF \ range(rfi-lf)) | 96 | in map possible-rfe-lf (LF \ range(rfi-lf)) |
| 72 | 97 | ||
| 73 | (* Generate all rf relations for LF events *) | 98 | (* Generate all rf relations for LF events *) |
| 74 | with rfe-lf from cross(all-possible-rfe-lf) | 99 | with rfe-lf from cross(all-possible-rfe-lf) |
| 75 | let rf = rf | rfi-lf | rfe-lf | 100 | let rf-lf = rfe-lf | rfi-lf |
| 101 | |||
| 102 | (* | ||
| 103 | * RU, i.e., spin_is_locked() returning False, is slightly different. | ||
| 104 | * We rely on the memory model to rule out cases where spin_is_locked() | ||
| 105 | * within one of the lock's critical sections returns False. | ||
| 106 | *) | ||
| 107 | |||
| 108 | (* rfi for RU events: an RU may read from the last po-previous UL *) | ||
| 109 | let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc) | ||
| 110 | |||
| 111 | (* rfe for RU events: an RU may read from an external UL or the initial write *) | ||
| 112 | let all-possible-rfe-ru = | ||
| 113 | let possible-rfe-ru r = | ||
| 114 | let pair-to-relation p = p ++ 0 | ||
| 115 | in map pair-to-relation (((UL | IW) * {r}) & loc & ext) | ||
| 116 | in map possible-rfe-ru RU | ||
| 117 | |||
| 118 | (* Generate all rf relations for RU events *) | ||
| 119 | with rfe-ru from cross(all-possible-rfe-ru) | ||
| 120 | let rf-ru = rfe-ru | rfi-ru | ||
| 76 | 121 | ||
| 122 | (* Final rf relation *) | ||
| 123 | let rf = rf | rf-lf | rf-ru | ||
| 77 | 124 | ||
| 78 | (* Generate all co relations, including LKW events but not UL *) | 125 | (* Generate all co relations, including LKW events but not UL *) |
| 79 | let co0 = co0 | ([IW] ; loc ; [LKW]) | | 126 | let co0 = co0 | ([IW] ; loc ; [LKW]) | |
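To illustrate the event classes listed in the new lock.cat header comment, consider the hypothetical litmus fragment below (it needs herd7 7.49 or later because of spin_is_locked()). No particular result is asserted; the point is which events herd generates: spin_lock() yields an LKR/LKW pair, spin_unlock() a UL, spin_is_locked() an RL or RU depending on the value it reads, and a failed spin_trylock() would add an LF. Under the rules above, an RL may read from P0's LKW via the rf-lf construction, while an RU is constrained by rfi-ru/rfe-ru.

C IsLocked-events-sketch

(*
 * Hypothetical sketch, not part of this patch; it merely exercises the
 * lock-related event classes described in lock.cat.
 *)

{}

P0(spinlock_t *lo, int *x)
{
	spin_lock(lo);
	WRITE_ONCE(*x, 1);
	spin_unlock(lo);
}

P1(spinlock_t *lo, int *x)
{
	int r0;
	int r1;

	r0 = spin_is_locked(lo);
	r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 1:r1=0)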
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh new file mode 100644 index 000000000000..af0aa15ab84e --- /dev/null +++ b/tools/memory-model/scripts/checkalllitmus.sh | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | #!/bin/sh | ||
| 2 | # | ||
| 3 | # Run herd tests on all .litmus files in the specified directory (which | ||
| 4 | # defaults to litmus-tests) and check each file's result against a "Result:" | ||
| 5 | # comment within that litmus test. If the verification result does not | ||
| 6 | # match that specified in the litmus test, this script prints an error | ||
| 7 | # message prefixed with "^^^". It also outputs verification results to | ||
| 8 | # a file whose name is that of the specified litmus test, but with ".out" | ||
| 9 | # appended. | ||
| 10 | # | ||
| 11 | # Usage: | ||
| 12 | # sh checkalllitmus.sh [ directory ] | ||
| 13 | # | ||
| 14 | # The LINUX_HERD_OPTIONS environment variable may be used to specify | ||
| 15 | # arguments to herd, whose default is defined by the checklitmus.sh script. | ||
| 16 | # Thus, one would normally run this in the directory containing the memory | ||
| 17 | # model, specifying the pathname of the litmus test to check. | ||
| 18 | # | ||
| 19 | # This script makes no attempt to run the litmus tests concurrently. | ||
| 20 | # | ||
| 21 | # This program is free software; you can redistribute it and/or modify | ||
| 22 | # it under the terms of the GNU General Public License as published by | ||
| 23 | # the Free Software Foundation; either version 2 of the License, or | ||
| 24 | # (at your option) any later version. | ||
| 25 | # | ||
| 26 | # This program is distributed in the hope that it will be useful, | ||
| 27 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 28 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 29 | # GNU General Public License for more details. | ||
| 30 | # | ||
| 31 | # You should have received a copy of the GNU General Public License | ||
| 32 | # along with this program; if not, you can access it online at | ||
| 33 | # http://www.gnu.org/licenses/gpl-2.0.html. | ||
| 34 | # | ||
| 35 | # Copyright IBM Corporation, 2018 | ||
| 36 | # | ||
| 37 | # Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||
| 38 | |||
| 39 | litmusdir=${1-litmus-tests} | ||
| 40 | if test -d "$litmusdir" -a -r "$litmusdir" -a -x "$litmusdir" | ||
| 41 | then | ||
| 42 | : | ||
| 43 | else | ||
| 44 | echo ' --- ' error: $litmusdir is not an accessible directory | ||
| 45 | exit 255 | ||
| 46 | fi | ||
| 47 | |||
| 48 | # Find the checklitmus script. If it is not where we expect it, then | ||
| 49 | # assume that the caller has the PATH environment variable set | ||
| 50 | # appropriately. | ||
| 51 | if test -x scripts/checklitmus.sh | ||
| 52 | then | ||
| 53 | clscript=scripts/checklitmus.sh | ||
| 54 | else | ||
| 55 | clscript=checklitmus.sh | ||
| 56 | fi | ||
| 57 | |||
| 58 | # Run the script on all the litmus tests in the specified directory | ||
| 59 | ret=0 | ||
| 60 | for i in $litmusdir/*.litmus | ||
| 61 | do | ||
| 62 | if ! $clscript $i | ||
| 63 | then | ||
| 64 | ret=1 | ||
| 65 | fi | ||
| 66 | done | ||
| 67 | if test "$ret" -ne 0 | ||
| 68 | then | ||
| 69 | echo " ^^^ VERIFICATION MISMATCHES" | ||
| 70 | else | ||
| 71 | echo All litmus tests verified as expected. | ||
| 72 | fi | ||
| 73 | exit $ret | ||
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh new file mode 100644 index 000000000000..e2e477472844 --- /dev/null +++ b/tools/memory-model/scripts/checklitmus.sh | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | #!/bin/sh | ||
| 2 | # | ||
| 3 | # Run a herd test and check the result against a "Result:" comment within | ||
| 4 | # the litmus test. If the verification result does not match that specified | ||
| 5 | # in the litmus test, this script prints an error message prefixed with | ||
| 6 | # "^^^" and exits with a non-zero status. It also outputs verification | ||
| 7 | # results to a file whose name is that of the specified litmus test, but | ||
| 8 | # with ".out" appended. | ||
| 9 | # | ||
| 10 | # Usage: | ||
| 11 | # sh checklitmus.sh file.litmus | ||
| 12 | # | ||
| 13 | # The LINUX_HERD_OPTIONS environment variable may be used to specify | ||
| 14 | # arguments to herd, which default to "-conf linux-kernel.cfg". Thus, | ||
| 15 | # one would normally run this in the directory containing the memory model, | ||
| 16 | # specifying the pathname of the litmus test to check. | ||
| 17 | # | ||
| 18 | # This program is free software; you can redistribute it and/or modify | ||
| 19 | # it under the terms of the GNU General Public License as published by | ||
| 20 | # the Free Software Foundation; either version 2 of the License, or | ||
| 21 | # (at your option) any later version. | ||
| 22 | # | ||
| 23 | # This program is distributed in the hope that it will be useful, | ||
| 24 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 25 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 26 | # GNU General Public License for more details. | ||
| 27 | # | ||
| 28 | # You should have received a copy of the GNU General Public License | ||
| 29 | # along with this program; if not, you can access it online at | ||
| 30 | # http://www.gnu.org/licenses/gpl-2.0.html. | ||
| 31 | # | ||
| 32 | # Copyright IBM Corporation, 2018 | ||
| 33 | # | ||
| 34 | # Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||
| 35 | |||
| 36 | litmus=$1 | ||
| 37 | herdoptions=${LINUX_HERD_OPTIONS--conf linux-kernel.cfg} | ||
| 38 | |||
| 39 | if test -f "$litmus" -a -r "$litmus" | ||
| 40 | then | ||
| 41 | : | ||
| 42 | else | ||
| 43 | echo ' --- ' error: \"$litmus\" is not a readable file | ||
| 44 | exit 255 | ||
| 45 | fi | ||
| 46 | if grep -q '^ \* Result: ' $litmus | ||
| 47 | then | ||
| 48 | outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'` | ||
| 49 | else | ||
| 50 | outcome=specified | ||
| 51 | fi | ||
| 52 | |||
| 53 | echo Herd options: $herdoptions > $litmus.out | ||
| 54 | /usr/bin/time herd7 -o ~/tmp $herdoptions $litmus >> $litmus.out 2>&1 | ||
| 55 | grep "Herd options:" $litmus.out | ||
| 56 | grep '^Observation' $litmus.out | ||
| 57 | if grep -q '^Observation' $litmus.out | ||
| 58 | then | ||
| 59 | : | ||
| 60 | else | ||
| 61 | cat $litmus.out | ||
| 62 | echo ' ^^^ Verification error' | ||
| 63 | echo ' ^^^ Verification error' >> $litmus.out 2>&1 | ||
| 64 | exit 255 | ||
| 65 | fi | ||
| 66 | if test "$outcome" = DEADLOCK | ||
| 67 | then | ||
| 68 | # For DEADLOCK, herd must report "Never 0 0", i.e., no executions at all. | ||
| 69 | if grep '^Observation' $litmus.out | grep -q 'Never 0 0$' | ||
| 70 | then | ||
| 71 | ret=0 | ||
| 72 | else | ||
| 73 | echo " ^^^ Unexpected non-$outcome verification" | ||
| 74 | echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1 | ||
| 75 | ret=1 | ||
| 76 | fi | ||
| 77 | elif grep '^Observation' $litmus.out | grep -q $outcome || test "$outcome" = Maybe | ||
| 78 | then | ||
| 79 | ret=0 | ||
| 80 | else | ||
| 81 | echo " ^^^ Unexpected non-$outcome verification" | ||
| 82 | echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1 | ||
| 83 | ret=1 | ||
| 84 | fi | ||
| 85 | tail -2 $litmus.out | head -1 | ||
| 86 | exit $ret | ||
