From e0f6dec35f9286e78879fe1ac92803fd69fc4fdc Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 4 Dec 2013 14:31:28 -0800 Subject: x86, bitops: Correct the assembly constraints to testing bitops In checkin: 0c44c2d0f459 x86: Use asm goto to implement better modify_and_test() functions the various functions which do modify and test were unified and optimized using "asm goto". However, this change missed the detail that the bitops require an "Ir" constraint rather than an "er" constraint ("I" = integer constant from 0-31, "e" = signed 32-bit integer constant). This would cause code to miscompile if these functions were used on constant bit positions 32-255 and the build to fail if used on constant bit positions above 255. Add the constraints as a parameter to the GEN_BINARY_RMWcc() macro to avoid this problem. Reported-by: Jesse Brandeburg Signed-off-by: H. Peter Anvin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/529E8719.4070202@zytor.com --- arch/x86/include/asm/atomic.h | 4 ++-- arch/x86/include/asm/atomic64_64.h | 4 ++-- arch/x86/include/asm/bitops.h | 6 +++--- arch/x86/include/asm/local.h | 4 ++-- arch/x86/include/asm/rmwcc.h | 8 ++++---- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index da31c8b8a92d..b17f4f48ecd7 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -77,7 +77,7 @@ static inline void atomic_sub(int i, atomic_t *v) */ static inline int atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } /** @@ -141,7 +141,7 @@ static inline int atomic_inc_and_test(atomic_t *v) */ static inline int atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 3f065c985aee..46e9052bbd28 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,7 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) */ static inline int atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); } /** @@ -138,7 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v) */ static inline int atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 6d76d0935989..9fc1af74dc83 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -205,7 +205,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); } /** @@ -251,7 +251,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); } /** @@ -304,7 +304,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); } static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 5b23e605e707..4ad6560847b1 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,7 +52,7 @@ static inline void local_sub(long i, local_t *l) */ static inline int local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e"); + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); } /** @@ -92,7 +92,7 @@ static inline int local_inc_and_test(local_t *l) */ static inline int local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s"); + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 1ff990f1de8e..8f7866a5b9a4 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -16,8 +16,8 @@ cc_label: \ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ __GEN_RMWcc(op " " arg0, var, cc) -#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \ - __GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val)) +#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ + __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) #else /* !CC_HAVE_ASM_GOTO */ @@ -33,8 +33,8 @@ do { \ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ __GEN_RMWcc(op " " arg0, var, cc) -#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \ - __GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val)) +#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ + __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) #endif /* CC_HAVE_ASM_GOTO */ -- cgit v1.2.2 From ba1f14fbe70965ae0fb1655a5275a62723f65b77 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2013 14:26:41 +0100 Subject: sched: Remove PREEMPT_NEED_RESCHED from generic code While hunting a preemption issue with Alexander, Ben noticed that the currently generic PREEMPT_NEED_RESCHED stuff is horribly broken for load-store architectures. We currently rely on the IPI to fold TIF_NEED_RESCHED into PREEMPT_NEED_RESCHED, but when this IPI lands while we already have a load for the preempt-count but before the store, the store will erase the PREEMPT_NEED_RESCHED change. The current preempt-count only works on load-store archs because interrupts are assumed to be completely balanced wrt their preempt_count fiddling; the previous preempt_count load will match the preempt_count state after the interrupt and therefore nothing gets lost. This patch removes the PREEMPT_NEED_RESCHED usage from generic code and pushes it into x86 arch code; the generic code goes back to relying on TIF_NEED_RESCHED. Boot tested on x86_64 and compile tested on ppc64. Reported-by: Benjamin Herrenschmidt Reported-and-Tested-by: Alexander Graf Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20131128132641.GP10022@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/preempt.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 8729723636fd..c8b051933b1b 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -7,6 +7,12 @@ DECLARE_PER_CPU(int, __preempt_count); +/* + * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such + * that a decrement hitting 0 means we can and should reschedule. + */ +#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) + /* * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users * that think a non-zero value indicates we cannot preempt. @@ -74,6 +80,11 @@ static __always_inline void __preempt_count_sub(int val) __this_cpu_add_4(__preempt_count, -val); } +/* + * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ static __always_inline bool __preempt_count_dec_and_test(void) { GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); -- cgit v1.2.2 From 20841405940e7be0617612d521e206e4b6b325db Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 18 Dec 2013 17:08:44 -0800 Subject: mm: fix TLB flush race between migration, and change_protection_range There are a few subtle races, between change_protection_range (used by mprotect and change_prot_numa) on one side, and NUMA page migration and compaction on the other side. The basic race is that there is a time window between when the PTE gets made non-present (PROT_NONE or NUMA), and the TLB is flushed. During that time, a CPU may continue writing to the page. This is fine most of the time, however compaction or the NUMA migration code may come in, and migrate the page away. When that happens, the CPU may continue writing, through the cached translation, to what is no longer the current memory location of the process. This only affects x86, which has a somewhat optimistic pte_accessible. All other architectures appear to be safe, and will either always flush, or flush whenever there is a valid mapping, even with no permissions (SPARC). The basic race looks like this: CPU A CPU B CPU C load TLB entry make entry PTE/PMD_NUMA fault on entry read/write old page start migrating page change PTE/PMD to new page read/write old page [*] flush TLB reload TLB from new entry read/write new page lose data [*] the old page may belong to a new user at this point! The obvious fix is to flush remote TLB entries, by making sure that pte_accessible aware of the fact that PROT_NONE and PROT_NUMA memory may still be accessible if there is a TLB flush pending for the mm. This should fix both NUMA migration and compaction. [mgorman@suse.de: fix build] Signed-off-by: Rik van Riel Signed-off-by: Mel Gorman Cc: Alex Thorlton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3d1999458709..bbc8b12fa443 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -452,9 +452,16 @@ static inline int pte_present(pte_t a) } #define pte_accessible pte_accessible -static inline int pte_accessible(pte_t a) +static inline bool pte_accessible(struct mm_struct *mm, pte_t a) { - return pte_flags(a) & _PAGE_PRESENT; + if (pte_flags(a) & _PAGE_PRESENT) + return true; + + if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) && + mm_tlb_flush_pending(mm)) + return true; + + return false; } static inline int pte_hidden(pte_t pte) -- cgit v1.2.2