author:    Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>  2015-02-06 06:14:11 -0500
committer: Ingo Molnar <mingo@kernel.org>  2015-02-18 08:53:49 -0500
commit:    d6abfdb2022368d8c6c4be3f11a06656601a6cc2
tree:      2569d8da91393a807fdb479a25eaa5087673c49e  /arch/x86/include
parent:    8d1e5a1a1ccf5ae9d8a5a0ee7960202ccb0c5429
x86/spinlocks/paravirt: Fix memory corruption on unlock
Paravirt spinlocks clear the slowpath flag after doing the unlock.
As explained by Linus, the code currently does:

	prev = *lock;
	add_smp(&lock->tickets.head, TICKET_LOCK_INC);

	/* add_smp() is a full mb() */

	if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
		__ticket_unlock_slowpath(lock, prev);

which is *exactly* the kind of thing you cannot do with spinlocks:
once you have done the add_smp() and released the spinlock for the
fast path, you cannot access the spinlock any more, exactly because
a fast-path lock might come in and release the whole data structure.

Linus suggested that we should not do any writes to the lock after
unlock(), and that we can move the slowpath clearing to the fastpath
lock instead.

So this patch implements the fix in two parts:

 1. Move the slowpath flag from the tail to the head ticket (Oleg):

    Unlocked locks don't care about the slowpath flag; therefore we
    can keep it set after the last unlock and clear it again on the
    first (try)lock. This removes the write after unlock; note that
    leaving the slowpath flag set can result in an occasional
    unnecessary kick. By moving the slowpath flag from the tail to
    the head ticket we also avoid the need to access both the head
    and tail tickets on unlock.

 2. Use xadd to avoid the read/write after unlock that checks whether
    an unlock kick is needed (Linus):

    We further avoid the need for a read-after-release by using xadd:
    the previous head value includes the slowpath flag and indicates
    whether we need to do PV kicking of suspended spinners. On modern
    chips xadd isn't (much) more expensive than an add + load.

Result:

 setup: 16-core (32-cpu +HT Sandy Bridge, 8GB, 16-vcpu guest)

 benchmark   overcommit   %improve
 kernbench   1x            -0.13
 kernbench   2x             0.02
 dbench      1x            -1.77
 dbench      2x            -0.63

[Jeremy: hinted at the missing TICKET_LOCK_INC for the kick]
[Oleg: moved the slowpath flag to the head, __tickets_equal() idea]
[PeterZ: added the detailed changelog]

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Jones <drjones@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Fernando Luis Vázquez Cao <fernando_b1@lab.ntt.co.jp>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Cc: a.ryabinin@samsung.com
Cc: dave@stgolabs.net
Cc: hpa@zytor.com
Cc: jasowang@redhat.com
Cc: jeremy@goop.org
Cc: paul.gortmaker@windriver.com
Cc: riel@redhat.com
Cc: tglx@linutronix.de
Cc: waiman.long@hp.com
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20150215173043.GA7471@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
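To see the two parts of the fix in isolation, here is a minimal user-space
sketch of the same idea, assuming C11 atomics and a toy 8-bit ticket layout.
All names here (toy_lock, TOY_LOCK_INC, TOY_SLOWPATH_FLAG, ...) are
hypothetical stand-ins, not the kernel API, and the kernel's halt/kick
hypercalls are reduced to spinning and a printf:

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define TOY_LOCK_INC		2	/* tickets advance in steps of 2 ... */
	#define TOY_SLOWPATH_FLAG	1	/* ... leaving bit 0 free for the flag */

	struct toy_lock {
		_Atomic uint8_t head;	/* owner's ticket; may carry the flag */
		_Atomic uint8_t tail;	/* next ticket to hand out */
	};

	/* Mirror of __tickets_equal(): compare tickets, ignoring the flag bit. */
	static int toy_tickets_equal(uint8_t one, uint8_t two)
	{
		return !((one ^ two) & ~TOY_SLOWPATH_FLAG);
	}

	static void toy_lock_acquire(struct toy_lock *l)
	{
		uint8_t me = atomic_fetch_add(&l->tail, TOY_LOCK_INC);
		uint8_t head;

		/* Spin; the kernel slowpath would halt the vCPU instead. */
		while (head = atomic_load(&l->head), !toy_tickets_equal(head, me))
			;

		/* Fix part 1: the *next* locker clears a stale slowpath flag. */
		if (head & TOY_SLOWPATH_FLAG)
			atomic_compare_exchange_strong(&l->head, &head,
						       head & ~TOY_SLOWPATH_FLAG);
	}

	static void toy_unlock(struct toy_lock *l)
	{
		/*
		 * Fix part 2: a single xadd-style release. The returned
		 * previous head already tells us whether a waiter set the
		 * slowpath flag, so we never touch the lock again after
		 * releasing it.
		 */
		uint8_t prev = atomic_fetch_add(&l->head, TOY_LOCK_INC);

		if (prev & TOY_SLOWPATH_FLAG) {
			prev &= ~TOY_SLOWPATH_FLAG;
			/* Stand-in for __ticket_unlock_kick(lock, head + INC). */
			printf("kick the waiter holding ticket %u\n",
			       prev + TOY_LOCK_INC);
		}
	}

	int main(void)
	{
		struct toy_lock l = { 0, 0 };

		toy_lock_acquire(&l);
		atomic_fetch_or(&l.head, TOY_SLOWPATH_FLAG); /* a "waiter" blocks */
		toy_unlock(&l);		/* prints a kick for ticket 2 */

		toy_lock_acquire(&l);	/* clears the stale flag */
		toy_unlock(&l);		/* no flag, so no kick */
		return 0;
	}

As in the patch, the unlocker's only access to the lock is the one
fetch-and-add; everything it needs to know about waiters is carried in the
returned previous head value.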
Diffstat (limited to 'arch/x86/include')
 -rw-r--r--  arch/x86/include/asm/spinlock.h  |  94
 1 file changed, 46 insertions(+), 48 deletions(-)
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 625660f8a2fc..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
 {
-	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
 }
 
 #else  /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
 }
 
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int __tickets_equal(__ticket_t one, __ticket_t two)
+{
+	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+						     __ticket_t head)
+{
+	if (head & TICKET_SLOWPATH_FLAG) {
+		arch_spinlock_t old, new;
+
+		old.tickets.head = head;
+		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+		new.tickets.tail = old.tickets.tail;
+
+		/* try to clear slowpath flag when there are no contenders */
+		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+	}
+}
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.tickets.head == lock.tickets.tail;
+	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
 }
 
 /*
@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	if (likely(inc.head == inc.tail))
 		goto out;
 
-	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (READ_ONCE(lock->tickets.head) == inc.tail)
-				goto out;
+			inc.head = READ_ONCE(lock->tickets.head);
+			if (__tickets_equal(inc.head, inc.tail))
+				goto clear_slowpath;
 			cpu_relax();
 		} while (--count);
 		__ticket_lock_spinning(lock, inc.tail);
 	}
-out:	barrier();	/* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+	__ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 	arch_spinlock_t old, new;
 
 	old.tickets = READ_ONCE(lock->tickets);
-	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
 		return 0;
 
 	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
 
 	/* cmpxchg is a full barrier, so nothing can move before it */
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
-					    arch_spinlock_t old)
-{
-	arch_spinlock_t new;
-
-	BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-	/* Perform the unlock on the "before" copy */
-	old.tickets.head += TICKET_LOCK_INC;
-
-	/* Clear the slowpath flag */
-	new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
-	/*
-	 * If the lock is uncontended, clear the flag - use cmpxchg in
-	 * case it changes behind our back though.
-	 */
-	if (new.tickets.head != new.tickets.tail ||
-	    cmpxchg(&lock->head_tail, old.head_tail,
-		    new.head_tail) != old.head_tail) {
-		/*
-		 * Lock still has someone queued for it, so wake up an
-		 * appropriate waiter.
-		 */
-		__ticket_unlock_kick(lock, old.tickets.head);
-	}
-}
-
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	if (TICKET_SLOWPATH_FLAG &&
 	    static_key_false(&paravirt_ticketlocks_enabled)) {
-		arch_spinlock_t prev;
+		__ticket_t head;
 
-		prev = *lock;
-		add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
 
-		/* add_smp() is a full mb() */
+		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
 
-		if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
-			__ticket_unlock_slowpath(lock, prev);
+		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+			head &= ~TICKET_SLOWPATH_FLAG;
+			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+		}
 	} else
 		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
 }
@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return tmp.tail != tmp.head;
+	return !__tickets_equal(tmp.tail, tmp.head);
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
+	tmp.head &= ~TICKET_SLOWPATH_FLAG;
+	return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 
@@ -183,16 +181,16 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	__ticket_t head = ACCESS_ONCE(lock->tickets.head);
+	__ticket_t head = READ_ONCE(lock->tickets.head);
 
 	for (;;) {
-		struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+		struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 		/*
 		 * We need to check "unlocked" in a loop, tmp.head == head
 		 * can be false positive because of overflow.
 		 */
-		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
-		    tmp.head != head)
+		if (__tickets_equal(tmp.head, tmp.tail) ||
+		    !__tickets_equal(tmp.head, head))
 			break;
 
 		cpu_relax();