author		Tejun Heo <tj@kernel.org>	2010-12-18 09:54:36 -0500
committer	Tejun Heo <tj@kernel.org>	2010-12-18 09:54:36 -0500
commit		05c2d088d0eb904e50460b04d77324c26cef4637 (patch)
tree		1dab544e05f9021a02e76adcbdb5edf4b31c7d62 /arch
parent		3ea9f6833c8f865a221b59ce37d7650dcf3b3e17 (diff)
parent		8270137a0d50507a5b40f880db636527045b8466 (diff)
Merge branch 'this_cpu_ops' into for-2.6.38
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/Kconfig.cpu		  3
-rw-r--r--	arch/x86/include/asm/percpu.h	187
2 files changed, 153 insertions, 37 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ac9069890cd..15588a0ef466 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
 config X86_CMPXCHG
 	def_bool X86_64 || (X86_32 && !M386)
 
+config CMPXCHG_LOCAL
+	def_bool X86_64 || (X86_32 && !M386)
+
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
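The new CMPXCHG_LOCAL symbol simply advertises, at build time, that the CPU has a usable cmpxchg instruction (everything except a plain 386). As a hedged illustration — not code from this commit, with made-up names (demo_slot, demo_try_claim) and assuming the size-generic this_cpu_cmpxchg() wrapper from the same this_cpu_ops series — other code can key off the symbol to choose between a cmpxchg-based per-CPU fast path and a preempt-protected fallback:

#include <linux/percpu.h>
#include <linux/preempt.h>

/* Hypothetical per-CPU slot, for illustration only. */
static DEFINE_PER_CPU(unsigned long, demo_slot);

/* Install 'new' only if the slot still holds 'old'; non-zero on success. */
static int demo_try_claim(unsigned long old, unsigned long new)
{
#ifdef CONFIG_CMPXCHG_LOCAL
	/* Single segment-prefixed cmpxchg, no LOCK prefix needed. */
	return this_cpu_cmpxchg(demo_slot, old, new) == old;
#else
	int claimed;

	preempt_disable();
	claimed = (__this_cpu_read(demo_slot) == old);
	if (claimed)
		__this_cpu_write(demo_slot, new);
	preempt_enable();
	return claimed;
#endif
}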
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 38f9e965ff96..8ee45167e817 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -177,39 +177,6 @@ do { \
 	} \
 } while (0)
 
-/*
- * Add return operation
- */
-#define percpu_add_return_op(var, val) \
-({ \
-	typeof(var) paro_ret__ = val; \
-	switch (sizeof(var)) { \
-	case 1: \
-		asm("xaddb %0, "__percpu_arg(1) \
-			: "+q" (paro_ret__), "+m" (var) \
-			: : "memory"); \
-		break; \
-	case 2: \
-		asm("xaddw %0, "__percpu_arg(1) \
-			: "+r" (paro_ret__), "+m" (var) \
-			: : "memory"); \
-		break; \
-	case 4: \
-		asm("xaddl %0, "__percpu_arg(1) \
-			: "+r" (paro_ret__), "+m" (var) \
-			: : "memory"); \
-		break; \
-	case 8: \
-		asm("xaddq %0, "__percpu_arg(1) \
-			: "+re" (paro_ret__), "+m" (var) \
-			: : "memory"); \
-		break; \
-	default: __bad_percpu_size(); \
-	} \
-	paro_ret__ += val; \
-	paro_ret__; \
-})
-
 #define percpu_from_op(op, var, constraint) \
 ({ \
 	typeof(var) pfo_ret__; \
@@ -263,6 +230,125 @@ do { \
 })
 
 /*
+ * Add return operation
+ */
+#define percpu_add_return_op(var, val) \
+({ \
+	typeof(var) paro_ret__ = val; \
+	switch (sizeof(var)) { \
+	case 1: \
+		asm("xaddb %0, "__percpu_arg(1) \
+			: "+q" (paro_ret__), "+m" (var) \
+			: : "memory"); \
+		break; \
+	case 2: \
+		asm("xaddw %0, "__percpu_arg(1) \
+			: "+r" (paro_ret__), "+m" (var) \
+			: : "memory"); \
+		break; \
+	case 4: \
+		asm("xaddl %0, "__percpu_arg(1) \
+			: "+r" (paro_ret__), "+m" (var) \
+			: : "memory"); \
+		break; \
+	case 8: \
+		asm("xaddq %0, "__percpu_arg(1) \
+			: "+re" (paro_ret__), "+m" (var) \
+			: : "memory"); \
+		break; \
+	default: __bad_percpu_size(); \
+	} \
+	paro_ret__ += val; \
+	paro_ret__; \
+})
+
+/*
+ * xchg is implemented using cmpxchg without a lock prefix. xchg is
+ * expensive due to the implied lock prefix. The processor cannot prefetch
+ * cachelines if xchg is used.
+ */
+#define percpu_xchg_op(var, nval) \
+({ \
+	typeof(var) pxo_ret__; \
+	typeof(var) pxo_new__ = (nval); \
+	switch (sizeof(var)) { \
+	case 1: \
+		asm("\n1:mov "__percpu_arg(1)",%%al" \
+		    "\n\tcmpxchgb %2, "__percpu_arg(1) \
+		    "\n\tjnz 1b" \
+			: "=a" (pxo_ret__), "+m" (var) \
+			: "q" (pxo_new__) \
+			: "memory"); \
+		break; \
+	case 2: \
+		asm("\n1:mov "__percpu_arg(1)",%%ax" \
+		    "\n\tcmpxchgw %2, "__percpu_arg(1) \
+		    "\n\tjnz 1b" \
+			: "=a" (pxo_ret__), "+m" (var) \
+			: "r" (pxo_new__) \
+			: "memory"); \
+		break; \
+	case 4: \
+		asm("\n1:mov "__percpu_arg(1)",%%eax" \
+		    "\n\tcmpxchgl %2, "__percpu_arg(1) \
+		    "\n\tjnz 1b" \
+			: "=a" (pxo_ret__), "+m" (var) \
+			: "r" (pxo_new__) \
+			: "memory"); \
+		break; \
+	case 8: \
+		asm("\n1:mov "__percpu_arg(1)",%%rax" \
+		    "\n\tcmpxchgq %2, "__percpu_arg(1) \
+		    "\n\tjnz 1b" \
+			: "=a" (pxo_ret__), "+m" (var) \
+			: "r" (pxo_new__) \
+			: "memory"); \
+		break; \
+	default: __bad_percpu_size(); \
+	} \
+	pxo_ret__; \
+})
+
+/*
+ * cmpxchg has no such implied lock semantics as a result it is much
+ * more efficient for cpu local operations.
+ */
+#define percpu_cmpxchg_op(var, oval, nval) \
+({ \
+	typeof(var) pco_ret__; \
+	typeof(var) pco_old__ = (oval); \
+	typeof(var) pco_new__ = (nval); \
+	switch (sizeof(var)) { \
+	case 1: \
+		asm("cmpxchgb %2, "__percpu_arg(1) \
+			: "=a" (pco_ret__), "+m" (var) \
+			: "q" (pco_new__), "0" (pco_old__) \
+			: "memory"); \
+		break; \
+	case 2: \
+		asm("cmpxchgw %2, "__percpu_arg(1) \
+			: "=a" (pco_ret__), "+m" (var) \
+			: "r" (pco_new__), "0" (pco_old__) \
+			: "memory"); \
+		break; \
+	case 4: \
+		asm("cmpxchgl %2, "__percpu_arg(1) \
+			: "=a" (pco_ret__), "+m" (var) \
+			: "r" (pco_new__), "0" (pco_old__) \
+			: "memory"); \
+		break; \
+	case 8: \
+		asm("cmpxchgq %2, "__percpu_arg(1) \
+			: "=a" (pco_ret__), "+m" (var) \
+			: "r" (pco_new__), "0" (pco_old__) \
+			: "memory"); \
+		break; \
+	default: __bad_percpu_size(); \
+	} \
+	pco_ret__; \
+})
+
+/*
  * percpu_read() makes gcc load the percpu variable every time it is
  * accessed while percpu_read_stable() allows the value to be cached.
  * percpu_read_stable() is more efficient and can be used if its value
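Two instruction-level ideas carry the macros added above: xadd leaves the old value in the register, so the caller reconstructs the new total by adding val back in, and an un-LOCKed cmpxchg retry loop stands in for xchg, whose LOCK semantics are implied and cannot be turned off. Below is a minimal userspace sketch of just those semantics (x86-64, GNU C); it drops the %gs segment prefix and per-CPU addressing, so it is not the kernel code itself, only an illustration of the pattern:

#include <stdio.h>

static long counter = 40;

/* add-and-return via xadd: the instruction leaves the OLD value in the
 * register, so the new total is rebuilt by adding 'val' back, exactly as
 * percpu_add_return_op() does with paro_ret__ += val.                    */
static long add_return(long *p, long val)
{
	long ret = val;

	asm("xaddq %0, %1" : "+r" (ret), "+m" (*p) : : "memory");
	return ret + val;
}

/* xchg emulated with an un-LOCKed cmpxchg loop, mirroring percpu_xchg_op():
 * reload the current value, try to replace it, retry if it changed.       */
static long xchg_local(long *p, long nval)
{
	long ret;

	asm("\n1:\tmovq %1, %%rax\n"
	    "\tcmpxchgq %2, %1\n"
	    "\tjnz 1b"
	    : "=&a" (ret), "+m" (*p)
	    : "r" (nval)
	    : "memory");
	return ret;
}

int main(void)
{
	printf("add_return: %ld\n", add_return(&counter, 2)); /* 42 */
	printf("xchg old:   %ld\n", xchg_local(&counter, 7)); /* 42 */
	printf("now:        %ld\n", counter);                 /* 7  */
	return 0;
}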
@@ -300,6 +386,12 @@ do { \
 #define __this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
+/*
+ * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
+ * faster than an xchg with forced lock semantics.
+ */
+#define __this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define __this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 
 #define this_cpu_read_1(pcp)	percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_read_2(pcp)	percpu_from_op("mov", (pcp), "m"(pcp))
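The comment in the hunk above refers to the size-generic fallback in include/linux/percpu.h. Roughly (a simplified sketch with a made-up name, not the exact kernel macro), that fallback is just a plain read followed by a plain write, which is legal because every __this_cpu_* caller already runs with preemption disabled, so nothing can slip in on the local CPU and no LOCKed instruction is needed:

/* Simplified sketch of the generic __this_cpu_xchg fallback; the name
 * my_this_cpu_xchg_generic is made up for illustration.                 */
#define my_this_cpu_xchg_generic(pcp, nval)		\
({							\
	typeof(pcp) old__ = __this_cpu_read(pcp);	\
	__this_cpu_write(pcp, nval);			\
	old__;						\
})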
@@ -319,6 +411,11 @@ do { \
 #define this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 
 #define irqsafe_cpu_add_1(pcp, val)	percpu_add_op((pcp), val)
 #define irqsafe_cpu_add_2(pcp, val)	percpu_add_op((pcp), val)
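For the preempt-safe this_cpu_* variants added above, no explicit preempt_disable()/enable() bracket is required: the segment-prefixed instruction both selects the executing CPU's copy and performs the access in one instruction. A hedged usage sketch (hypothetical variable and function names, assuming the size-generic this_cpu_xchg()/this_cpu_cmpxchg() wrappers from the same this_cpu_ops series):

#include <linux/percpu.h>

/* Hypothetical per-CPU value, for illustration only. */
static DEFINE_PER_CPU(unsigned long, demo_pending);

/* Take whatever is pending and reset the slot to 0, atomically with
 * respect to preemption and interrupts on the local CPU.             */
static unsigned long demo_take_pending(void)
{
	return this_cpu_xchg(demo_pending, 0);
}

/* Publish 'new' only if the slot still holds 'expected'. */
static int demo_publish_pending(unsigned long expected, unsigned long new)
{
	return this_cpu_cmpxchg(demo_pending, expected, new) == expected;
}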
@@ -332,15 +429,32 @@ do { \
 #define irqsafe_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define irqsafe_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 
 #ifndef CONFIG_M386
 #define __this_cpu_add_return_1(pcp, val)	percpu_add_return_op(pcp, val)
 #define __this_cpu_add_return_2(pcp, val)	percpu_add_return_op(pcp, val)
 #define __this_cpu_add_return_4(pcp, val)	percpu_add_return_op(pcp, val)
+#define __this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+
 #define this_cpu_add_return_1(pcp, val)	percpu_add_return_op(pcp, val)
 #define this_cpu_add_return_2(pcp, val)	percpu_add_return_op(pcp, val)
 #define this_cpu_add_return_4(pcp, val)	percpu_add_return_op(pcp, val)
-#endif
+#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#endif /* !CONFIG_M386 */
+
 /*
  * Per cpu atomic 64 bit operations are only available under 64 bit.
  * 32 bit must fall back to generic operations.
@@ -352,6 +466,7 @@ do { \
 #define __this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define __this_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
 #define __this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define __this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 
 #define this_cpu_read_8(pcp)	percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_write_8(pcp, val)	percpu_to_op("mov", (pcp), val)
@@ -359,14 +474,12 @@ do { \
 #define this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define this_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
 #define this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 
 #define irqsafe_cpu_add_8(pcp, val)	percpu_add_op((pcp), val)
 #define irqsafe_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define irqsafe_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
 #define irqsafe_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
-
-#define __this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
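Finally, with __this_cpu_add_return_8/this_cpu_add_return_8 only wired up under 64-bit (the comment above notes that 32-bit falls back to the generic operations), a typical consumer would be a 64-bit per-CPU sequence counter. A hedged sketch with made-up names, assuming the size-generic this_cpu_add_return() wrapper from the same series:

#include <linux/percpu.h>
#include <linux/types.h>

/* Hypothetical per-CPU sequence counter, for illustration only. */
static DEFINE_PER_CPU(u64, demo_seq);

static u64 demo_next_seq(void)
{
	/* On x86-64 this becomes a single xadd on the %gs-based address;
	 * on 32-bit it takes the generic, preempt-protected fallback.    */
	return this_cpu_add_return(demo_seq, 1);
}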