Diffstat (limited to 'arch/x86/include/asm/percpu.h')
-rw-r--r--  arch/x86/include/asm/percpu.h | 158 ++++++++++++++++++++++++++++-
 1 file changed, 157 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index f899e01a8ac9..8ee45167e817 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -230,6 +230,125 @@ do { \
 })
 
 /*
+ * Add return operation
+ */
+#define percpu_add_return_op(var, val)                          \
+({                                                              \
+        typeof(var) paro_ret__ = val;                           \
+        switch (sizeof(var)) {                                  \
+        case 1:                                                 \
+                asm("xaddb %0, "__percpu_arg(1)                 \
+                            : "+q" (paro_ret__), "+m" (var)     \
+                            : : "memory");                      \
+                break;                                          \
+        case 2:                                                 \
+                asm("xaddw %0, "__percpu_arg(1)                 \
+                            : "+r" (paro_ret__), "+m" (var)     \
+                            : : "memory");                      \
+                break;                                          \
+        case 4:                                                 \
+                asm("xaddl %0, "__percpu_arg(1)                 \
+                            : "+r" (paro_ret__), "+m" (var)     \
+                            : : "memory");                      \
+                break;                                          \
+        case 8:                                                 \
+                asm("xaddq %0, "__percpu_arg(1)                 \
+                            : "+re" (paro_ret__), "+m" (var)    \
+                            : : "memory");                      \
+                break;                                          \
+        default: __bad_percpu_size();                           \
+        }                                                       \
+        paro_ret__ += val;                                      \
+        paro_ret__;                                             \
+})
+
+/*
+ * xchg is implemented using cmpxchg without a lock prefix. A plain
+ * xchg is expensive because of its implied lock prefix, which also
+ * keeps the processor from prefetching the target cacheline.
+ */
+#define percpu_xchg_op(var, nval)                               \
+({                                                              \
+        typeof(var) pxo_ret__;                                  \
+        typeof(var) pxo_new__ = (nval);                         \
+        switch (sizeof(var)) {                                  \
+        case 1:                                                 \
+                asm("\n1:mov "__percpu_arg(1)",%%al"            \
+                    "\n\tcmpxchgb %2, "__percpu_arg(1)          \
+                    "\n\tjnz 1b"                                \
+                            : "=a" (pxo_ret__), "+m" (var)      \
+                            : "q" (pxo_new__)                   \
+                            : "memory");                        \
+                break;                                          \
+        case 2:                                                 \
+                asm("\n1:mov "__percpu_arg(1)",%%ax"            \
+                    "\n\tcmpxchgw %2, "__percpu_arg(1)          \
+                    "\n\tjnz 1b"                                \
+                            : "=a" (pxo_ret__), "+m" (var)      \
+                            : "r" (pxo_new__)                   \
+                            : "memory");                        \
+                break;                                          \
+        case 4:                                                 \
+                asm("\n1:mov "__percpu_arg(1)",%%eax"           \
+                    "\n\tcmpxchgl %2, "__percpu_arg(1)          \
+                    "\n\tjnz 1b"                                \
+                            : "=a" (pxo_ret__), "+m" (var)      \
+                            : "r" (pxo_new__)                   \
+                            : "memory");                        \
+                break;                                          \
+        case 8:                                                 \
+                asm("\n1:mov "__percpu_arg(1)",%%rax"           \
+                    "\n\tcmpxchgq %2, "__percpu_arg(1)          \
+                    "\n\tjnz 1b"                                \
+                            : "=a" (pxo_ret__), "+m" (var)      \
+                            : "r" (pxo_new__)                   \
+                            : "memory");                        \
+                break;                                          \
+        default: __bad_percpu_size();                           \
+        }                                                       \
+        pxo_ret__;                                              \
+})
+
+/*
+ * cmpxchg has no such implied lock semantics; as a result it is much
+ * more efficient for CPU-local operations.
+ */
+#define percpu_cmpxchg_op(var, oval, nval)                      \
+({                                                              \
+        typeof(var) pco_ret__;                                  \
+        typeof(var) pco_old__ = (oval);                         \
+        typeof(var) pco_new__ = (nval);                         \
+        switch (sizeof(var)) {                                  \
+        case 1:                                                 \
+                asm("cmpxchgb %2, "__percpu_arg(1)              \
+                            : "=a" (pco_ret__), "+m" (var)      \
+                            : "q" (pco_new__), "0" (pco_old__)  \
+                            : "memory");                        \
+                break;                                          \
+        case 2:                                                 \
+                asm("cmpxchgw %2, "__percpu_arg(1)              \
+                            : "=a" (pco_ret__), "+m" (var)      \
+                            : "r" (pco_new__), "0" (pco_old__)  \
+                            : "memory");                        \
+                break;                                          \
+        case 4:                                                 \
+                asm("cmpxchgl %2, "__percpu_arg(1)              \
+                            : "=a" (pco_ret__), "+m" (var)      \
+                            : "r" (pco_new__), "0" (pco_old__)  \
+                            : "memory");                        \
+                break;                                          \
+        case 8:                                                 \
+                asm("cmpxchgq %2, "__percpu_arg(1)              \
+                            : "=a" (pco_ret__), "+m" (var)      \
+                            : "r" (pco_new__), "0" (pco_old__)  \
+                            : "memory");                        \
+                break;                                          \
+        default: __bad_percpu_size();                           \
+        }                                                       \
+        pco_ret__;                                              \
+})
+
+/*
  * percpu_read() makes gcc load the percpu variable every time it is
  * accessed while percpu_read_stable() allows the value to be cached.
  * percpu_read_stable() is more efficient and can be used if its value
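
The xadd in percpu_add_return_op exchanges its register operand with the memory
operand and stores their sum in memory, leaving the register with the old value;
the macro then adds val back in to produce the new value it returns. A minimal
standalone sketch of the same sequence for a plain 4-byte variable, without the
%fs/%gs segment prefix that __percpu_arg() adds (the function name is invented
for illustration):

    static inline int xadd_return_sketch(int *p, int val)
    {
            int old = val;

            /* xaddl: tmp = *p + old; old = *p; *p = tmp */
            asm("xaddl %0, %1"
                : "+r" (old), "+m" (*p)
                : : "memory");

            return old + val;       /* old value + addend = new value */
    }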
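The open-coded loop in percpu_xchg_op reads the current value, then retries
cmpxchg until the store sticks; since no lock prefix is emitted, this is only
safe for data that no other CPU writes, which per-CPU data guarantees. The same
control flow in portable C, as a sketch using a GCC builtin (note that
__sync_bool_compare_and_swap emits a *locked* cmpxchg, which the macro above
deliberately avoids):

    static inline int xchg_sketch(int *p, int nval)
    {
            int old;

            do {
                    old = *p;                       /* "1: mov"          */
            } while (!__sync_bool_compare_and_swap(p, old, nval));
                                                    /* "cmpxchg; jnz 1b" */
            return old;
    }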
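A plausible caller of the cmpxchg path, assuming the size-dispatching
this_cpu_cmpxchg() wrapper from the generic percpu code ends up in
percpu_cmpxchg_op; the slot_state variable and the function are invented for
illustration:

    static DEFINE_PER_CPU(unsigned long, slot_state);  /* 0 = free, 1 = busy */

    static bool try_claim_local_slot(void)
    {
            /* cmpxchg returns the old value; 0 means we claimed the slot */
            return this_cpu_cmpxchg(slot_state, 0, 1) == 0;
    }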
@@ -267,6 +386,12 @@ do { \
 #define __this_cpu_xor_1(pcp, val)      percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xor_2(pcp, val)      percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xor_4(pcp, val)      percpu_to_op("xor", (pcp), val)
+/*
+ * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
+ * faster than an xchg with forced lock semantics.
+ */
+#define __this_cpu_xchg_8(pcp, nval)    percpu_xchg_op(pcp, nval)
+#define __this_cpu_cmpxchg_8(pcp, oval, nval)   percpu_cmpxchg_op(pcp, oval, nval)
 
 #define this_cpu_read_1(pcp)            percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_read_2(pcp)            percpu_from_op("mov", (pcp), "m"(pcp))
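
The generic fallbacks referred to in the comment above live in
include/linux/percpu.h and amount to a plain read followed by a write, which is
legal because __this_cpu_* callers must already have preemption disabled. A
paraphrased sketch (the macro name is invented; this is not the literal kernel
source):

    #define __this_cpu_xchg_sketch(pcp, nval)               \
    ({                                                      \
            typeof(pcp) ret__ = __this_cpu_read(pcp);       \
            __this_cpu_write(pcp, nval);  /* no atomicity needed */ \
            ret__;                                          \
    })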
@@ -286,6 +411,11 @@ do { \
 #define this_cpu_xor_1(pcp, val)        percpu_to_op("xor", (pcp), val)
 #define this_cpu_xor_2(pcp, val)        percpu_to_op("xor", (pcp), val)
 #define this_cpu_xor_4(pcp, val)        percpu_to_op("xor", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval)      percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)      percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)      percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval)      percpu_xchg_op(pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval)     percpu_cmpxchg_op(pcp, oval, nval)
 
 #define irqsafe_cpu_add_1(pcp, val)     percpu_add_op((pcp), val)
 #define irqsafe_cpu_add_2(pcp, val)     percpu_add_op((pcp), val)
@@ -299,6 +429,31 @@ do { \
 #define irqsafe_cpu_xor_1(pcp, val)     percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_2(pcp, val)     percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_4(pcp, val)     percpu_to_op("xor", (pcp), val)
+#define irqsafe_cpu_xchg_1(pcp, nval)   percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_2(pcp, nval)   percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_4(pcp, nval)   percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_8(pcp, nval)   percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)  percpu_cmpxchg_op(pcp, oval, nval)
+
+#ifndef CONFIG_M386
+#define __this_cpu_add_return_1(pcp, val)       percpu_add_return_op(pcp, val)
+#define __this_cpu_add_return_2(pcp, val)       percpu_add_return_op(pcp, val)
+#define __this_cpu_add_return_4(pcp, val)       percpu_add_return_op(pcp, val)
+#define __this_cpu_cmpxchg_1(pcp, oval, nval)   percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_2(pcp, oval, nval)   percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_4(pcp, oval, nval)   percpu_cmpxchg_op(pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val)         percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_2(pcp, val)         percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_4(pcp, val)         percpu_add_return_op(pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval)     percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval)     percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval)     percpu_cmpxchg_op(pcp, oval, nval)
+
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval)  percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval)  percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)  percpu_cmpxchg_op(pcp, oval, nval)
+#endif /* !CONFIG_M386 */
 
 /*
  * Per cpu atomic 64 bit operations are only available under 64 bit.
@@ -311,6 +466,7 @@ do { \
 #define __this_cpu_and_8(pcp, val)      percpu_to_op("and", (pcp), val)
 #define __this_cpu_or_8(pcp, val)       percpu_to_op("or", (pcp), val)
 #define __this_cpu_xor_8(pcp, val)      percpu_to_op("xor", (pcp), val)
+#define __this_cpu_add_return_8(pcp, val)       percpu_add_return_op(pcp, val)
 
 #define this_cpu_read_8(pcp)            percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_write_8(pcp, val)      percpu_to_op("mov", (pcp), val)
@@ -318,12 +474,12 @@ do { \
 #define this_cpu_and_8(pcp, val)        percpu_to_op("and", (pcp), val)
 #define this_cpu_or_8(pcp, val)         percpu_to_op("or", (pcp), val)
 #define this_cpu_xor_8(pcp, val)        percpu_to_op("xor", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
 
 #define irqsafe_cpu_add_8(pcp, val)     percpu_add_op((pcp), val)
 #define irqsafe_cpu_and_8(pcp, val)     percpu_to_op("and", (pcp), val)
 #define irqsafe_cpu_or_8(pcp, val)      percpu_to_op("or", (pcp), val)
 #define irqsafe_cpu_xor_8(pcp, val)     percpu_to_op("xor", (pcp), val)
-
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
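
The caveat on that last context line applies to the __this_cpu_* operations
defined in this patch as well: they assume the caller has already taken care of
preemption. A hypothetical caller, with the counter invented for illustration:

    static DEFINE_PER_CPU(int, my_counter);

    static void bump_counter(void)
    {
            preempt_disable();
            __this_cpu_add(my_counter, 1);  /* CPU cannot change under us */
            preempt_enable();
    }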