author    Paul Mackerras <paulus@samba.org>  2006-06-07 02:14:40 -0400
committer Paul Mackerras <paulus@samba.org>  2006-06-09 07:24:15 -0400
commit    fab5db97e44f76461f76b24adfa8ccb14d4df498 (patch)
tree      123026a1a6f1702468220189b7410077479ae8a2 /arch/powerpc/kernel/align.c
parent    651d765d0b2c72d33430487c8b6ef64c60cd2134 (diff)
[PATCH] powerpc: Implement support for setting little-endian mode via prctl
This adds the PowerPC part of the code to allow processes to change their endian mode via prctl.

This also extends the alignment exception handler to be able to fix up alignment exceptions that occur in little-endian mode, both for "PowerPC" little-endian and true little-endian.

We always enter signal handlers in big-endian mode -- the support for little-endian mode does not amount to the creation of a little-endian user/kernel ABI. If the signal handler returns, the endian mode is restored to what it was when the signal was delivered.

We have two new kernel CPU feature bits, one for PPC little-endian and one for true little-endian. Most of the classic 32-bit processors support PPC little-endian, and this is reflected in the CPU feature table. There are two corresponding feature bits reported to userland in the AT_HWCAP aux vector entry.

This is based on an earlier patch by Anton Blanchard.

Signed-off-by: Paul Mackerras <paulus@samba.org>
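As an editorial illustration of the new userland interface (not part of this commit): a minimal sketch of a process querying and round-trip setting its endian mode. It assumes the PR_GET_ENDIAN/PR_SET_ENDIAN prctl commands and the PR_ENDIAN_* constants that the companion patches in this series add to <linux/prctl.h>. Note that a real switch takes effect as soon as the syscall returns, so any instructions that follow it must already be encoded for the new byte order.

#include <stdio.h>
#include <sys/prctl.h>          /* pulls in the PR_* constants */

int main(void)
{
        int mode;

        /* Ask the kernel which endian mode this thread is in. */
        if (prctl(PR_GET_ENDIAN, (unsigned long)&mode, 0, 0, 0) == -1) {
                perror("PR_GET_ENDIAN");
                return 1;
        }
        printf("current endian mode: %d\n", mode);

        /* Re-setting the mode we already have is a harmless round trip.
         * Selecting PR_ENDIAN_LITTLE or PR_ENDIAN_PPC_LITTLE for real
         * fails with EINVAL if the CPU lacks the matching feature bit,
         * and on success the very next user instructions execute in
         * the new byte order. */
        if (prctl(PR_SET_ENDIAN, mode, 0, 0, 0) == -1)
                perror("PR_SET_ENDIAN");
        return 0;
}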
Diffstat (limited to 'arch/powerpc/kernel/align.c')
-rw-r--r--  arch/powerpc/kernel/align.c  189
1 file changed, 118 insertions(+), 71 deletions(-)
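The AT_HWCAP bits mentioned in the commit message can be probed from userland. A hedged sketch follows: the PPC_FEATURE_PPC_LE and PPC_FEATURE_TRUE_LE names and values are assumed from the companion cputable change, and getauxval() is a later glibc convenience (code contemporary with this patch would walk the auxv the kernel places on the stack instead).

#include <stdio.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE_PPC_LE
#define PPC_FEATURE_PPC_LE   0x00000001   /* assumed value */
#endif
#ifndef PPC_FEATURE_TRUE_LE
#define PPC_FEATURE_TRUE_LE  0x00000002   /* assumed value */
#endif

int main(void)
{
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("PPC little-endian:  %s\n",
               (hwcap & PPC_FEATURE_PPC_LE) ? "yes" : "no");
        printf("true little-endian: %s\n",
               (hwcap & PPC_FEATURE_TRUE_LE) ? "yes" : "no");
        return 0;
}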
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index faaec9c6f78..4734b5de599 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -35,17 +35,19 @@ struct aligninfo {
 
 #define INVALID { 0, 0 }
 
-#define LD      1       /* load */
-#define ST      2       /* store */
-#define SE      4       /* sign-extend value */
-#define F       8       /* to/from fp regs */
-#define U       0x10    /* update index register */
-#define M       0x20    /* multiple load/store */
-#define SW      0x40    /* byte swap int or ... */
-#define S       0x40    /* ... single-precision fp */
-#define SX      0x40    /* byte count in XER */
+/* Bits in the flags field */
+#define LD      0       /* load */
+#define ST      1       /* store */
+#define SE      2       /* sign-extend value */
+#define F       4       /* to/from fp regs */
+#define U       8       /* update index register */
+#define M       0x10    /* multiple load/store */
+#define SW      0x20    /* byte swap */
+#define S       0x40    /* single-precision fp or... */
+#define SX      0x40    /* ... byte count in XER */
 #define HARD    0x80    /* string, stwcx. */
 
+/* DSISR bits reported for a DCBZ instruction: */
 #define DCBZ    0x5f    /* 8xx/82xx dcbz faults when cache not enabled */
 
 #define SWAP(a, b)      (t = (a), (a) = (b), (b) = t)
@@ -256,12 +258,16 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
 #define REG_BYTE(rp, i)         *((u8 *)(rp) + (i))
 #endif
 
+#define SWIZ_PTR(p)             ((unsigned char __user *)((p) ^ swiz))
+
 static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
                             unsigned int reg, unsigned int nb,
-                            unsigned int flags, unsigned int instr)
+                            unsigned int flags, unsigned int instr,
+                            unsigned long swiz)
 {
         unsigned long *rptr;
-        unsigned int nb0, i;
+        unsigned int nb0, i, bswiz;
+        unsigned long p;
 
         /*
          * We do not try to emulate 8 bytes multiple as they aren't really
@@ -280,9 +286,12 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
                 if (nb == 0)
                         return 1;
         } else {
-                if (__get_user(instr,
-                               (unsigned int __user *)regs->nip))
+                unsigned long pc = regs->nip ^ (swiz & 4);
+
+                if (__get_user(instr, (unsigned int __user *)pc))
                         return -EFAULT;
+                if (swiz == 0 && (flags & SW))
+                        instr = cpu_to_le32(instr);
                 nb = (instr >> 11) & 0x1f;
                 if (nb == 0)
                         nb = 32;
@@ -300,7 +309,10 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
                 return -EFAULT; /* bad address */
 
         rptr = &regs->gpr[reg];
-        if (flags & LD) {
+        p = (unsigned long) addr;
+        bswiz = (flags & SW)? 3: 0;
+
+        if (!(flags & ST)) {
                 /*
                  * This zeroes the top 4 bytes of the affected registers
                  * in 64-bit mode, and also zeroes out any remaining
@@ -311,26 +323,28 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
                 memset(&regs->gpr[0], 0,
                        ((nb0 + 3) / 4) * sizeof(unsigned long));
 
-                for (i = 0; i < nb; ++i)
-                        if (__get_user(REG_BYTE(rptr, i), addr + i))
+                for (i = 0; i < nb; ++i, ++p)
+                        if (__get_user(REG_BYTE(rptr, i ^ bswiz), SWIZ_PTR(p)))
                                 return -EFAULT;
                 if (nb0 > 0) {
                         rptr = &regs->gpr[0];
                         addr += nb;
-                        for (i = 0; i < nb0; ++i)
-                                if (__get_user(REG_BYTE(rptr, i), addr + i))
+                        for (i = 0; i < nb0; ++i, ++p)
+                                if (__get_user(REG_BYTE(rptr, i ^ bswiz),
+                                               SWIZ_PTR(p)))
                                         return -EFAULT;
                 }
 
         } else {
-                for (i = 0; i < nb; ++i)
-                        if (__put_user(REG_BYTE(rptr, i), addr + i))
+                for (i = 0; i < nb; ++i, ++p)
+                        if (__put_user(REG_BYTE(rptr, i ^ bswiz), SWIZ_PTR(p)))
                                 return -EFAULT;
                 if (nb0 > 0) {
                         rptr = &regs->gpr[0];
                         addr += nb;
-                        for (i = 0; i < nb0; ++i)
-                                if (__put_user(REG_BYTE(rptr, i), addr + i))
+                        for (i = 0; i < nb0; ++i, ++p)
+                                if (__put_user(REG_BYTE(rptr, i ^ bswiz),
+                                               SWIZ_PTR(p)))
                                         return -EFAULT;
                 }
         }
@@ -352,7 +366,7 @@ int fix_alignment(struct pt_regs *regs)
         unsigned int reg, areg;
         unsigned int dsisr;
         unsigned char __user *addr;
-        unsigned char __user *p;
+        unsigned long p, swiz;
         int ret, t;
         union {
                 u64 ll;
@@ -380,11 +394,15 @@ int fix_alignment(struct pt_regs *regs)
          * let's make one up from the instruction
          */
         if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
-                unsigned int real_instr;
-                if (unlikely(__get_user(real_instr,
-                                        (unsigned int __user *)regs->nip)))
+                unsigned long pc = regs->nip;
+
+                if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
+                        pc ^= 4;
+                if (unlikely(__get_user(instr, (unsigned int __user *)pc)))
                         return -EFAULT;
-                dsisr = make_dsisr(real_instr);
+                if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
+                        instr = cpu_to_le32(instr);
+                dsisr = make_dsisr(instr);
         }
 
         /* extract the operation and registers from the dsisr */
@@ -397,6 +415,24 @@ int fix_alignment(struct pt_regs *regs)
         nb = aligninfo[instr].len;
         flags = aligninfo[instr].flags;
 
+        /* Byteswap little endian loads and stores */
+        swiz = 0;
+        if (regs->msr & MSR_LE) {
+                flags ^= SW;
+                /*
+                 * So-called "PowerPC little endian" mode works by
+                 * swizzling addresses rather than by actually doing
+                 * any byte-swapping.  To emulate this, we XOR each
+                 * byte address with 7.  We also byte-swap, because
+                 * the processor's address swizzling depends on the
+                 * operand size (it xors the address with 7 for bytes,
+                 * 6 for halfwords, 4 for words, 0 for doublewords) but
+                 * we will xor with 7 and load/store each byte separately.
+                 */
+                if (cpu_has_feature(CPU_FTR_PPC_LE))
+                        swiz = 7;
+        }
+
         /* DAR has the operand effective address */
         addr = (unsigned char __user *)regs->dar;
 
@@ -412,7 +448,8 @@ int fix_alignment(struct pt_regs *regs)
          * function
          */
         if (flags & M)
-                return emulate_multiple(regs, addr, reg, nb, flags, instr);
+                return emulate_multiple(regs, addr, reg, nb,
+                                        flags, instr, swiz);
 
         /* Verify the address of the operand */
         if (unlikely(user_mode(regs) &&
@@ -431,51 +468,71 @@ int fix_alignment(struct pt_regs *regs)
         /* If we are loading, get the data from user space, else
          * get it from register values
          */
-        if (flags & LD) {
+        if (!(flags & ST)) {
                 data.ll = 0;
                 ret = 0;
-                p = addr;
+                p = (unsigned long) addr;
                 switch (nb) {
                 case 8:
-                        ret |= __get_user(data.v[0], p++);
-                        ret |= __get_user(data.v[1], p++);
-                        ret |= __get_user(data.v[2], p++);
-                        ret |= __get_user(data.v[3], p++);
+                        ret |= __get_user(data.v[0], SWIZ_PTR(p++));
+                        ret |= __get_user(data.v[1], SWIZ_PTR(p++));
+                        ret |= __get_user(data.v[2], SWIZ_PTR(p++));
+                        ret |= __get_user(data.v[3], SWIZ_PTR(p++));
                 case 4:
-                        ret |= __get_user(data.v[4], p++);
-                        ret |= __get_user(data.v[5], p++);
+                        ret |= __get_user(data.v[4], SWIZ_PTR(p++));
+                        ret |= __get_user(data.v[5], SWIZ_PTR(p++));
                 case 2:
-                        ret |= __get_user(data.v[6], p++);
-                        ret |= __get_user(data.v[7], p++);
+                        ret |= __get_user(data.v[6], SWIZ_PTR(p++));
+                        ret |= __get_user(data.v[7], SWIZ_PTR(p++));
                         if (unlikely(ret))
                                 return -EFAULT;
                 }
-        } else if (flags & F)
+        } else if (flags & F) {
                 data.dd = current->thread.fpr[reg];
-        else
+                if (flags & S) {
+                        /* Single-precision FP store requires conversion... */
+#ifdef CONFIG_PPC_FPU
+                        preempt_disable();
+                        enable_kernel_fp();
+                        cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
+                        preempt_enable();
+#else
+                        return 0;
+#endif
+                }
+        } else
                 data.ll = regs->gpr[reg];
 
-        /* Perform other misc operations like sign extension, byteswap,
+        if (flags & SW) {
+                switch (nb) {
+                case 8:
+                        SWAP(data.v[0], data.v[7]);
+                        SWAP(data.v[1], data.v[6]);
+                        SWAP(data.v[2], data.v[5]);
+                        SWAP(data.v[3], data.v[4]);
+                        break;
+                case 4:
+                        SWAP(data.v[4], data.v[7]);
+                        SWAP(data.v[5], data.v[6]);
+                        break;
+                case 2:
+                        SWAP(data.v[6], data.v[7]);
+                        break;
+                }
+        }
+
+        /* Perform other misc operations like sign extension
          * or floating point single precision conversion
          */
-        switch (flags & ~U) {
+        switch (flags & ~(U|SW)) {
         case LD+SE:     /* sign extend */
                 if ( nb == 2 )
                         data.ll = data.x16.low16;
                 else    /* nb must be 4 */
                         data.ll = data.x32.low32;
                 break;
-        case LD+S:      /* byte-swap */
-        case ST+S:
-                if (nb == 2) {
-                        SWAP(data.v[6], data.v[7]);
-                } else {
-                        SWAP(data.v[4], data.v[7]);
-                        SWAP(data.v[5], data.v[6]);
-                }
-                break;
 
-        /* Single-precision FP load and store require conversions... */
+        /* Single-precision FP load requires conversion... */
         case LD+F+S:
 #ifdef CONFIG_PPC_FPU
                 preempt_disable();
@@ -486,34 +543,24 @@ int fix_alignment(struct pt_regs *regs)
                 return 0;
 #endif
                 break;
-        case ST+F+S:
-#ifdef CONFIG_PPC_FPU
-                preempt_disable();
-                enable_kernel_fp();
-                cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
-                preempt_enable();
-#else
-                return 0;
-#endif
-                break;
         }
 
         /* Store result to memory or update registers */
         if (flags & ST) {
                 ret = 0;
-                p = addr;
+                p = (unsigned long) addr;
                 switch (nb) {
                 case 8:
-                        ret |= __put_user(data.v[0], p++);
-                        ret |= __put_user(data.v[1], p++);
-                        ret |= __put_user(data.v[2], p++);
-                        ret |= __put_user(data.v[3], p++);
+                        ret |= __put_user(data.v[0], SWIZ_PTR(p++));
+                        ret |= __put_user(data.v[1], SWIZ_PTR(p++));
+                        ret |= __put_user(data.v[2], SWIZ_PTR(p++));
+                        ret |= __put_user(data.v[3], SWIZ_PTR(p++));
                 case 4:
-                        ret |= __put_user(data.v[4], p++);
-                        ret |= __put_user(data.v[5], p++);
+                        ret |= __put_user(data.v[4], SWIZ_PTR(p++));
+                        ret |= __put_user(data.v[5], SWIZ_PTR(p++));
                 case 2:
-                        ret |= __put_user(data.v[6], p++);
-                        ret |= __put_user(data.v[7], p++);
+                        ret |= __put_user(data.v[6], SWIZ_PTR(p++));
+                        ret |= __put_user(data.v[7], SWIZ_PTR(p++));
                 }
                 if (unlikely(ret))
                         return -EFAULT;
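The comment added in fix_alignment() above is dense; the following standalone sketch (plain userspace C for illustration, not kernel code) checks its central claim for a naturally aligned halfword: fetching every byte at (address ^ 7) and then byte-swapping, which is what SWIZ_PTR plus the SW flag do, reproduces the hardware's size-dependent swizzling, which XORs a halfword address with 6.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Eight bytes of storage in the big-endian order a PPC
         * little-endian machine actually keeps them in. */
        uint8_t mem[8] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 };
        unsigned long ea = 2;           /* aligned halfword address */

        /* Hardware view: a halfword access swizzles the address with 6
         * and reads the two bytes there in big-endian order. */
        uint16_t hw = (uint16_t)((mem[ea ^ 6] << 8) | mem[(ea ^ 6) + 1]);

        /* Handler view: fetch each byte at (address ^ 7), then
         * assemble the pair byte-swapped. */
        uint8_t b0 = mem[(ea + 0) ^ 7];
        uint8_t b1 = mem[(ea + 1) ^ 7];
        uint16_t sw = (uint16_t)((b1 << 8) | b0);

        assert(hw == sw);               /* both views read 0x4455 */
        printf("both views read 0x%04x\n", hw);
        return 0;
}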