 arch/sparc64/kernel/smp.c | 113 ++++++++++++++++++++++-----------
 1 file changed, 69 insertions(+), 44 deletions(-)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 6d458b35643c..2387a9b81be7 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -459,30 +459,35 @@ again:
 	}
 }
 
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
+	u64 *mondo, data0, data1, data2;
+	u16 *cpu_list;
 	u64 pstate;
 	int i;
 
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-	for_each_cpu_mask_nr(i, *mask)
-		spitfire_xcall_helper(data0, data1, data2, pstate, i);
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+	data0 = mondo[0];
+	data1 = mondo[1];
+	data2 = mondo[2];
+	for (i = 0; i < cnt; i++)
+		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 }
 
 /* Cheetah now allows to send the whole 64-bytes of data in the interrupt
  * packet, but we have no use for that. However we do take advantage of
  * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
  */
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask_p)
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	u64 pstate, ver, busy_mask;
 	int nack_busy_id, is_jbus, need_more;
-	cpumask_t mask;
-
-	if (cpus_empty(*mask_p))
-		return;
+	u64 *mondo, pstate, ver, busy_mask;
+	u16 *cpu_list;
 
-	mask = *mask_p;
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
 
 	/* Unfortunately, someone at Sun had the brilliant idea to make the
 	 * busy/nack fields hard-coded by ITID number for this Ultra-III
@@ -505,7 +510,7 @@ retry:
 			     "stxa	%2, [%5] %6\n\t"
 			     "membar	#Sync\n\t"
 			     : /* no outputs */
-			     : "r" (data0), "r" (data1), "r" (data2),
+			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
 			       "r" (0x40), "r" (0x50), "r" (0x60),
 			       "i" (ASI_INTR_W));
 
@@ -514,11 +519,16 @@ retry:
 	{
 		int i;
 
-		for_each_cpu_mask_nr(i, mask) {
-			u64 target = (i << 14) | 0x70;
+		for (i = 0; i < cnt; i++) {
+			u64 target, nr;
+
+			nr = cpu_list[i];
+			if (nr == 0xffff)
+				continue;
 
+			target = (nr << 14) | 0x70;
 			if (is_jbus) {
-				busy_mask |= (0x1UL << (i * 2));
+				busy_mask |= (0x1UL << (nr * 2));
 			} else {
 				target |= (nack_busy_id << 24);
 				busy_mask |= (0x1UL <<
@@ -552,11 +562,13 @@ retry:
 		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));
 		if (unlikely(need_more)) {
-			int i, cnt = 0;
-			for_each_cpu_mask_nr(i, mask) {
-				cpu_clear(i, mask);
-				cnt++;
-				if (cnt == 32)
+			int i, this_cnt = 0;
+			for (i = 0; i < cnt; i++) {
+				if (cpu_list[i] == 0xffff)
+					continue;
+				cpu_list[i] = 0xffff;
+				this_cnt++;
+				if (this_cnt == 32)
 					break;
 			}
 			goto retry;
@@ -587,16 +599,20 @@ retry:
 			/* Clear out the mask bits for cpus which did not
 			 * NACK us.
 			 */
-			for_each_cpu_mask_nr(i, mask) {
-				u64 check_mask;
+			for (i = 0; i < cnt; i++) {
+				u64 check_mask, nr;
+
+				nr = cpu_list[i];
+				if (nr == 0xffff)
+					continue;
 
 				if (is_jbus)
-					check_mask = (0x2UL << (2*i));
+					check_mask = (0x2UL << (2*nr));
 				else
 					check_mask = (0x2UL <<
						      this_busy_nack);
 				if ((dispatch_stat & check_mask) == 0)
-					cpu_clear(i, mask);
+					cpu_list[i] = 0xffff;
 				this_busy_nack += 2;
 				if (this_busy_nack == 64)
 					break;
@@ -608,34 +624,17 @@ retry:
 }
 
 /* Multi-cpu list version. */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int cnt, retries, this_cpu, prev_sent, i;
+	int retries, this_cpu, prev_sent, i;
 	unsigned long status;
 	cpumask_t error_mask;
-	struct trap_per_cpu *tb;
 	u16 *cpu_list;
-	u64 *mondo;
-
-	if (cpus_empty(*mask))
-		return;
 
 	this_cpu = smp_processor_id();
-	tb = &trap_block[this_cpu];
-
-	mondo = __va(tb->cpu_mondo_block_pa);
-	mondo[0] = data0;
-	mondo[1] = data1;
-	mondo[2] = data2;
-	wmb();
 
 	cpu_list = __va(tb->cpu_list_pa);
 
-	/* Setup the initial cpu list. */
-	cnt = 0;
-	for_each_cpu_mask_nr(i, *mask)
-		cpu_list[cnt++] = i;
-
 	cpus_clear(error_mask);
 	retries = 0;
 	prev_sent = 0;
@@ -743,11 +742,15 @@ dump_cpu_list_and_out:
 	printk("]\n");
 }
 
-static void (*xcall_deliver_impl)(u64, u64, u64, const cpumask_t *);
+static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
 
 static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
 {
+	struct trap_per_cpu *tb;
+	int this_cpu, i, cnt;
 	unsigned long flags;
+	u16 *cpu_list;
+	u64 *mondo;
 
 	/* We have to do this whole thing with interrupts fully disabled.
 	 * Otherwise if we send an xcall from interrupt context it will
@@ -760,7 +763,29 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
 	 * Fortunately, udelay() uses %stick/%tick so we can use that.
 	 */
 	local_irq_save(flags);
-	xcall_deliver_impl(data0, data1, data2, mask);
+
+	this_cpu = smp_processor_id();
+	tb = &trap_block[this_cpu];
+
+	mondo = __va(tb->cpu_mondo_block_pa);
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	cpu_list = __va(tb->cpu_list_pa);
+
+	/* Setup the initial cpu list. */
+	cnt = 0;
+	for_each_cpu_mask_nr(i, *mask) {
+		if (i == this_cpu || !cpu_online(i))
+			continue;
+		cpu_list[cnt++] = i;
+	}
+
+	if (cnt)
+		xcall_deliver_impl(tb, cnt);
+
 	local_irq_restore(flags);
 }
 
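For readers following the refactor, here is a small standalone sketch (plain userspace C, not kernel code) of the calling convention this patch introduces: the top-level xcall_deliver() now fills the per-cpu mondo block and cpu list once, skips the sending cpu and offline cpus, and hands only the trap-block pointer plus a count to the platform hook. All mock_* names, the fixed-size arrays, and the byte-per-cpu mask are illustrative stand-ins, not the kernel's real trap_per_cpu layout or cpumask API.

/*
 * Userspace mock of the new xcall_deliver() -> xcall_deliver_impl(tb, cnt)
 * shape.  Not kernel code; all mock_* names are invented for illustration.
 */
#include <stdio.h>

#define MAX_CPUS 8

struct mock_trap_per_cpu {
	unsigned long long mondo[3];       /* stands in for cpu_mondo_block_pa */
	unsigned short cpu_list[MAX_CPUS]; /* stands in for cpu_list_pa */
};

static struct mock_trap_per_cpu trap_block[MAX_CPUS];

/* Platform hook, analogous to spitfire/cheetah/hypervisor_xcall_deliver(). */
static void (*xcall_deliver_impl)(struct mock_trap_per_cpu *tb, int cnt);

static void mock_spitfire_deliver(struct mock_trap_per_cpu *tb, int cnt)
{
	int i;

	/* One dispatch per listed cpu, reading the payload from the block. */
	for (i = 0; i < cnt; i++)
		printf("deliver %llx/%llx/%llx to cpu %d\n",
		       tb->mondo[0], tb->mondo[1], tb->mondo[2],
		       (int)tb->cpu_list[i]);
}

static void mock_xcall_deliver(unsigned long long data0,
			       unsigned long long data1,
			       unsigned long long data2,
			       const unsigned char *mask, int this_cpu)
{
	struct mock_trap_per_cpu *tb = &trap_block[this_cpu];
	int i, cnt = 0;

	/* Fill the mondo block once... */
	tb->mondo[0] = data0;
	tb->mondo[1] = data1;
	tb->mondo[2] = data2;

	/* ...then the cpu list, skipping the sender (and, in the kernel,
	 * offline cpus), as the new top-level xcall_deliver() does.
	 */
	for (i = 0; i < MAX_CPUS; i++) {
		if (!mask[i] || i == this_cpu)
			continue;
		tb->cpu_list[cnt++] = (unsigned short)i;
	}

	if (cnt)
		xcall_deliver_impl(tb, cnt);
}

int main(void)
{
	unsigned char mask[MAX_CPUS] = { 1, 1, 0, 1, 0, 0, 1, 0 };

	xcall_deliver_impl = mock_spitfire_deliver;
	mock_xcall_deliver(0x10, 0x20, 0x30, mask, 1 /* sending cpu */);
	return 0;
}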
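Because the per-platform routines no longer own a private cpumask, the cheetah and hypervisor paths in the patch mark finished entries in the shared cpu list with 0xffff and skip them on later passes (see the need_more and NACK-retry hunks). The following minimal userspace sketch illustrates only that sentinel convention; the batch size of 2 and the list contents are made up for the example (the kernel retires up to 32 entries per pass).

/* Userspace sketch of the 0xffff sentinel used in the retry loops above. */
#include <stdio.h>

#define CPU_DONE 0xffff	/* same sentinel value the patch uses */

static void mark_batch_done(unsigned short *cpu_list, int cnt, int batch)
{
	int i, this_cnt = 0;

	/* Mirror of the need_more path: retire at most `batch` live entries. */
	for (i = 0; i < cnt; i++) {
		if (cpu_list[i] == CPU_DONE)
			continue;	/* already handled on an earlier pass */
		cpu_list[i] = CPU_DONE;
		if (++this_cnt == batch)
			break;
	}
}

int main(void)
{
	unsigned short cpu_list[] = { 0, 2, 5, 7, 9, 12 };
	int cnt = sizeof(cpu_list) / sizeof(cpu_list[0]);
	int i, pass = 0, remaining = cnt;

	while (remaining > 0) {
		mark_batch_done(cpu_list, cnt, 2);
		remaining = 0;
		for (i = 0; i < cnt; i++)
			if (cpu_list[i] != CPU_DONE)
				remaining++;
		printf("after pass %d: %d cpus still pending\n", ++pass, remaining);
	}
	return 0;
}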