-rw-r--r--	arch/sparc64/kernel/smp.c	| 113
1 file changed, 69 insertions(+), 44 deletions(-)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 6d458b35643c..2387a9b81be7 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -459,30 +459,35 @@ again:
 	}
 }
 
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
+	u64 *mondo, data0, data1, data2;
+	u16 *cpu_list;
 	u64 pstate;
 	int i;
 
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-	for_each_cpu_mask_nr(i, *mask)
-		spitfire_xcall_helper(data0, data1, data2, pstate, i);
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+	data0 = mondo[0];
+	data1 = mondo[1];
+	data2 = mondo[2];
+	for (i = 0; i < cnt; i++)
+		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 }
 
 /* Cheetah now allows to send the whole 64-bytes of data in the interrupt
  * packet, but we have no use for that. However we do take advantage of
  * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
  */
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask_p)
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	u64 pstate, ver, busy_mask;
 	int nack_busy_id, is_jbus, need_more;
-	cpumask_t mask;
-
-	if (cpus_empty(*mask_p))
-		return;
+	u64 *mondo, pstate, ver, busy_mask;
+	u16 *cpu_list;
 
-	mask = *mask_p;
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
 
 	/* Unfortunately, someone at Sun had the brilliant idea to make the
 	 * busy/nack fields hard-coded by ITID number for this Ultra-III
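Both deliver routines above now share one calling convention: they take the sending cpu's trap_per_cpu block plus an entry count, and pull the already-written mondo words and cpu list out of it instead of receiving raw data words and a cpumask. A minimal user-space sketch of that data flow follows; struct tb_view, its plain-pointer fields and model_xcall_deliver() are hypothetical stand-ins (the real code goes through __va() on the cpu_mondo_block_pa/cpu_list_pa physical addresses), so this only models the shape of the interface, not the kernel implementation.

/* Hypothetical user-space model of the (tb, cnt) convention above; the
 * physical-address indirection (__va) is replaced by plain pointers.
 */
#include <stdint.h>
#include <stdio.h>

struct tb_view {
	uint64_t *mondo;	/* stands in for __va(tb->cpu_mondo_block_pa) */
	uint16_t *cpu_list;	/* stands in for __va(tb->cpu_list_pa) */
};

/* Analogue of spitfire_xcall_deliver(): read the three mondo words once,
 * then walk the pre-built cpu list instead of iterating a cpumask.
 */
static void model_xcall_deliver(struct tb_view *tb, int cnt)
{
	uint64_t data0 = tb->mondo[0];
	uint64_t data1 = tb->mondo[1];
	uint64_t data2 = tb->mondo[2];

	for (int i = 0; i < cnt; i++)
		printf("deliver %#llx/%#llx/%#llx to cpu %u\n",
		       (unsigned long long)data0, (unsigned long long)data1,
		       (unsigned long long)data2, (unsigned)tb->cpu_list[i]);
}

int main(void)
{
	uint64_t mondo[3] = { 0x1234, 0, 0 };
	uint16_t cpus[] = { 1, 2, 5 };
	struct tb_view tb = { .mondo = mondo, .cpu_list = cpus };

	model_xcall_deliver(&tb, 3);
	return 0;
}

The payoff of this shape is that a retry can walk the very same list again without rebuilding anything from a mask.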
@@ -505,7 +510,7 @@ retry:
 			     "stxa %2, [%5] %6\n\t"
 			     "membar #Sync\n\t"
 			     : /* no outputs */
-			     : "r" (data0), "r" (data1), "r" (data2),
+			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
 			       "r" (0x40), "r" (0x50), "r" (0x60),
 			       "i" (ASI_INTR_W));
 
@@ -514,11 +519,16 @@ retry:
 	{
 		int i;
 
-		for_each_cpu_mask_nr(i, mask) {
-			u64 target = (i << 14) | 0x70;
+		for (i = 0; i < cnt; i++) {
+			u64 target, nr;
+
+			nr = cpu_list[i];
+			if (nr == 0xffff)
+				continue;
 
+			target = (nr << 14) | 0x70;
 			if (is_jbus) {
-				busy_mask |= (0x1UL << (i * 2));
+				busy_mask |= (0x1UL << (nr * 2));
 			} else {
 				target |= (nack_busy_id << 24);
 				busy_mask |= (0x1UL <<
@@ -552,11 +562,13 @@ retry:
 				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 						     : : "r" (pstate));
 				if (unlikely(need_more)) {
-					int i, cnt = 0;
-					for_each_cpu_mask_nr(i, mask) {
-						cpu_clear(i, mask);
-						cnt++;
-						if (cnt == 32)
+					int i, this_cnt = 0;
+					for (i = 0; i < cnt; i++) {
+						if (cpu_list[i] == 0xffff)
+							continue;
+						cpu_list[i] = 0xffff;
+						this_cnt++;
+						if (this_cnt == 32)
 							break;
 					}
 					goto retry;
@@ -587,16 +599,20 @@ retry:
 			/* Clear out the mask bits for cpus which did not
 			 * NACK us.
 			 */
-			for_each_cpu_mask_nr(i, mask) {
-				u64 check_mask;
+			for (i = 0; i < cnt; i++) {
+				u64 check_mask, nr;
+
+				nr = cpu_list[i];
+				if (nr == 0xffff)
+					continue;
 
 				if (is_jbus)
-					check_mask = (0x2UL << (2*i));
+					check_mask = (0x2UL << (2*nr));
 				else
 					check_mask = (0x2UL <<
 						      this_busy_nack);
 				if ((dispatch_stat & check_mask) == 0)
-					cpu_clear(i, mask);
+					cpu_list[i] = 0xffff;
 				this_busy_nack += 2;
 				if (this_busy_nack == 64)
 					break;
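With no private cpumask left to clear bits from, the cheetah retry paths above mark finished entries directly in the shared cpu list with the sentinel 0xffff, and every later pass skips those slots. The sketch below is only an illustrative user-space model of that convention (mark_done_and_count() and CPU_DONE are made-up names); the sentinel value itself is taken from the hunks above.

/* Illustrative model (not kernel code) of the 0xffff sentinel: a delivered
 * entry is overwritten in place so later passes over the same list skip it.
 */
#include <stdint.h>
#include <stdio.h>

#define CPU_DONE 0xffff		/* sentinel value taken from the patch */

/* Mark entry idx as delivered, then count how many live entries remain. */
static int mark_done_and_count(uint16_t *cpu_list, int cnt, int idx)
{
	int remaining = 0;

	cpu_list[idx] = CPU_DONE;

	for (int i = 0; i < cnt; i++)
		if (cpu_list[i] != CPU_DONE)
			remaining++;
	return remaining;
}

int main(void)
{
	uint16_t list[] = { 3, 4, 7, 9 };

	printf("entries left: %d\n", mark_done_and_count(list, 4, 1)); /* 3 */
	return 0;
}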
@@ -608,34 +624,17 @@ retry:
 }
 
 /* Multi-cpu list version. */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int cnt, retries, this_cpu, prev_sent, i;
+	int retries, this_cpu, prev_sent, i;
 	unsigned long status;
 	cpumask_t error_mask;
-	struct trap_per_cpu *tb;
 	u16 *cpu_list;
-	u64 *mondo;
-
-	if (cpus_empty(*mask))
-		return;
 
 	this_cpu = smp_processor_id();
-	tb = &trap_block[this_cpu];
-
-	mondo = __va(tb->cpu_mondo_block_pa);
-	mondo[0] = data0;
-	mondo[1] = data1;
-	mondo[2] = data2;
-	wmb();
 
 	cpu_list = __va(tb->cpu_list_pa);
 
-	/* Setup the initial cpu list. */
-	cnt = 0;
-	for_each_cpu_mask_nr(i, *mask)
-		cpu_list[cnt++] = i;
-
 	cpus_clear(error_mask);
 	retries = 0;
 	prev_sent = 0;
@@ -743,11 +742,15 @@ dump_cpu_list_and_out:
 	printk("]\n");
 }
 
-static void (*xcall_deliver_impl)(u64, u64, u64, const cpumask_t *);
+static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
 
 static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
 {
+	struct trap_per_cpu *tb;
+	int this_cpu, i, cnt;
 	unsigned long flags;
+	u16 *cpu_list;
+	u64 *mondo;
 
 	/* We have to do this whole thing with interrupts fully disabled.
 	 * Otherwise if we send an xcall from interrupt context it will
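The xcall_deliver_impl pointer is retyped here, so the spitfire, cheetah and hypervisor backends all present the identical (struct trap_per_cpu *, int) signature and the caller never needs to know which one is installed. Below is a small sketch of that dispatch pattern; tb_model, deliver_fn and the trivial backends are hypothetical illustration names, and the assumption is only that one backend is selected once from the cpu type.

/* Sketch of the dispatch pattern implied by the retyped pointer; tb_model,
 * deliver_fn and the backends below are made-up illustration names.
 */
#include <stdint.h>
#include <stdio.h>

struct tb_model {
	uint64_t mondo[3];
	uint16_t cpu_list[64];
};

/* Same shape for every backend, like the new xcall_deliver_impl type. */
typedef void (*deliver_fn)(struct tb_model *tb, int cnt);

static void spitfire_model(struct tb_model *tb, int cnt)
{
	printf("spitfire: mondo %#llx to %d cpus\n",
	       (unsigned long long)tb->mondo[0], cnt);
}

static void cheetah_model(struct tb_model *tb, int cnt)
{
	printf("cheetah: mondo %#llx to %d cpus\n",
	       (unsigned long long)tb->mondo[0], cnt);
}

static deliver_fn deliver_impl;	/* chosen once, e.g. from the cpu type */

int main(void)
{
	struct tb_model tb = { .mondo = { 0x10, 0x20, 0x30 } };

	deliver_impl = cheetah_model;
	deliver_impl(&tb, 4);
	(void)spitfire_model;	/* the other backends share the signature */
	return 0;
}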
@@ -760,7 +763,29 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
 	 * Fortunately, udelay() uses %stick/%tick so we can use that.
 	 */
 	local_irq_save(flags);
-	xcall_deliver_impl(data0, data1, data2, mask);
+
+	this_cpu = smp_processor_id();
+	tb = &trap_block[this_cpu];
+
+	mondo = __va(tb->cpu_mondo_block_pa);
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	cpu_list = __va(tb->cpu_list_pa);
+
+	/* Setup the initial cpu list. */
+	cnt = 0;
+	for_each_cpu_mask_nr(i, *mask) {
+		if (i == this_cpu || !cpu_online(i))
+			continue;
+		cpu_list[cnt++] = i;
+	}
+
+	if (cnt)
+		xcall_deliver_impl(tb, cnt);
+
 	local_irq_restore(flags);
 }
 
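After this final hunk the top level owns all of the setup: it writes the three mondo words, builds the cpu list while skipping the sending cpu and offline cpus, and only invokes the backend when the list is non-empty, which is why the per-backend cpus_empty() early returns could be removed. A user-space model of that setup step is sketched below; tb_model, backend_deliver() and the plain bitmasks standing in for cpumask_t/cpu_online() are assumptions for illustration, not kernel code.

/* User-space model of the setup now done once in xcall_deliver(): write the
 * mondo words, build the cpu list (skipping self and offline cpus), and call
 * the backend only if anything is left.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS_MODEL 16

struct tb_model {
	uint64_t mondo[3];
	uint16_t cpu_list[NR_CPUS_MODEL];
};

static void backend_deliver(struct tb_model *tb, int cnt)
{
	for (int i = 0; i < cnt; i++)
		printf("send %#llx to cpu %u\n",
		       (unsigned long long)tb->mondo[0],
		       (unsigned)tb->cpu_list[i]);
}

/* mask/online are plain bitmasks standing in for cpumask_t / cpu_online(). */
static void model_xcall_deliver(struct tb_model *tb, uint64_t data0,
				uint64_t data1, uint64_t data2,
				uint32_t mask, uint32_t online, int this_cpu)
{
	int cnt = 0;

	tb->mondo[0] = data0;
	tb->mondo[1] = data1;
	tb->mondo[2] = data2;

	for (int i = 0; i < NR_CPUS_MODEL; i++) {
		if (!(mask & (1u << i)))
			continue;
		if (i == this_cpu || !(online & (1u << i)))
			continue;
		tb->cpu_list[cnt++] = (uint16_t)i;
	}

	if (cnt)
		backend_deliver(tb, cnt);
}

int main(void)
{
	struct tb_model tb = { { 0 } };

	/* ask for cpus 0-3, with cpu 2 offline, sending from cpu 0 */
	model_xcall_deliver(&tb, 0xabcd, 0, 0, 0x0f, ~(1u << 2), 0);
	return 0;
}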