author		David S. Miller <davem@davemloft.net>	2008-08-04 19:42:58 -0400
committer	David S. Miller <davem@davemloft.net>	2008-08-04 19:42:58 -0400
commit		90f7ae8a55190f5edfb9fda957e25c994ed39ec4 (patch)
tree		b815a08c25f4acf37b02a982c67c6d0efd2fe480 /arch
parent		c02a5119e862dea9a1361182840d41ae1fe24227 (diff)
sparc64: Build cpu list and mondo block at top-level xcall_deliver().
Then modify all of the xcall dispatch implementations so that they are passed, and make use of, this information.

The xcall dispatch implementations then no longer need to be mindful of details such as "is the current cpu in the list?" and "is the cpu online?"

Signed-off-by: David S. Miller <davem@davemloft.net>
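For orientation, a condensed sketch of the new top-level flow this patch introduces (simplified from the diff below; the interrupt disabling around the body is omitted here):

	/* Sketch only: xcall_deliver() now builds the mondo block and cpu
	 * list once, then hands them to the per-platform dispatch hook.
	 */
	static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);

	static void xcall_deliver(u64 data0, u64 data1, u64 data2,
				  const cpumask_t *mask)
	{
		int this_cpu = smp_processor_id();
		struct trap_per_cpu *tb = &trap_block[this_cpu];
		u64 *mondo = __va(tb->cpu_mondo_block_pa);
		u16 *cpu_list = __va(tb->cpu_list_pa);
		int i, cnt = 0;

		mondo[0] = data0;
		mondo[1] = data1;
		mondo[2] = data2;
		wmb();

		/* Only online cpus other than the sender make the list. */
		for_each_cpu_mask_nr(i, *mask) {
			if (i == this_cpu || !cpu_online(i))
				continue;
			cpu_list[cnt++] = i;
		}

		if (cnt)
			xcall_deliver_impl(tb, cnt);
	}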
Diffstat (limited to 'arch')
-rw-r--r--	arch/sparc64/kernel/smp.c	113
1 file changed, 69 insertions(+), 44 deletions(-)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 6d458b35643c..2387a9b81be7 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -459,30 +459,35 @@ again:
 	}
 }
 
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
+	u64 *mondo, data0, data1, data2;
+	u16 *cpu_list;
 	u64 pstate;
 	int i;
 
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-	for_each_cpu_mask_nr(i, *mask)
-		spitfire_xcall_helper(data0, data1, data2, pstate, i);
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+	data0 = mondo[0];
+	data1 = mondo[1];
+	data2 = mondo[2];
+	for (i = 0; i < cnt; i++)
+		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 }
 
 /* Cheetah now allows to send the whole 64-bytes of data in the interrupt
  * packet, but we have no use for that. However we do take advantage of
  * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
  */
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask_p)
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	u64 pstate, ver, busy_mask;
 	int nack_busy_id, is_jbus, need_more;
-	cpumask_t mask;
-
-	if (cpus_empty(*mask_p))
-		return;
+	u64 *mondo, pstate, ver, busy_mask;
+	u16 *cpu_list;
 
-	mask = *mask_p;
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
 
 	/* Unfortunately, someone at Sun had the brilliant idea to make the
 	 * busy/nack fields hard-coded by ITID number for this Ultra-III
@@ -505,7 +510,7 @@ retry:
505 "stxa %2, [%5] %6\n\t" 510 "stxa %2, [%5] %6\n\t"
506 "membar #Sync\n\t" 511 "membar #Sync\n\t"
507 : /* no outputs */ 512 : /* no outputs */
508 : "r" (data0), "r" (data1), "r" (data2), 513 : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
509 "r" (0x40), "r" (0x50), "r" (0x60), 514 "r" (0x40), "r" (0x50), "r" (0x60),
510 "i" (ASI_INTR_W)); 515 "i" (ASI_INTR_W));
511 516
@@ -514,11 +519,16 @@ retry:
 	{
 		int i;
 
-		for_each_cpu_mask_nr(i, mask) {
-			u64 target = (i << 14) | 0x70;
+		for (i = 0; i < cnt; i++) {
+			u64 target, nr;
+
+			nr = cpu_list[i];
+			if (nr == 0xffff)
+				continue;
 
+			target = (nr << 14) | 0x70;
 			if (is_jbus) {
-				busy_mask |= (0x1UL << (i * 2));
+				busy_mask |= (0x1UL << (nr * 2));
 			} else {
 				target |= (nack_busy_id << 24);
 				busy_mask |= (0x1UL <<
@@ -552,11 +562,13 @@ retry:
 	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 			     : : "r" (pstate));
 	if (unlikely(need_more)) {
-		int i, cnt = 0;
-		for_each_cpu_mask_nr(i, mask) {
-			cpu_clear(i, mask);
-			cnt++;
-			if (cnt == 32)
+		int i, this_cnt = 0;
+		for (i = 0; i < cnt; i++) {
+			if (cpu_list[i] == 0xffff)
+				continue;
+			cpu_list[i] = 0xffff;
+			this_cnt++;
+			if (this_cnt == 32)
 				break;
 		}
 		goto retry;
@@ -587,16 +599,20 @@ retry:
 			/* Clear out the mask bits for cpus which did not
 			 * NACK us.
 			 */
-			for_each_cpu_mask_nr(i, mask) {
-				u64 check_mask;
+			for (i = 0; i < cnt; i++) {
+				u64 check_mask, nr;
+
+				nr = cpu_list[i];
+				if (nr == 0xffff)
+					continue;
 
 				if (is_jbus)
-					check_mask = (0x2UL << (2*i));
+					check_mask = (0x2UL << (2*nr));
 				else
 					check_mask = (0x2UL <<
 						      this_busy_nack);
 				if ((dispatch_stat & check_mask) == 0)
-					cpu_clear(i, mask);
+					cpu_list[i] = 0xffff;
 				this_busy_nack += 2;
 				if (this_busy_nack == 64)
 					break;
@@ -608,34 +624,17 @@ retry:
 }
 
 /* Multi-cpu list version. */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int cnt, retries, this_cpu, prev_sent, i;
+	int retries, this_cpu, prev_sent, i;
 	unsigned long status;
 	cpumask_t error_mask;
-	struct trap_per_cpu *tb;
 	u16 *cpu_list;
-	u64 *mondo;
-
-	if (cpus_empty(*mask))
-		return;
 
 	this_cpu = smp_processor_id();
-	tb = &trap_block[this_cpu];
-
-	mondo = __va(tb->cpu_mondo_block_pa);
-	mondo[0] = data0;
-	mondo[1] = data1;
-	mondo[2] = data2;
-	wmb();
 
 	cpu_list = __va(tb->cpu_list_pa);
 
-	/* Setup the initial cpu list. */
-	cnt = 0;
-	for_each_cpu_mask_nr(i, *mask)
-		cpu_list[cnt++] = i;
-
 	cpus_clear(error_mask);
 	retries = 0;
 	prev_sent = 0;
@@ -743,11 +742,15 @@ dump_cpu_list_and_out:
 	printk("]\n");
 }
 
-static void (*xcall_deliver_impl)(u64, u64, u64, const cpumask_t *);
+static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
 
 static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
 {
+	struct trap_per_cpu *tb;
+	int this_cpu, i, cnt;
 	unsigned long flags;
+	u16 *cpu_list;
+	u64 *mondo;
 
 	/* We have to do this whole thing with interrupts fully disabled.
 	 * Otherwise if we send an xcall from interrupt context it will
@@ -760,7 +763,29 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
 	 * Fortunately, udelay() uses %stick/%tick so we can use that.
 	 */
 	local_irq_save(flags);
-	xcall_deliver_impl(data0, data1, data2, mask);
+
+	this_cpu = smp_processor_id();
+	tb = &trap_block[this_cpu];
+
+	mondo = __va(tb->cpu_mondo_block_pa);
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	cpu_list = __va(tb->cpu_list_pa);
+
+	/* Setup the initial cpu list. */
+	cnt = 0;
+	for_each_cpu_mask_nr(i, *mask) {
+		if (i == this_cpu || !cpu_online(i))
+			continue;
+		cpu_list[cnt++] = i;
+	}
+
+	if (cnt)
+		xcall_deliver_impl(tb, cnt);
+
 	local_irq_restore(flags);
 }
 
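A note on the retry handling visible in the cheetah and hypervisor hunks above: list entries are never removed, they are retired in place with an 0xffff sentinel. The loop below is an illustrative sketch of that consumer-side pattern, not a literal excerpt from the patch (the function name is made up):

	/* Illustrative only: walk the prebuilt cpu list, skip entries already
	 * retired with the 0xffff sentinel, and retire an entry once its cpu
	 * no longer needs a resend.
	 */
	static void walk_cpu_list_example(u16 *cpu_list, int cnt)
	{
		int i;

		for (i = 0; i < cnt; i++) {
			u16 nr = cpu_list[i];

			if (nr == 0xffff)	/* handled on an earlier pass */
				continue;
			/* ... dispatch the mondo to cpu 'nr' here ... */
			cpu_list[i] = 0xffff;	/* done, skip on any retry */
		}
	}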