author		Linus Torvalds <torvalds@linux-foundation.org>	2008-08-04 22:04:36 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-08-04 22:04:36 -0400
commit		e9ba9698187ddbc0c5bfcf41de0349a662d23d02 (patch)
tree		b11f8658d0bb05fe8bd826de1ead328d44e84005
parent		2e1e9212ed8c532c6b324de77d3cafef5d2bc846 (diff)
parent		ae583885bfd07474789059cdef399289bd66c8d0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6:
  sparc64: Remove all cpumask_t local variables in xcall dispatch.
  sparc64: Kill error_mask from hypervisor_xcall_deliver().
  sparc64: Build cpu list and mondo block at top-level xcall_deliver().
  sparc64: Disable local interrupts around xcall_deliver_impl() invocation.
  sparc64: Make all xcall_deliver's go through common helper function.
  sparc64: Always allocate the send mondo blocks, even on non-sun4v.
  sparc64: Make smp_cross_call_masked() take a cpumask_t pointer.
  sparc64: Directly call xcall_deliver() in smp_start_sync_tick_client.
  sparc64: Call xcall_deliver() directly in some cases.
  sparc64: Use cpumask_t pointers and for_each_cpu_mask_nr() in xcall_deliver.
  sparc64: Use xcall_deliver() consistently.
  sparc64: Use function pointer for cross-call sending.
  arch/sparc64/kernel/signal.c: removed duplicated #include
  sparc64: Need to disable preemption around smp_tsb_sync().
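
The net effect of the series is a single dispatch path: a per-chip sender is
installed once in a function pointer, and every cross call funnels through one
xcall_deliver() helper that fills the sending cpu's mondo block and cpu list
with local interrupts disabled.  The snippet below is a condensed,
userspace-style sketch of that flow for orientation only, not the kernel code
itself: the trap_per_cpu layout, the plain bitmask standing in for cpumask_t,
the fixed NCPUS, and the printf placeholder for the actual mondo write are all
simplifications; only the function names mirror arch/sparc64/kernel/smp.c below.

/* Hypothetical stand-alone model of the reworked xcall dispatch;
 * builds with: gcc -std=c99 -Wall xcall_sketch.c
 */
#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

struct trap_per_cpu {
	uint64_t mondo[3];		/* data0/data1/data2 of the cross call */
	uint16_t cpu_list[NCPUS];	/* cpu numbers targeted by this call */
};

static struct trap_per_cpu trap_block[NCPUS];

/* A chip-specific sender only consumes the block prepared for it. */
static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	for (int i = 0; i < cnt; i++)
		printf("spitfire: send %#llx to cpu %u\n",
		       (unsigned long long)tb->mondo[0],
		       (unsigned)tb->cpu_list[i]);
}

/* Chosen once at boot; smp_setup_processor_id() does this for real. */
static void (*xcall_deliver_impl)(struct trap_per_cpu *, int) =
	spitfire_xcall_deliver;

/* Common helper: build the cpu list and mondo block, then dispatch.
 * The kernel version brackets this with local_irq_save()/restore().
 */
static void xcall_deliver(uint64_t d0, uint64_t d1, uint64_t d2,
			  unsigned long mask, int this_cpu)
{
	struct trap_per_cpu *tb = &trap_block[this_cpu];
	int cnt = 0;

	tb->mondo[0] = d0;
	tb->mondo[1] = d1;
	tb->mondo[2] = d2;

	for (int i = 0; i < NCPUS; i++)
		if ((mask & (1UL << i)) && i != this_cpu)
			tb->cpu_list[cnt++] = (uint16_t)i;

	if (cnt)
		xcall_deliver_impl(tb, cnt);
}

int main(void)
{
	xcall_deliver(0xdeadbeef, 0, 0, 0xf /* cpus 0-3 */, 0);
	return 0;
}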
-rw-r--r--	arch/sparc64/kernel/irq.c	19
-rw-r--r--	arch/sparc64/kernel/signal.c	1
-rw-r--r--	arch/sparc64/kernel/smp.c	292
-rw-r--r--	arch/sparc64/mm/tsb.c	5
4 files changed, 166 insertions(+), 151 deletions(-)
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index c481673d249c..ba43d85e8dde 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -915,12 +915,18 @@ static void __init sun4v_init_mondo_queues(void)
 		alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
 		alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
 			       tb->nonresum_qmask);
+	}
+}
+
+static void __init init_send_mondo_info(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct trap_per_cpu *tb = &trap_block[cpu];
 
 		init_cpu_send_mondo_info(tb);
 	}
-
-	/* Load up the boot cpu's entries. */
-	sun4v_register_mondo_queues(hard_smp_processor_id());
 }
 
 static struct irqaction timer_irq_action = {
@@ -949,6 +955,13 @@ void __init init_IRQ(void)
 	if (tlb_type == hypervisor)
 		sun4v_init_mondo_queues();
 
+	init_send_mondo_info();
+
+	if (tlb_type == hypervisor) {
+		/* Load up the boot cpu's entries. */
+		sun4v_register_mondo_queues(hard_smp_processor_id());
+	}
+
 	/* We need to clear any IRQ's pending in the soft interrupt
 	 * registers, a spurious one could be left around from the
 	 * PROM timer which we just disabled.
diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
index ca5a6ae3a6e2..ec82d76dc6f2 100644
--- a/arch/sparc64/kernel/signal.c
+++ b/arch/sparc64/kernel/signal.c
@@ -23,7 +23,6 @@
 #include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/bitops.h>
-#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/ptrace.h>
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 340842e51ce1..27b81775a4de 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -459,27 +459,35 @@ again:
 	}
 }
 
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
+	u64 *mondo, data0, data1, data2;
+	u16 *cpu_list;
 	u64 pstate;
 	int i;
 
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-	for_each_cpu_mask(i, mask)
-		spitfire_xcall_helper(data0, data1, data2, pstate, i);
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+	data0 = mondo[0];
+	data1 = mondo[1];
+	data2 = mondo[2];
+	for (i = 0; i < cnt; i++)
+		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 }
 
 /* Cheetah now allows to send the whole 64-bytes of data in the interrupt
  * packet, but we have no use for that. However we do take advantage of
  * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
  */
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	u64 pstate, ver, busy_mask;
 	int nack_busy_id, is_jbus, need_more;
+	u64 *mondo, pstate, ver, busy_mask;
+	u16 *cpu_list;
 
-	if (cpus_empty(mask))
-		return;
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
 
 	/* Unfortunately, someone at Sun had the brilliant idea to make the
 	 * busy/nack fields hard-coded by ITID number for this Ultra-III
@@ -502,7 +510,7 @@ retry:
502 "stxa %2, [%5] %6\n\t" 510 "stxa %2, [%5] %6\n\t"
503 "membar #Sync\n\t" 511 "membar #Sync\n\t"
504 : /* no outputs */ 512 : /* no outputs */
505 : "r" (data0), "r" (data1), "r" (data2), 513 : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
506 "r" (0x40), "r" (0x50), "r" (0x60), 514 "r" (0x40), "r" (0x50), "r" (0x60),
507 "i" (ASI_INTR_W)); 515 "i" (ASI_INTR_W));
508 516
@@ -511,11 +519,16 @@ retry:
 	{
 		int i;
 
-		for_each_cpu_mask(i, mask) {
-			u64 target = (i << 14) | 0x70;
+		for (i = 0; i < cnt; i++) {
+			u64 target, nr;
+
+			nr = cpu_list[i];
+			if (nr == 0xffff)
+				continue;
 
+			target = (nr << 14) | 0x70;
 			if (is_jbus) {
-				busy_mask |= (0x1UL << (i * 2));
+				busy_mask |= (0x1UL << (nr * 2));
 			} else {
 				target |= (nack_busy_id << 24);
 				busy_mask |= (0x1UL <<
@@ -549,11 +562,13 @@ retry:
 		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 				     : : "r" (pstate));
 		if (unlikely(need_more)) {
-			int i, cnt = 0;
-			for_each_cpu_mask(i, mask) {
-				cpu_clear(i, mask);
-				cnt++;
-				if (cnt == 32)
+			int i, this_cnt = 0;
+			for (i = 0; i < cnt; i++) {
+				if (cpu_list[i] == 0xffff)
+					continue;
+				cpu_list[i] = 0xffff;
+				this_cnt++;
+				if (this_cnt == 32)
 					break;
 			}
 			goto retry;
@@ -584,16 +599,20 @@ retry:
 			/* Clear out the mask bits for cpus which did not
 			 * NACK us.
 			 */
-			for_each_cpu_mask(i, mask) {
-				u64 check_mask;
+			for (i = 0; i < cnt; i++) {
+				u64 check_mask, nr;
+
+				nr = cpu_list[i];
+				if (nr == 0xffff)
+					continue;
 
 				if (is_jbus)
-					check_mask = (0x2UL << (2*i));
+					check_mask = (0x2UL << (2*nr));
 				else
 					check_mask = (0x2UL <<
 						      this_busy_nack);
 				if ((dispatch_stat & check_mask) == 0)
-					cpu_clear(i, mask);
+					cpu_list[i] = 0xffff;
 				this_busy_nack += 2;
 				if (this_busy_nack == 64)
 					break;
@@ -605,47 +624,17 @@ retry:
 }
 
 /* Multi-cpu list version. */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	struct trap_per_cpu *tb;
+	int retries, this_cpu, prev_sent, i, saw_cpu_error;
+	unsigned long status;
 	u16 *cpu_list;
-	u64 *mondo;
-	cpumask_t error_mask;
-	unsigned long flags, status;
-	int cnt, retries, this_cpu, prev_sent, i;
-
-	if (cpus_empty(mask))
-		return;
-
-	/* We have to do this whole thing with interrupts fully disabled.
-	 * Otherwise if we send an xcall from interrupt context it will
-	 * corrupt both our mondo block and cpu list state.
-	 *
-	 * One consequence of this is that we cannot use timeout mechanisms
-	 * that depend upon interrupts being delivered locally. So, for
-	 * example, we cannot sample jiffies and expect it to advance.
-	 *
-	 * Fortunately, udelay() uses %stick/%tick so we can use that.
-	 */
-	local_irq_save(flags);
 
 	this_cpu = smp_processor_id();
-	tb = &trap_block[this_cpu];
-
-	mondo = __va(tb->cpu_mondo_block_pa);
-	mondo[0] = data0;
-	mondo[1] = data1;
-	mondo[2] = data2;
-	wmb();
 
 	cpu_list = __va(tb->cpu_list_pa);
 
-	/* Setup the initial cpu list. */
-	cnt = 0;
-	for_each_cpu_mask(i, mask)
-		cpu_list[cnt++] = i;
-
-	cpus_clear(error_mask);
+	saw_cpu_error = 0;
 	retries = 0;
 	prev_sent = 0;
 	do {
@@ -690,10 +679,9 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
 					continue;
 
 				err = sun4v_cpu_state(cpu);
-				if (err >= 0 &&
-				    err == HV_CPU_STATE_ERROR) {
+				if (err == HV_CPU_STATE_ERROR) {
+					saw_cpu_error = (cpu + 1);
 					cpu_list[i] = 0xffff;
-					cpu_set(cpu, error_mask);
 				}
 			}
 		} else if (unlikely(status != HV_EWOULDBLOCK))
@@ -717,32 +705,24 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
 		}
 	} while (1);
 
-	local_irq_restore(flags);
-
-	if (unlikely(!cpus_empty(error_mask)))
+	if (unlikely(saw_cpu_error))
 		goto fatal_mondo_cpu_error;
 
 	return;
 
 fatal_mondo_cpu_error:
 	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
-	       "were in error state\n",
-	       this_cpu);
-	printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu);
-	for_each_cpu_mask(i, error_mask)
-		printk("%d ", i);
-	printk("]\n");
+	       "(including %d) were in error state\n",
+	       this_cpu, saw_cpu_error - 1);
 	return;
 
 fatal_mondo_timeout:
-	local_irq_restore(flags);
 	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
 	       " progress after %d retries.\n",
 	       this_cpu, retries);
 	goto dump_cpu_list_and_out;
 
 fatal_mondo_error:
-	local_irq_restore(flags);
 	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
 	       this_cpu, status);
 	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
@@ -756,58 +736,93 @@ dump_cpu_list_and_out:
756 printk("]\n"); 736 printk("]\n");
757} 737}
758 738
759/* Send cross call to all processors mentioned in MASK 739static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
760 * except self. 740
741static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
742{
743 struct trap_per_cpu *tb;
744 int this_cpu, i, cnt;
745 unsigned long flags;
746 u16 *cpu_list;
747 u64 *mondo;
748
749 /* We have to do this whole thing with interrupts fully disabled.
750 * Otherwise if we send an xcall from interrupt context it will
751 * corrupt both our mondo block and cpu list state.
752 *
753 * One consequence of this is that we cannot use timeout mechanisms
754 * that depend upon interrupts being delivered locally. So, for
755 * example, we cannot sample jiffies and expect it to advance.
756 *
757 * Fortunately, udelay() uses %stick/%tick so we can use that.
758 */
759 local_irq_save(flags);
760
761 this_cpu = smp_processor_id();
762 tb = &trap_block[this_cpu];
763
764 mondo = __va(tb->cpu_mondo_block_pa);
765 mondo[0] = data0;
766 mondo[1] = data1;
767 mondo[2] = data2;
768 wmb();
769
770 cpu_list = __va(tb->cpu_list_pa);
771
772 /* Setup the initial cpu list. */
773 cnt = 0;
774 for_each_cpu_mask_nr(i, *mask) {
775 if (i == this_cpu || !cpu_online(i))
776 continue;
777 cpu_list[cnt++] = i;
778 }
779
780 if (cnt)
781 xcall_deliver_impl(tb, cnt);
782
783 local_irq_restore(flags);
784}
785
786/* Send cross call to all processors mentioned in MASK_P
787 * except self. Really, there are only two cases currently,
788 * "&cpu_online_map" and "&mm->cpu_vm_mask".
761 */ 789 */
762static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask) 790static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
763{ 791{
764 u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff)); 792 u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
765 int this_cpu = get_cpu();
766 793
767 cpus_and(mask, mask, cpu_online_map); 794 xcall_deliver(data0, data1, data2, mask);
768 cpu_clear(this_cpu, mask); 795}
769
770 if (tlb_type == spitfire)
771 spitfire_xcall_deliver(data0, data1, data2, mask);
772 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
773 cheetah_xcall_deliver(data0, data1, data2, mask);
774 else
775 hypervisor_xcall_deliver(data0, data1, data2, mask);
776 /* NOTE: Caller runs local copy on master. */
777 796
778 put_cpu(); 797/* Send cross call to all processors except self. */
798static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
799{
800 smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
779} 801}
780 802
781extern unsigned long xcall_sync_tick; 803extern unsigned long xcall_sync_tick;
782 804
783static void smp_start_sync_tick_client(int cpu) 805static void smp_start_sync_tick_client(int cpu)
784{ 806{
785 cpumask_t mask = cpumask_of_cpu(cpu); 807 xcall_deliver((u64) &xcall_sync_tick, 0, 0,
786 808 &cpumask_of_cpu(cpu));
787 smp_cross_call_masked(&xcall_sync_tick,
788 0, 0, 0, mask);
789} 809}
790 810
791extern unsigned long xcall_call_function; 811extern unsigned long xcall_call_function;
792 812
793void arch_send_call_function_ipi(cpumask_t mask) 813void arch_send_call_function_ipi(cpumask_t mask)
794{ 814{
795 smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask); 815 xcall_deliver((u64) &xcall_call_function, 0, 0, &mask);
796} 816}
797 817
798extern unsigned long xcall_call_function_single; 818extern unsigned long xcall_call_function_single;
799 819
800void arch_send_call_function_single_ipi(int cpu) 820void arch_send_call_function_single_ipi(int cpu)
801{ 821{
802 cpumask_t mask = cpumask_of_cpu(cpu); 822 xcall_deliver((u64) &xcall_call_function_single, 0, 0,
803 823 &cpumask_of_cpu(cpu));
804 smp_cross_call_masked(&xcall_call_function_single, 0, 0, 0, mask);
805} 824}
806 825
807/* Send cross call to all processors except self. */
808#define smp_cross_call(func, ctx, data1, data2) \
809 smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)
810
811void smp_call_function_client(int irq, struct pt_regs *regs) 826void smp_call_function_client(int irq, struct pt_regs *regs)
812{ 827{
813 clear_softint(1 << irq); 828 clear_softint(1 << irq);
@@ -877,7 +892,6 @@ static inline void __local_flush_dcache_page(struct page *page)
 
 void smp_flush_dcache_page_impl(struct page *page, int cpu)
 {
-	cpumask_t mask = cpumask_of_cpu(cpu);
 	int this_cpu;
 
 	if (tlb_type == hypervisor)
@@ -893,29 +907,24 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
 		__local_flush_dcache_page(page);
 	} else if (cpu_online(cpu)) {
 		void *pg_addr = page_address(page);
-		u64 data0;
+		u64 data0 = 0;
 
 		if (tlb_type == spitfire) {
-			data0 =
-				((u64)&xcall_flush_dcache_page_spitfire);
+			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
 			if (page_mapping(page) != NULL)
 				data0 |= ((u64)1 << 32);
-			spitfire_xcall_deliver(data0,
-					       __pa(pg_addr),
-					       (u64) pg_addr,
-					       mask);
 		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 #ifdef DCACHE_ALIASING_POSSIBLE
-			data0 =
-				((u64)&xcall_flush_dcache_page_cheetah);
-			cheetah_xcall_deliver(data0,
-					      __pa(pg_addr),
-					      0, mask);
+			data0 = ((u64)&xcall_flush_dcache_page_cheetah);
 #endif
 		}
+		if (data0) {
+			xcall_deliver(data0, __pa(pg_addr),
+				      (u64) pg_addr, &cpumask_of_cpu(cpu));
 #ifdef CONFIG_DEBUG_DCFLUSH
 			atomic_inc(&dcpage_flushes_xcall);
 #endif
+		}
 	}
 
 	put_cpu();
@@ -923,66 +932,41 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
 
 void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 {
-	void *pg_addr = page_address(page);
-	cpumask_t mask = cpu_online_map;
-	u64 data0;
+	void *pg_addr;
 	int this_cpu;
+	u64 data0;
 
 	if (tlb_type == hypervisor)
 		return;
 
 	this_cpu = get_cpu();
 
-	cpu_clear(this_cpu, mask);
-
 #ifdef CONFIG_DEBUG_DCFLUSH
 	atomic_inc(&dcpage_flushes);
 #endif
-	if (cpus_empty(mask))
-		goto flush_self;
+	data0 = 0;
+	pg_addr = page_address(page);
 	if (tlb_type == spitfire) {
 		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
 		if (page_mapping(page) != NULL)
 			data0 |= ((u64)1 << 32);
-		spitfire_xcall_deliver(data0,
-				       __pa(pg_addr),
-				       (u64) pg_addr,
-				       mask);
 	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 #ifdef DCACHE_ALIASING_POSSIBLE
 		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
-		cheetah_xcall_deliver(data0,
-				      __pa(pg_addr),
-				      0, mask);
 #endif
 	}
+	if (data0) {
+		xcall_deliver(data0, __pa(pg_addr),
+			      (u64) pg_addr, &cpu_online_map);
 #ifdef CONFIG_DEBUG_DCFLUSH
 		atomic_inc(&dcpage_flushes_xcall);
 #endif
-flush_self:
+	}
 	__local_flush_dcache_page(page);
 
 	put_cpu();
 }
 
-static void __smp_receive_signal_mask(cpumask_t mask)
-{
-	smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
-}
-
-void smp_receive_signal(int cpu)
-{
-	cpumask_t mask = cpumask_of_cpu(cpu);
-
-	if (cpu_online(cpu))
-		__smp_receive_signal_mask(mask);
-}
-
-void smp_receive_signal_client(int irq, struct pt_regs *regs)
-{
-	clear_softint(1 << irq);
-}
-
 void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
 {
 	struct mm_struct *mm;
@@ -1083,7 +1067,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
 
 	smp_cross_call_masked(&xcall_flush_tlb_mm,
 			      ctx, 0, 0,
-			      mm->cpu_vm_mask);
+			      &mm->cpu_vm_mask);
 
 local_flush_and_out:
 	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
@@ -1101,7 +1085,7 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
 	else
 		smp_cross_call_masked(&xcall_flush_tlb_pending,
 				      ctx, nr, (unsigned long) vaddrs,
-				      mm->cpu_vm_mask);
+				      &mm->cpu_vm_mask);
 
 	__flush_tlb_pending(ctx, nr, vaddrs);
 
@@ -1202,6 +1186,16 @@ void __devinit smp_prepare_boot_cpu(void)
 {
 }
 
+void __init smp_setup_processor_id(void)
+{
+	if (tlb_type == spitfire)
+		xcall_deliver_impl = spitfire_xcall_deliver;
+	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
+		xcall_deliver_impl = cheetah_xcall_deliver;
+	else
+		xcall_deliver_impl = hypervisor_xcall_deliver;
+}
+
 void __devinit smp_fill_in_sib_core_maps(void)
 {
 	unsigned int i;
@@ -1370,7 +1364,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void smp_send_reschedule(int cpu)
 {
-	smp_receive_signal(cpu);
+	xcall_deliver((u64) &xcall_receive_signal, 0, 0,
+		      &cpumask_of_cpu(cpu));
+}
+
+void smp_receive_signal_client(int irq, struct pt_regs *regs)
+{
+	clear_softint(1 << irq);
 }
 
 /* This is a nop because we capture all other cpus
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index 3547937b17a2..587f8efb2e05 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -1,9 +1,10 @@
 /* arch/sparc64/mm/tsb.c
  *
- * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
  */
 
 #include <linux/kernel.h>
+#include <linux/preempt.h>
 #include <asm/system.h>
 #include <asm/page.h>
 #include <asm/tlbflush.h>
@@ -415,7 +416,9 @@ retry_tsb_alloc:
 	tsb_context_switch(mm);
 
 	/* Now force other processors to do the same. */
+	preempt_disable();
 	smp_tsb_sync(mm);
+	preempt_enable();
 
 	/* Now it is safe to free the old tsb. */
 	kmem_cache_free(tsb_caches[old_cache_index], old_tsb);