diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-08-04 22:04:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-08-04 22:04:36 -0400 |
commit | e9ba9698187ddbc0c5bfcf41de0349a662d23d02 (patch) | |
tree | b11f8658d0bb05fe8bd826de1ead328d44e84005 /arch/sparc64/kernel/smp.c | |
parent | 2e1e9212ed8c532c6b324de77d3cafef5d2bc846 (diff) | |
parent | ae583885bfd07474789059cdef399289bd66c8d0 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6:
  sparc64: Remove all cpumask_t local variables in xcall dispatch.
  sparc64: Kill error_mask from hypervisor_xcall_deliver().
  sparc64: Build cpu list and mondo block at top-level xcall_deliver().
  sparc64: Disable local interrupts around xcall_deliver_impl() invocation.
  sparc64: Make all xcall_deliver's go through common helper function.
  sparc64: Always allocate the send mondo blocks, even on non-sun4v.
  sparc64: Make smp_cross_call_masked() take a cpumask_t pointer.
  sparc64: Directly call xcall_deliver() in smp_start_sync_tick_client.
  sparc64: Call xcall_deliver() directly in some cases.
  sparc64: Use cpumask_t pointers and for_each_cpu_mask_nr() in xcall_deliver.
  sparc64: Use xcall_deliver() consistently.
  sparc64: Use function pointer for cross-call sending.
  arch/sparc64/kernel/signal.c: removed duplicated #include
  sparc64: Need to disable preemption around smp_tsb_sync().
Diffstat (limited to 'arch/sparc64/kernel/smp.c')
-rw-r--r-- | arch/sparc64/kernel/smp.c | 292 |
1 files changed, 146 insertions, 146 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 340842e51ce1..27b81775a4de 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c | |||
@@ -459,27 +459,35 @@ again: | |||
459 | } | 459 | } |
460 | } | 460 | } |
461 | 461 | ||
462 | static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) | 462 | static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt) |
463 | { | 463 | { |
464 | u64 *mondo, data0, data1, data2; | ||
465 | u16 *cpu_list; | ||
464 | u64 pstate; | 466 | u64 pstate; |
465 | int i; | 467 | int i; |
466 | 468 | ||
467 | __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); | 469 | __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); |
468 | for_each_cpu_mask(i, mask) | 470 | cpu_list = __va(tb->cpu_list_pa); |
469 | spitfire_xcall_helper(data0, data1, data2, pstate, i); | 471 | mondo = __va(tb->cpu_mondo_block_pa); |
472 | data0 = mondo[0]; | ||
473 | data1 = mondo[1]; | ||
474 | data2 = mondo[2]; | ||
475 | for (i = 0; i < cnt; i++) | ||
476 | spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]); | ||
470 | } | 477 | } |
471 | 478 | ||
472 | /* Cheetah now allows to send the whole 64-bytes of data in the interrupt | 479 | /* Cheetah now allows to send the whole 64-bytes of data in the interrupt |
473 | * packet, but we have no use for that. However we do take advantage of | 480 | * packet, but we have no use for that. However we do take advantage of |
474 | * the new pipelining feature (ie. dispatch to multiple cpus simultaneously). | 481 | * the new pipelining feature (ie. dispatch to multiple cpus simultaneously). |
475 | */ | 482 | */ |
476 | static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) | 483 | static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt) |
477 | { | 484 | { |
478 | u64 pstate, ver, busy_mask; | ||
479 | int nack_busy_id, is_jbus, need_more; | 485 | int nack_busy_id, is_jbus, need_more; |
486 | u64 *mondo, pstate, ver, busy_mask; | ||
487 | u16 *cpu_list; | ||
480 | 488 | ||
481 | if (cpus_empty(mask)) | 489 | cpu_list = __va(tb->cpu_list_pa); |
482 | return; | 490 | mondo = __va(tb->cpu_mondo_block_pa); |
483 | 491 | ||
484 | /* Unfortunately, someone at Sun had the brilliant idea to make the | 492 | /* Unfortunately, someone at Sun had the brilliant idea to make the |
485 | * busy/nack fields hard-coded by ITID number for this Ultra-III | 493 | * busy/nack fields hard-coded by ITID number for this Ultra-III |
@@ -502,7 +510,7 @@ retry: | |||
502 | "stxa %2, [%5] %6\n\t" | 510 | "stxa %2, [%5] %6\n\t" |
503 | "membar #Sync\n\t" | 511 | "membar #Sync\n\t" |
504 | : /* no outputs */ | 512 | : /* no outputs */ |
505 | : "r" (data0), "r" (data1), "r" (data2), | 513 | : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]), |
506 | "r" (0x40), "r" (0x50), "r" (0x60), | 514 | "r" (0x40), "r" (0x50), "r" (0x60), |
507 | "i" (ASI_INTR_W)); | 515 | "i" (ASI_INTR_W)); |
508 | 516 | ||
@@ -511,11 +519,16 @@ retry: | |||
511 | { | 519 | { |
512 | int i; | 520 | int i; |
513 | 521 | ||
514 | for_each_cpu_mask(i, mask) { | 522 | for (i = 0; i < cnt; i++) { |
515 | u64 target = (i << 14) | 0x70; | 523 | u64 target, nr; |
524 | |||
525 | nr = cpu_list[i]; | ||
526 | if (nr == 0xffff) | ||
527 | continue; | ||
516 | 528 | ||
529 | target = (nr << 14) | 0x70; | ||
517 | if (is_jbus) { | 530 | if (is_jbus) { |
518 | busy_mask |= (0x1UL << (i * 2)); | 531 | busy_mask |= (0x1UL << (nr * 2)); |
519 | } else { | 532 | } else { |
520 | target |= (nack_busy_id << 24); | 533 | target |= (nack_busy_id << 24); |
521 | busy_mask |= (0x1UL << | 534 | busy_mask |= (0x1UL << |
@@ -549,11 +562,13 @@ retry: | |||
549 | __asm__ __volatile__("wrpr %0, 0x0, %%pstate" | 562 | __asm__ __volatile__("wrpr %0, 0x0, %%pstate" |
550 | : : "r" (pstate)); | 563 | : : "r" (pstate)); |
551 | if (unlikely(need_more)) { | 564 | if (unlikely(need_more)) { |
552 | int i, cnt = 0; | 565 | int i, this_cnt = 0; |
553 | for_each_cpu_mask(i, mask) { | 566 | for (i = 0; i < cnt; i++) { |
554 | cpu_clear(i, mask); | 567 | if (cpu_list[i] == 0xffff) |
555 | cnt++; | 568 | continue; |
556 | if (cnt == 32) | 569 | cpu_list[i] = 0xffff; |
570 | this_cnt++; | ||
571 | if (this_cnt == 32) | ||
557 | break; | 572 | break; |
558 | } | 573 | } |
559 | goto retry; | 574 | goto retry; |
@@ -584,16 +599,20 @@ retry: | |||
584 | /* Clear out the mask bits for cpus which did not | 599 | /* Clear out the mask bits for cpus which did not |
585 | * NACK us. | 600 | * NACK us. |
586 | */ | 601 | */ |
587 | for_each_cpu_mask(i, mask) { | 602 | for (i = 0; i < cnt; i++) { |
588 | u64 check_mask; | 603 | u64 check_mask, nr; |
604 | |||
605 | nr = cpu_list[i]; | ||
606 | if (nr == 0xffff) | ||
607 | continue; | ||
589 | 608 | ||
590 | if (is_jbus) | 609 | if (is_jbus) |
591 | check_mask = (0x2UL << (2*i)); | 610 | check_mask = (0x2UL << (2*nr)); |
592 | else | 611 | else |
593 | check_mask = (0x2UL << | 612 | check_mask = (0x2UL << |
594 | this_busy_nack); | 613 | this_busy_nack); |
595 | if ((dispatch_stat & check_mask) == 0) | 614 | if ((dispatch_stat & check_mask) == 0) |
596 | cpu_clear(i, mask); | 615 | cpu_list[i] = 0xffff; |
597 | this_busy_nack += 2; | 616 | this_busy_nack += 2; |
598 | if (this_busy_nack == 64) | 617 | if (this_busy_nack == 64) |
599 | break; | 618 | break; |
@@ -605,47 +624,17 @@ retry: | |||
605 | } | 624 | } |
606 | 625 | ||
607 | /* Multi-cpu list version. */ | 626 | /* Multi-cpu list version. */ |
608 | static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) | 627 | static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) |
609 | { | 628 | { |
610 | struct trap_per_cpu *tb; | 629 | int retries, this_cpu, prev_sent, i, saw_cpu_error; |
630 | unsigned long status; | ||
611 | u16 *cpu_list; | 631 | u16 *cpu_list; |
612 | u64 *mondo; | ||
613 | cpumask_t error_mask; | ||
614 | unsigned long flags, status; | ||
615 | int cnt, retries, this_cpu, prev_sent, i; | ||
616 | |||
617 | if (cpus_empty(mask)) | ||
618 | return; | ||
619 | |||
620 | /* We have to do this whole thing with interrupts fully disabled. | ||
621 | * Otherwise if we send an xcall from interrupt context it will | ||
622 | * corrupt both our mondo block and cpu list state. | ||
623 | * | ||
624 | * One consequence of this is that we cannot use timeout mechanisms | ||
625 | * that depend upon interrupts being delivered locally. So, for | ||
626 | * example, we cannot sample jiffies and expect it to advance. | ||
627 | * | ||
628 | * Fortunately, udelay() uses %stick/%tick so we can use that. | ||
629 | */ | ||
630 | local_irq_save(flags); | ||
631 | 632 | ||
632 | this_cpu = smp_processor_id(); | 633 | this_cpu = smp_processor_id(); |
633 | tb = &trap_block[this_cpu]; | ||
634 | |||
635 | mondo = __va(tb->cpu_mondo_block_pa); | ||
636 | mondo[0] = data0; | ||
637 | mondo[1] = data1; | ||
638 | mondo[2] = data2; | ||
639 | wmb(); | ||
640 | 634 | ||
641 | cpu_list = __va(tb->cpu_list_pa); | 635 | cpu_list = __va(tb->cpu_list_pa); |
642 | 636 | ||
643 | /* Setup the initial cpu list. */ | 637 | saw_cpu_error = 0; |
644 | cnt = 0; | ||
645 | for_each_cpu_mask(i, mask) | ||
646 | cpu_list[cnt++] = i; | ||
647 | |||
648 | cpus_clear(error_mask); | ||
649 | retries = 0; | 638 | retries = 0; |
650 | prev_sent = 0; | 639 | prev_sent = 0; |
651 | do { | 640 | do { |
@@ -690,10 +679,9 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t | |||
690 | continue; | 679 | continue; |
691 | 680 | ||
692 | err = sun4v_cpu_state(cpu); | 681 | err = sun4v_cpu_state(cpu); |
693 | if (err >= 0 && | 682 | if (err == HV_CPU_STATE_ERROR) { |
694 | err == HV_CPU_STATE_ERROR) { | 683 | saw_cpu_error = (cpu + 1); |
695 | cpu_list[i] = 0xffff; | 684 | cpu_list[i] = 0xffff; |
696 | cpu_set(cpu, error_mask); | ||
697 | } | 685 | } |
698 | } | 686 | } |
699 | } else if (unlikely(status != HV_EWOULDBLOCK)) | 687 | } else if (unlikely(status != HV_EWOULDBLOCK)) |
@@ -717,32 +705,24 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t | |||
717 | } | 705 | } |
718 | } while (1); | 706 | } while (1); |
719 | 707 | ||
720 | local_irq_restore(flags); | 708 | if (unlikely(saw_cpu_error)) |
721 | |||
722 | if (unlikely(!cpus_empty(error_mask))) | ||
723 | goto fatal_mondo_cpu_error; | 709 | goto fatal_mondo_cpu_error; |
724 | 710 | ||
725 | return; | 711 | return; |
726 | 712 | ||
727 | fatal_mondo_cpu_error: | 713 | fatal_mondo_cpu_error: |
728 | printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " | 714 | printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " |
729 | "were in error state\n", | 715 | "(including %d) were in error state\n", |
730 | this_cpu); | 716 | this_cpu, saw_cpu_error - 1); |
731 | printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu); | ||
732 | for_each_cpu_mask(i, error_mask) | ||
733 | printk("%d ", i); | ||
734 | printk("]\n"); | ||
735 | return; | 717 | return; |
736 | 718 | ||
737 | fatal_mondo_timeout: | 719 | fatal_mondo_timeout: |
738 | local_irq_restore(flags); | ||
739 | printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " | 720 | printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " |
740 | " progress after %d retries.\n", | 721 | " progress after %d retries.\n", |
741 | this_cpu, retries); | 722 | this_cpu, retries); |
742 | goto dump_cpu_list_and_out; | 723 | goto dump_cpu_list_and_out; |
743 | 724 | ||
744 | fatal_mondo_error: | 725 | fatal_mondo_error: |
745 | local_irq_restore(flags); | ||
746 | printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", | 726 | printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", |
747 | this_cpu, status); | 727 | this_cpu, status); |
748 | printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " | 728 | printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " |
@@ -756,58 +736,93 @@ dump_cpu_list_and_out: | |||
756 | printk("]\n"); | 736 | printk("]\n"); |
757 | } | 737 | } |
758 | 738 | ||
759 | /* Send cross call to all processors mentioned in MASK | 739 | static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); |
760 | * except self. | 740 | |
741 | static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask) | ||
742 | { | ||
743 | struct trap_per_cpu *tb; | ||
744 | int this_cpu, i, cnt; | ||
745 | unsigned long flags; | ||
746 | u16 *cpu_list; | ||
747 | u64 *mondo; | ||
748 | |||
749 | /* We have to do this whole thing with interrupts fully disabled. | ||
750 | * Otherwise if we send an xcall from interrupt context it will | ||
751 | * corrupt both our mondo block and cpu list state. | ||
752 | * | ||
753 | * One consequence of this is that we cannot use timeout mechanisms | ||
754 | * that depend upon interrupts being delivered locally. So, for | ||
755 | * example, we cannot sample jiffies and expect it to advance. | ||
756 | * | ||
757 | * Fortunately, udelay() uses %stick/%tick so we can use that. | ||
758 | */ | ||
759 | local_irq_save(flags); | ||
760 | |||
761 | this_cpu = smp_processor_id(); | ||
762 | tb = &trap_block[this_cpu]; | ||
763 | |||
764 | mondo = __va(tb->cpu_mondo_block_pa); | ||
765 | mondo[0] = data0; | ||
766 | mondo[1] = data1; | ||
767 | mondo[2] = data2; | ||
768 | wmb(); | ||
769 | |||
770 | cpu_list = __va(tb->cpu_list_pa); | ||
771 | |||
772 | /* Setup the initial cpu list. */ | ||
773 | cnt = 0; | ||
774 | for_each_cpu_mask_nr(i, *mask) { | ||
775 | if (i == this_cpu || !cpu_online(i)) | ||
776 | continue; | ||
777 | cpu_list[cnt++] = i; | ||
778 | } | ||
779 | |||
780 | if (cnt) | ||
781 | xcall_deliver_impl(tb, cnt); | ||
782 | |||
783 | local_irq_restore(flags); | ||
784 | } | ||
785 | |||
786 | /* Send cross call to all processors mentioned in MASK_P | ||
787 | * except self. Really, there are only two cases currently, | ||
788 | * "&cpu_online_map" and "&mm->cpu_vm_mask". | ||
761 | */ | 789 | */ |
762 | static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask) | 790 | static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask) |
763 | { | 791 | { |
764 | u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff)); | 792 | u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff)); |
765 | int this_cpu = get_cpu(); | ||
766 | 793 | ||
767 | cpus_and(mask, mask, cpu_online_map); | 794 | xcall_deliver(data0, data1, data2, mask); |
768 | cpu_clear(this_cpu, mask); | 795 | } |
769 | |||
770 | if (tlb_type == spitfire) | ||
771 | spitfire_xcall_deliver(data0, data1, data2, mask); | ||
772 | else if (tlb_type == cheetah || tlb_type == cheetah_plus) | ||
773 | cheetah_xcall_deliver(data0, data1, data2, mask); | ||
774 | else | ||
775 | hypervisor_xcall_deliver(data0, data1, data2, mask); | ||
776 | /* NOTE: Caller runs local copy on master. */ | ||
777 | 796 | ||
778 | put_cpu(); | 797 | /* Send cross call to all processors except self. */ |
798 | static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2) | ||
799 | { | ||
800 | smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map); | ||
779 | } | 801 | } |
780 | 802 | ||
781 | extern unsigned long xcall_sync_tick; | 803 | extern unsigned long xcall_sync_tick; |
782 | 804 | ||
783 | static void smp_start_sync_tick_client(int cpu) | 805 | static void smp_start_sync_tick_client(int cpu) |
784 | { | 806 | { |
785 | cpumask_t mask = cpumask_of_cpu(cpu); | 807 | xcall_deliver((u64) &xcall_sync_tick, 0, 0, |
786 | 808 | &cpumask_of_cpu(cpu)); | |
787 | smp_cross_call_masked(&xcall_sync_tick, | ||
788 | 0, 0, 0, mask); | ||
789 | } | 809 | } |
790 | 810 | ||
791 | extern unsigned long xcall_call_function; | 811 | extern unsigned long xcall_call_function; |
792 | 812 | ||
793 | void arch_send_call_function_ipi(cpumask_t mask) | 813 | void arch_send_call_function_ipi(cpumask_t mask) |
794 | { | 814 | { |
795 | smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask); | 815 | xcall_deliver((u64) &xcall_call_function, 0, 0, &mask); |
796 | } | 816 | } |
797 | 817 | ||
798 | extern unsigned long xcall_call_function_single; | 818 | extern unsigned long xcall_call_function_single; |
799 | 819 | ||
800 | void arch_send_call_function_single_ipi(int cpu) | 820 | void arch_send_call_function_single_ipi(int cpu) |
801 | { | 821 | { |
802 | cpumask_t mask = cpumask_of_cpu(cpu); | 822 | xcall_deliver((u64) &xcall_call_function_single, 0, 0, |
803 | 823 | &cpumask_of_cpu(cpu)); | |
804 | smp_cross_call_masked(&xcall_call_function_single, 0, 0, 0, mask); | ||
805 | } | 824 | } |
806 | 825 | ||
807 | /* Send cross call to all processors except self. */ | ||
808 | #define smp_cross_call(func, ctx, data1, data2) \ | ||
809 | smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map) | ||
810 | |||
811 | void smp_call_function_client(int irq, struct pt_regs *regs) | 826 | void smp_call_function_client(int irq, struct pt_regs *regs) |
812 | { | 827 | { |
813 | clear_softint(1 << irq); | 828 | clear_softint(1 << irq); |
@@ -877,7 +892,6 @@ static inline void __local_flush_dcache_page(struct page *page) | |||
877 | 892 | ||
878 | void smp_flush_dcache_page_impl(struct page *page, int cpu) | 893 | void smp_flush_dcache_page_impl(struct page *page, int cpu) |
879 | { | 894 | { |
880 | cpumask_t mask = cpumask_of_cpu(cpu); | ||
881 | int this_cpu; | 895 | int this_cpu; |
882 | 896 | ||
883 | if (tlb_type == hypervisor) | 897 | if (tlb_type == hypervisor) |
@@ -893,29 +907,24 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) | |||
893 | __local_flush_dcache_page(page); | 907 | __local_flush_dcache_page(page); |
894 | } else if (cpu_online(cpu)) { | 908 | } else if (cpu_online(cpu)) { |
895 | void *pg_addr = page_address(page); | 909 | void *pg_addr = page_address(page); |
896 | u64 data0; | 910 | u64 data0 = 0; |
897 | 911 | ||
898 | if (tlb_type == spitfire) { | 912 | if (tlb_type == spitfire) { |
899 | data0 = | 913 | data0 = ((u64)&xcall_flush_dcache_page_spitfire); |
900 | ((u64)&xcall_flush_dcache_page_spitfire); | ||
901 | if (page_mapping(page) != NULL) | 914 | if (page_mapping(page) != NULL) |
902 | data0 |= ((u64)1 << 32); | 915 | data0 |= ((u64)1 << 32); |
903 | spitfire_xcall_deliver(data0, | ||
904 | __pa(pg_addr), | ||
905 | (u64) pg_addr, | ||
906 | mask); | ||
907 | } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { | 916 | } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { |
908 | #ifdef DCACHE_ALIASING_POSSIBLE | 917 | #ifdef DCACHE_ALIASING_POSSIBLE |
909 | data0 = | 918 | data0 = ((u64)&xcall_flush_dcache_page_cheetah); |
910 | ((u64)&xcall_flush_dcache_page_cheetah); | ||
911 | cheetah_xcall_deliver(data0, | ||
912 | __pa(pg_addr), | ||
913 | 0, mask); | ||
914 | #endif | 919 | #endif |
915 | } | 920 | } |
921 | if (data0) { | ||
922 | xcall_deliver(data0, __pa(pg_addr), | ||
923 | (u64) pg_addr, &cpumask_of_cpu(cpu)); | ||
916 | #ifdef CONFIG_DEBUG_DCFLUSH | 924 | #ifdef CONFIG_DEBUG_DCFLUSH |
917 | atomic_inc(&dcpage_flushes_xcall); | 925 | atomic_inc(&dcpage_flushes_xcall); |
918 | #endif | 926 | #endif |
927 | } | ||
919 | } | 928 | } |
920 | 929 | ||
921 | put_cpu(); | 930 | put_cpu(); |
@@ -923,66 +932,41 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) | |||
923 | 932 | ||
924 | void flush_dcache_page_all(struct mm_struct *mm, struct page *page) | 933 | void flush_dcache_page_all(struct mm_struct *mm, struct page *page) |
925 | { | 934 | { |
926 | void *pg_addr = page_address(page); | 935 | void *pg_addr; |
927 | cpumask_t mask = cpu_online_map; | ||
928 | u64 data0; | ||
929 | int this_cpu; | 936 | int this_cpu; |
937 | u64 data0; | ||
930 | 938 | ||
931 | if (tlb_type == hypervisor) | 939 | if (tlb_type == hypervisor) |
932 | return; | 940 | return; |
933 | 941 | ||
934 | this_cpu = get_cpu(); | 942 | this_cpu = get_cpu(); |
935 | 943 | ||
936 | cpu_clear(this_cpu, mask); | ||
937 | |||
938 | #ifdef CONFIG_DEBUG_DCFLUSH | 944 | #ifdef CONFIG_DEBUG_DCFLUSH |
939 | atomic_inc(&dcpage_flushes); | 945 | atomic_inc(&dcpage_flushes); |
940 | #endif | 946 | #endif |
941 | if (cpus_empty(mask)) | 947 | data0 = 0; |
942 | goto flush_self; | 948 | pg_addr = page_address(page); |
943 | if (tlb_type == spitfire) { | 949 | if (tlb_type == spitfire) { |
944 | data0 = ((u64)&xcall_flush_dcache_page_spitfire); | 950 | data0 = ((u64)&xcall_flush_dcache_page_spitfire); |
945 | if (page_mapping(page) != NULL) | 951 | if (page_mapping(page) != NULL) |
946 | data0 |= ((u64)1 << 32); | 952 | data0 |= ((u64)1 << 32); |
947 | spitfire_xcall_deliver(data0, | ||
948 | __pa(pg_addr), | ||
949 | (u64) pg_addr, | ||
950 | mask); | ||
951 | } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { | 953 | } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { |
952 | #ifdef DCACHE_ALIASING_POSSIBLE | 954 | #ifdef DCACHE_ALIASING_POSSIBLE |
953 | data0 = ((u64)&xcall_flush_dcache_page_cheetah); | 955 | data0 = ((u64)&xcall_flush_dcache_page_cheetah); |
954 | cheetah_xcall_deliver(data0, | ||
955 | __pa(pg_addr), | ||
956 | 0, mask); | ||
957 | #endif | 956 | #endif |
958 | } | 957 | } |
958 | if (data0) { | ||
959 | xcall_deliver(data0, __pa(pg_addr), | ||
960 | (u64) pg_addr, &cpu_online_map); | ||
959 | #ifdef CONFIG_DEBUG_DCFLUSH | 961 | #ifdef CONFIG_DEBUG_DCFLUSH |
960 | atomic_inc(&dcpage_flushes_xcall); | 962 | atomic_inc(&dcpage_flushes_xcall); |
961 | #endif | 963 | #endif |
962 | flush_self: | 964 | } |
963 | __local_flush_dcache_page(page); | 965 | __local_flush_dcache_page(page); |
964 | 966 | ||
965 | put_cpu(); | 967 | put_cpu(); |
966 | } | 968 | } |
967 | 969 | ||
968 | static void __smp_receive_signal_mask(cpumask_t mask) | ||
969 | { | ||
970 | smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask); | ||
971 | } | ||
972 | |||
973 | void smp_receive_signal(int cpu) | ||
974 | { | ||
975 | cpumask_t mask = cpumask_of_cpu(cpu); | ||
976 | |||
977 | if (cpu_online(cpu)) | ||
978 | __smp_receive_signal_mask(mask); | ||
979 | } | ||
980 | |||
981 | void smp_receive_signal_client(int irq, struct pt_regs *regs) | ||
982 | { | ||
983 | clear_softint(1 << irq); | ||
984 | } | ||
985 | |||
986 | void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) | 970 | void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) |
987 | { | 971 | { |
988 | struct mm_struct *mm; | 972 | struct mm_struct *mm; |
@@ -1083,7 +1067,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm) | |||
1083 | 1067 | ||
1084 | smp_cross_call_masked(&xcall_flush_tlb_mm, | 1068 | smp_cross_call_masked(&xcall_flush_tlb_mm, |
1085 | ctx, 0, 0, | 1069 | ctx, 0, 0, |
1086 | mm->cpu_vm_mask); | 1070 | &mm->cpu_vm_mask); |
1087 | 1071 | ||
1088 | local_flush_and_out: | 1072 | local_flush_and_out: |
1089 | __flush_tlb_mm(ctx, SECONDARY_CONTEXT); | 1073 | __flush_tlb_mm(ctx, SECONDARY_CONTEXT); |
@@ -1101,7 +1085,7 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long | |||
1101 | else | 1085 | else |
1102 | smp_cross_call_masked(&xcall_flush_tlb_pending, | 1086 | smp_cross_call_masked(&xcall_flush_tlb_pending, |
1103 | ctx, nr, (unsigned long) vaddrs, | 1087 | ctx, nr, (unsigned long) vaddrs, |
1104 | mm->cpu_vm_mask); | 1088 | &mm->cpu_vm_mask); |
1105 | 1089 | ||
1106 | __flush_tlb_pending(ctx, nr, vaddrs); | 1090 | __flush_tlb_pending(ctx, nr, vaddrs); |
1107 | 1091 | ||
@@ -1202,6 +1186,16 @@ void __devinit smp_prepare_boot_cpu(void) | |||
1202 | { | 1186 | { |
1203 | } | 1187 | } |
1204 | 1188 | ||
1189 | void __init smp_setup_processor_id(void) | ||
1190 | { | ||
1191 | if (tlb_type == spitfire) | ||
1192 | xcall_deliver_impl = spitfire_xcall_deliver; | ||
1193 | else if (tlb_type == cheetah || tlb_type == cheetah_plus) | ||
1194 | xcall_deliver_impl = cheetah_xcall_deliver; | ||
1195 | else | ||
1196 | xcall_deliver_impl = hypervisor_xcall_deliver; | ||
1197 | } | ||
1198 | |||
1205 | void __devinit smp_fill_in_sib_core_maps(void) | 1199 | void __devinit smp_fill_in_sib_core_maps(void) |
1206 | { | 1200 | { |
1207 | unsigned int i; | 1201 | unsigned int i; |
@@ -1370,7 +1364,13 @@ void __init smp_cpus_done(unsigned int max_cpus) | |||
1370 | 1364 | ||
1371 | void smp_send_reschedule(int cpu) | 1365 | void smp_send_reschedule(int cpu) |
1372 | { | 1366 | { |
1373 | smp_receive_signal(cpu); | 1367 | xcall_deliver((u64) &xcall_receive_signal, 0, 0, |
1368 | &cpumask_of_cpu(cpu)); | ||
1369 | } | ||
1370 | |||
1371 | void smp_receive_signal_client(int irq, struct pt_regs *regs) | ||
1372 | { | ||
1373 | clear_softint(1 << irq); | ||
1374 | } | 1374 | } |
1375 | 1375 | ||
1376 | /* This is a nop because we capture all other cpus | 1376 | /* This is a nop because we capture all other cpus |