author     Linus Torvalds <torvalds@linux-foundation.org>  2008-08-04 22:04:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2008-08-04 22:04:36 -0400
commit     e9ba9698187ddbc0c5bfcf41de0349a662d23d02
tree       b11f8658d0bb05fe8bd826de1ead328d44e84005
parent     2e1e9212ed8c532c6b324de77d3cafef5d2bc846
parent     ae583885bfd07474789059cdef399289bd66c8d0
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6:
sparc64: Remove all cpumask_t local variables in xcall dispatch.
sparc64: Kill error_mask from hypervisor_xcall_deliver().
sparc64: Build cpu list and mondo block at top-level xcall_deliver().
sparc64: Disable local interrupts around xcall_deliver_impl() invocation.
sparc64: Make all xcall_deliver's go through common helper function.
sparc64: Always allocate the send mondo blocks, even on non-sun4v.
sparc64: Make smp_cross_call_masked() take a cpumask_t pointer.
sparc64: Directly call xcall_deliver() in smp_start_sync_tick_client.
sparc64: Call xcall_deliver() directly in some cases.
sparc64: Use cpumask_t pointers and for_each_cpu_mask_nr() in xcall_deliver.
sparc64: Use xcall_deliver() consistently.
sparc64: Use function pointer for cross-call sending.
arch/sparc64/kernel/signal.c: removed duplicated #include
sparc64: Need to disable preemption around smp_tsb_sync().
-rw-r--r--  arch/sparc64/kernel/irq.c    |  19
-rw-r--r--  arch/sparc64/kernel/signal.c |   1
-rw-r--r--  arch/sparc64/kernel/smp.c    | 292
-rw-r--r--  arch/sparc64/mm/tsb.c        |   5
4 files changed, 166 insertions, 151 deletions
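The common thread of the series above is that every cross-call path now funnels through one xcall_deliver() front-end, which fills the per-cpu mondo block and cpu list with local interrupts disabled and then jumps through a function pointer (xcall_deliver_impl) chosen once at boot for the spitfire, cheetah, or hypervisor sender. The standalone C sketch below only models that dispatch shape: it is illustrative, uses simplified types and invented names (trap_per_cpu_model, xcall_deliver_model, the *_like_send functions), and is not the kernel code, which appears verbatim in the smp.c diff further down.

/* Standalone model of the dispatch pattern the series introduces: a common
 * front-end builds the target list once, then hands it to whichever
 * low-level sender was selected at start-up via a function pointer.
 * Illustrative only; names and types are simplified stand-ins.
 */
#include <stdio.h>

struct trap_per_cpu_model {
	unsigned short cpu_list[8];	/* stands in for tb->cpu_list_pa      */
	unsigned long mondo[3];		/* stands in for the mondo data block */
};

/* One low-level sender per cpu family; all share the same signature. */
static void spitfire_like_send(struct trap_per_cpu_model *tb, int cnt)
{
	/* Mirrors the per-cpu loop: read back the mondo word, send one at a time. */
	for (int i = 0; i < cnt; i++)
		printf("spitfire-style: word %#lx -> cpu %d\n",
		       tb->mondo[0], (int)tb->cpu_list[i]);
}

static void hypervisor_like_send(struct trap_per_cpu_model *tb, int cnt)
{
	/* Mirrors the sun4v path: one call covers the whole list. */
	printf("hypervisor-style: word %#lx -> %d cpus in one call\n",
	       tb->mondo[0], cnt);
}

/* Chosen once, early, like xcall_deliver_impl in the diff. */
static void (*send_impl)(struct trap_per_cpu_model *, int);

/* Common front-end: fill in the mondo data and cpu list, then dispatch. */
static void xcall_deliver_model(struct trap_per_cpu_model *tb,
				unsigned long data0, const int *cpus, int n)
{
	int cnt = 0;

	tb->mondo[0] = data0;
	for (int i = 0; i < n; i++)
		tb->cpu_list[cnt++] = (unsigned short)cpus[i];
	if (cnt)
		send_impl(tb, cnt);
}

int main(void)
{
	struct trap_per_cpu_model tb = { { 0 } };
	int targets[] = { 1, 2, 3 };

	send_impl = spitfire_like_send;		/* pretend boot-time selection */
	xcall_deliver_model(&tb, 0x1234, targets, 3);

	send_impl = hypervisor_like_send;
	xcall_deliver_model(&tb, 0x1234, targets, 3);
	return 0;
}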
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index c481673d249c..ba43d85e8dde 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -915,12 +915,18 @@ static void __init sun4v_init_mondo_queues(void)
 		alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
 		alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
 			       tb->nonresum_qmask);
+	}
+}
+
+static void __init init_send_mondo_info(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct trap_per_cpu *tb = &trap_block[cpu];
 
 		init_cpu_send_mondo_info(tb);
 	}
-
-	/* Load up the boot cpu's entries. */
-	sun4v_register_mondo_queues(hard_smp_processor_id());
 }
 
 static struct irqaction timer_irq_action = {
@@ -949,6 +955,13 @@ void __init init_IRQ(void)
 	if (tlb_type == hypervisor)
 		sun4v_init_mondo_queues();
 
+	init_send_mondo_info();
+
+	if (tlb_type == hypervisor) {
+		/* Load up the boot cpu's entries. */
+		sun4v_register_mondo_queues(hard_smp_processor_id());
+	}
+
 	/* We need to clear any IRQ's pending in the soft interrupt
 	 * registers, a spurious one could be left around from the
 	 * PROM timer which we just disabled.
diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
index ca5a6ae3a6e2..ec82d76dc6f2 100644
--- a/arch/sparc64/kernel/signal.c
+++ b/arch/sparc64/kernel/signal.c
@@ -23,7 +23,6 @@
 #include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/bitops.h>
-#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/ptrace.h>
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 340842e51ce1..27b81775a4de 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -459,27 +459,35 @@ again:
 	}
 }
 
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
+	u64 *mondo, data0, data1, data2;
+	u16 *cpu_list;
 	u64 pstate;
 	int i;
 
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-	for_each_cpu_mask(i, mask)
-		spitfire_xcall_helper(data0, data1, data2, pstate, i);
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+	data0 = mondo[0];
+	data1 = mondo[1];
+	data2 = mondo[2];
+	for (i = 0; i < cnt; i++)
+		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 }
 
 /* Cheetah now allows to send the whole 64-bytes of data in the interrupt
  * packet, but we have no use for that.  However we do take advantage of
  * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
  */
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	u64 pstate, ver, busy_mask;
 	int nack_busy_id, is_jbus, need_more;
+	u64 *mondo, pstate, ver, busy_mask;
+	u16 *cpu_list;
 
-	if (cpus_empty(mask))
-		return;
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
 
 	/* Unfortunately, someone at Sun had the brilliant idea to make the
 	 * busy/nack fields hard-coded by ITID number for this Ultra-III
@@ -502,7 +510,7 @@ retry:
 			     "stxa %2, [%5] %6\n\t"
 			     "membar #Sync\n\t"
 			     : /* no outputs */
-			     : "r" (data0), "r" (data1), "r" (data2),
+			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
 			       "r" (0x40), "r" (0x50), "r" (0x60),
 			       "i" (ASI_INTR_W));
 
@@ -511,11 +519,16 @@ retry:
 	{
 		int i;
 
-		for_each_cpu_mask(i, mask) {
-			u64 target = (i << 14) | 0x70;
+		for (i = 0; i < cnt; i++) {
+			u64 target, nr;
+
+			nr = cpu_list[i];
+			if (nr == 0xffff)
+				continue;
 
+			target = (nr << 14) | 0x70;
 			if (is_jbus) {
-				busy_mask |= (0x1UL << (i * 2));
+				busy_mask |= (0x1UL << (nr * 2));
 			} else {
 				target |= (nack_busy_id << 24);
 				busy_mask |= (0x1UL <<
@@ -549,11 +562,13 @@ retry:
 		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 				     : : "r" (pstate));
 		if (unlikely(need_more)) {
-			int i, cnt = 0;
-			for_each_cpu_mask(i, mask) {
-				cpu_clear(i, mask);
-				cnt++;
-				if (cnt == 32)
+			int i, this_cnt = 0;
+			for (i = 0; i < cnt; i++) {
+				if (cpu_list[i] == 0xffff)
+					continue;
+				cpu_list[i] = 0xffff;
+				this_cnt++;
+				if (this_cnt == 32)
 					break;
 			}
 			goto retry;
@@ -584,16 +599,20 @@ retry:
 			/* Clear out the mask bits for cpus which did not
 			 * NACK us.
 			 */
-			for_each_cpu_mask(i, mask) {
-				u64 check_mask;
+			for (i = 0; i < cnt; i++) {
+				u64 check_mask, nr;
+
+				nr = cpu_list[i];
+				if (nr == 0xffff)
+					continue;
 
 				if (is_jbus)
-					check_mask = (0x2UL << (2*i));
+					check_mask = (0x2UL << (2*nr));
 				else
 					check_mask = (0x2UL <<
 						      this_busy_nack);
 				if ((dispatch_stat & check_mask) == 0)
-					cpu_clear(i, mask);
+					cpu_list[i] = 0xffff;
 				this_busy_nack += 2;
 				if (this_busy_nack == 64)
 					break;
@@ -605,47 +624,17 @@ retry:
 }
 
 /* Multi-cpu list version. */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	struct trap_per_cpu *tb;
+	int retries, this_cpu, prev_sent, i, saw_cpu_error;
+	unsigned long status;
 	u16 *cpu_list;
-	u64 *mondo;
-	cpumask_t error_mask;
-	unsigned long flags, status;
-	int cnt, retries, this_cpu, prev_sent, i;
-
-	if (cpus_empty(mask))
-		return;
-
-	/* We have to do this whole thing with interrupts fully disabled.
-	 * Otherwise if we send an xcall from interrupt context it will
-	 * corrupt both our mondo block and cpu list state.
-	 *
-	 * One consequence of this is that we cannot use timeout mechanisms
-	 * that depend upon interrupts being delivered locally.  So, for
-	 * example, we cannot sample jiffies and expect it to advance.
-	 *
-	 * Fortunately, udelay() uses %stick/%tick so we can use that.
-	 */
-	local_irq_save(flags);
 
 	this_cpu = smp_processor_id();
-	tb = &trap_block[this_cpu];
-
-	mondo = __va(tb->cpu_mondo_block_pa);
-	mondo[0] = data0;
-	mondo[1] = data1;
-	mondo[2] = data2;
-	wmb();
 
 	cpu_list = __va(tb->cpu_list_pa);
 
-	/* Setup the initial cpu list. */
-	cnt = 0;
-	for_each_cpu_mask(i, mask)
-		cpu_list[cnt++] = i;
-
-	cpus_clear(error_mask);
+	saw_cpu_error = 0;
 	retries = 0;
 	prev_sent = 0;
 	do {
@@ -690,10 +679,9 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
 					continue;
 
 				err = sun4v_cpu_state(cpu);
-				if (err >= 0 &&
-				    err == HV_CPU_STATE_ERROR) {
+				if (err == HV_CPU_STATE_ERROR) {
+					saw_cpu_error = (cpu + 1);
 					cpu_list[i] = 0xffff;
-					cpu_set(cpu, error_mask);
 				}
 			}
 		} else if (unlikely(status != HV_EWOULDBLOCK))
@@ -717,32 +705,24 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
 		}
 	} while (1);
 
-	local_irq_restore(flags);
-
-	if (unlikely(!cpus_empty(error_mask)))
+	if (unlikely(saw_cpu_error))
 		goto fatal_mondo_cpu_error;
 
 	return;
 
 fatal_mondo_cpu_error:
 	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
-	       "were in error state\n",
-	       this_cpu);
-	printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu);
-	for_each_cpu_mask(i, error_mask)
-		printk("%d ", i);
-	printk("]\n");
+	       "(including %d) were in error state\n",
+	       this_cpu, saw_cpu_error - 1);
 	return;
 
 fatal_mondo_timeout:
-	local_irq_restore(flags);
 	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
 	       " progress after %d retries.\n",
 	       this_cpu, retries);
 	goto dump_cpu_list_and_out;
 
 fatal_mondo_error:
-	local_irq_restore(flags);
 	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
 	       this_cpu, status);
 	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
@@ -756,58 +736,93 @@ dump_cpu_list_and_out:
 	printk("]\n");
 }
 
-/* Send cross call to all processors mentioned in MASK
- * except self.
+static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
+
+static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+{
+	struct trap_per_cpu *tb;
+	int this_cpu, i, cnt;
+	unsigned long flags;
+	u16 *cpu_list;
+	u64 *mondo;
+
+	/* We have to do this whole thing with interrupts fully disabled.
+	 * Otherwise if we send an xcall from interrupt context it will
+	 * corrupt both our mondo block and cpu list state.
+	 *
+	 * One consequence of this is that we cannot use timeout mechanisms
+	 * that depend upon interrupts being delivered locally.  So, for
+	 * example, we cannot sample jiffies and expect it to advance.
+	 *
+	 * Fortunately, udelay() uses %stick/%tick so we can use that.
+	 */
+	local_irq_save(flags);
+
+	this_cpu = smp_processor_id();
+	tb = &trap_block[this_cpu];
+
+	mondo = __va(tb->cpu_mondo_block_pa);
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	cpu_list = __va(tb->cpu_list_pa);
+
+	/* Setup the initial cpu list. */
+	cnt = 0;
+	for_each_cpu_mask_nr(i, *mask) {
+		if (i == this_cpu || !cpu_online(i))
+			continue;
+		cpu_list[cnt++] = i;
+	}
+
+	if (cnt)
+		xcall_deliver_impl(tb, cnt);
+
+	local_irq_restore(flags);
+}
+
+/* Send cross call to all processors mentioned in MASK_P
+ * except self.  Really, there are only two cases currently,
+ * "&cpu_online_map" and "&mm->cpu_vm_mask".
  */
-static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask)
+static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
 {
 	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
-	int this_cpu = get_cpu();
 
-	cpus_and(mask, mask, cpu_online_map);
-	cpu_clear(this_cpu, mask);
-
-	if (tlb_type == spitfire)
-		spitfire_xcall_deliver(data0, data1, data2, mask);
-	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
-		cheetah_xcall_deliver(data0, data1, data2, mask);
-	else
-		hypervisor_xcall_deliver(data0, data1, data2, mask);
-	/* NOTE: Caller runs local copy on master. */
+	xcall_deliver(data0, data1, data2, mask);
+}
 
-	put_cpu();
+/* Send cross call to all processors except self. */
+static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
+{
+	smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
 }
 
 extern unsigned long xcall_sync_tick;
 
 static void smp_start_sync_tick_client(int cpu)
 {
-	cpumask_t mask = cpumask_of_cpu(cpu);
-
-	smp_cross_call_masked(&xcall_sync_tick,
-			      0, 0, 0, mask);
+	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
+		      &cpumask_of_cpu(cpu));
 }
 
 extern unsigned long xcall_call_function;
 
 void arch_send_call_function_ipi(cpumask_t mask)
 {
-	smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
+	xcall_deliver((u64) &xcall_call_function, 0, 0, &mask);
 }
 
 extern unsigned long xcall_call_function_single;
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	cpumask_t mask = cpumask_of_cpu(cpu);
-
-	smp_cross_call_masked(&xcall_call_function_single, 0, 0, 0, mask);
+	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
+		      &cpumask_of_cpu(cpu));
 }
 
-/* Send cross call to all processors except self. */
-#define smp_cross_call(func, ctx, data1, data2) \
-	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)
-
 void smp_call_function_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
@@ -877,7 +892,6 @@ static inline void __local_flush_dcache_page(struct page *page)
 
 void smp_flush_dcache_page_impl(struct page *page, int cpu)
 {
-	cpumask_t mask = cpumask_of_cpu(cpu);
 	int this_cpu;
 
 	if (tlb_type == hypervisor)
@@ -893,29 +907,24 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
 		__local_flush_dcache_page(page);
 	} else if (cpu_online(cpu)) {
 		void *pg_addr = page_address(page);
-		u64 data0;
+		u64 data0 = 0;
 
 		if (tlb_type == spitfire) {
-			data0 =
-				((u64)&xcall_flush_dcache_page_spitfire);
+			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
 			if (page_mapping(page) != NULL)
 				data0 |= ((u64)1 << 32);
-			spitfire_xcall_deliver(data0,
-					       __pa(pg_addr),
-					       (u64) pg_addr,
-					       mask);
 		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 #ifdef DCACHE_ALIASING_POSSIBLE
-			data0 =
-				((u64)&xcall_flush_dcache_page_cheetah);
-			cheetah_xcall_deliver(data0,
-					      __pa(pg_addr),
-					      0, mask);
+			data0 = ((u64)&xcall_flush_dcache_page_cheetah);
 #endif
 		}
+		if (data0) {
+			xcall_deliver(data0, __pa(pg_addr),
+				      (u64) pg_addr, &cpumask_of_cpu(cpu));
 #ifdef CONFIG_DEBUG_DCFLUSH
 		atomic_inc(&dcpage_flushes_xcall);
 #endif
+		}
 	}
 
 	put_cpu();
@@ -923,66 +932,41 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
 
 void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 {
-	void *pg_addr = page_address(page);
-	cpumask_t mask = cpu_online_map;
-	u64 data0;
+	void *pg_addr;
 	int this_cpu;
+	u64 data0;
 
 	if (tlb_type == hypervisor)
 		return;
 
 	this_cpu = get_cpu();
 
-	cpu_clear(this_cpu, mask);
-
 #ifdef CONFIG_DEBUG_DCFLUSH
 	atomic_inc(&dcpage_flushes);
 #endif
-	if (cpus_empty(mask))
-		goto flush_self;
+	data0 = 0;
+	pg_addr = page_address(page);
 	if (tlb_type == spitfire) {
 		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
 		if (page_mapping(page) != NULL)
 			data0 |= ((u64)1 << 32);
-		spitfire_xcall_deliver(data0,
-				       __pa(pg_addr),
-				       (u64) pg_addr,
-				       mask);
 	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
 		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
-		cheetah_xcall_deliver(data0,
-				      __pa(pg_addr),
-				      0, mask);
 #endif
 	}
+	if (data0) {
+		xcall_deliver(data0, __pa(pg_addr),
+			      (u64) pg_addr, &cpu_online_map);
 #ifdef CONFIG_DEBUG_DCFLUSH
 	atomic_inc(&dcpage_flushes_xcall);
 #endif
-flush_self:
+	}
 	__local_flush_dcache_page(page);
 
 	put_cpu();
 }
 
-static void __smp_receive_signal_mask(cpumask_t mask)
-{
-	smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
-}
-
-void smp_receive_signal(int cpu)
-{
-	cpumask_t mask = cpumask_of_cpu(cpu);
-
-	if (cpu_online(cpu))
-		__smp_receive_signal_mask(mask);
-}
-
-void smp_receive_signal_client(int irq, struct pt_regs *regs)
-{
-	clear_softint(1 << irq);
-}
-
 void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
 {
 	struct mm_struct *mm;
@@ -1083,7 +1067,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm)
 
 	smp_cross_call_masked(&xcall_flush_tlb_mm,
 			      ctx, 0, 0,
-			      mm->cpu_vm_mask);
+			      &mm->cpu_vm_mask);
 
 local_flush_and_out:
 	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
@@ -1101,7 +1085,7 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
 	else
 		smp_cross_call_masked(&xcall_flush_tlb_pending,
 				      ctx, nr, (unsigned long) vaddrs,
-				      mm->cpu_vm_mask);
+				      &mm->cpu_vm_mask);
 
 	__flush_tlb_pending(ctx, nr, vaddrs);
 
@@ -1202,6 +1186,16 @@ void __devinit smp_prepare_boot_cpu(void)
 {
 }
 
+void __init smp_setup_processor_id(void)
+{
+	if (tlb_type == spitfire)
+		xcall_deliver_impl = spitfire_xcall_deliver;
+	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
+		xcall_deliver_impl = cheetah_xcall_deliver;
+	else
+		xcall_deliver_impl = hypervisor_xcall_deliver;
+}
+
 void __devinit smp_fill_in_sib_core_maps(void)
 {
 	unsigned int i;
@@ -1370,7 +1364,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void smp_send_reschedule(int cpu)
 {
-	smp_receive_signal(cpu);
+	xcall_deliver((u64) &xcall_receive_signal, 0, 0,
+		      &cpumask_of_cpu(cpu));
+}
+
+void smp_receive_signal_client(int irq, struct pt_regs *regs)
+{
+	clear_softint(1 << irq);
 }
 
 /* This is a nop because we capture all other cpus
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index 3547937b17a2..587f8efb2e05 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -1,9 +1,10 @@
 /* arch/sparc64/mm/tsb.c
  *
- * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
  */
 
 #include <linux/kernel.h>
+#include <linux/preempt.h>
 #include <asm/system.h>
 #include <asm/page.h>
 #include <asm/tlbflush.h>
@@ -415,7 +416,9 @@ retry_tsb_alloc:
 	tsb_context_switch(mm);
 
 	/* Now force other processors to do the same. */
+	preempt_disable();
 	smp_tsb_sync(mm);
+	preempt_enable();
 
 	/* Now it is safe to free the old tsb. */
 	kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
