aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-05-18 06:14:34 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-05-18 06:14:34 -0400
commit 39dcfa552c3fd4179f470e0b1f716db6241e95d1 (patch)
tree 16eb0514faae29bc3adead91571e28a32fc85489
parent 7f12b72bd8532cbbfb147470f0d902260cea36ce (diff)
parent 14fb57dccb6e1defe9f89a66f548fcb24c374c1d (diff)
Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, AMD: Fix ARAT feature setting again
  Revert "x86, AMD: Fix APIC timer erratum 400 affecting K8 Rev.A-E processors"
  x86, apic: Fix spurious error interrupts triggering on all non-boot APs
  x86, mce, AMD: Fix leaving freed data in a list
  x86: Fix UV BAU for non-consecutive nasids
  x86, UV: Fix NMI handler for UV platforms
-rw-r--r-- arch/x86/include/asm/apicdef.h 1
-rw-r--r-- arch/x86/include/asm/uv/uv_bau.h 17
-rw-r--r-- arch/x86/include/asm/uv/uv_hub.h 2
-rw-r--r-- arch/x86/include/asm/uv/uv_mmrs.h 16
-rw-r--r-- arch/x86/kernel/apic/x2apic_uv_x.c 48
-rw-r--r-- arch/x86/kernel/cpu/amd.c 4
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_amd.c 1
-rw-r--r-- arch/x86/kernel/cpu/mcheck/therm_throt.c 12
-rw-r--r-- arch/x86/platform/uv/tlb_uv.c 92
9 files changed, 147 insertions, 46 deletions
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3e..34595d5e1038 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
78#define APIC_DEST_LOGICAL 0x00800 78#define APIC_DEST_LOGICAL 0x00800
79#define APIC_DEST_PHYSICAL 0x00000 79#define APIC_DEST_PHYSICAL 0x00000
80#define APIC_DM_FIXED 0x00000 80#define APIC_DM_FIXED 0x00000
81#define APIC_DM_FIXED_MASK 0x00700
81#define APIC_DM_LOWEST 0x00100 82#define APIC_DM_LOWEST 0x00100
82#define APIC_DM_SMI 0x00200 83#define APIC_DM_SMI 0x00200
83#define APIC_DM_REMRD 0x00300 84#define APIC_DM_REMRD 0x00300
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c3..130f1eeee5fe 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
94/* after this # consecutive successes, bump up the throttle if it was lowered */ 94/* after this # consecutive successes, bump up the throttle if it was lowered */
95#define COMPLETE_THRESHOLD 5 95#define COMPLETE_THRESHOLD 5
96 96
97#define UV_LB_SUBNODEID 0x10
98
97/* 99/*
98 * number of entries in the destination side payload queue 100 * number of entries in the destination side payload queue
99 */ 101 */
@@ -124,7 +126,7 @@
124 * The distribution specification (32 bytes) is interpreted as a 256-bit 126 * The distribution specification (32 bytes) is interpreted as a 256-bit
125 * distribution vector. Adjacent bits correspond to consecutive even numbered 127 * distribution vector. Adjacent bits correspond to consecutive even numbered
126 * nodeIDs. The result of adding the index of a given bit to the 15-bit 128 * nodeIDs. The result of adding the index of a given bit to the 15-bit
127 * 'base_dest_nodeid' field of the header corresponds to the 129 * 'base_dest_nasid' field of the header corresponds to the
128 * destination nodeID associated with that specified bit. 130 * destination nodeID associated with that specified bit.
129 */ 131 */
130struct bau_target_uvhubmask { 132struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
176struct bau_msg_header { 178struct bau_msg_header {
177 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 179 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
178 /* bits 5:0 */ 180 /* bits 5:0 */
179 unsigned int base_dest_nodeid:15; /* nasid of the */ 181 unsigned int base_dest_nasid:15; /* nasid of the */
180 /* bits 20:6 */ /* first bit in uvhub map */ 182 /* bits 20:6 */ /* first bit in uvhub map */
181 unsigned int command:8; /* message type */ 183 unsigned int command:8; /* message type */
182 /* bits 28:21 */ 184 /* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
378 unsigned long d_rcanceled; /* number of messages canceled by resets */ 380 unsigned long d_rcanceled; /* number of messages canceled by resets */
379}; 381};
380 382
383struct hub_and_pnode {
384 short uvhub;
385 short pnode;
386};
381/* 387/*
382 * one per-cpu; to locate the software tables 388 * one per-cpu; to locate the software tables
383 */ 389 */
@@ -399,10 +405,12 @@ struct bau_control {
399 int baudisabled; 405 int baudisabled;
400 int set_bau_off; 406 int set_bau_off;
401 short cpu; 407 short cpu;
408 short osnode;
402 short uvhub_cpu; 409 short uvhub_cpu;
403 short uvhub; 410 short uvhub;
404 short cpus_in_socket; 411 short cpus_in_socket;
405 short cpus_in_uvhub; 412 short cpus_in_uvhub;
413 short partition_base_pnode;
406 unsigned short message_number; 414 unsigned short message_number;
407 unsigned short uvhub_quiesce; 415 unsigned short uvhub_quiesce;
408 short socket_acknowledge_count[DEST_Q_SIZE]; 416 short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
422 int congested_period; 430 int congested_period;
423 cycles_t period_time; 431 cycles_t period_time;
424 long period_requests; 432 long period_requests;
433 struct hub_and_pnode *target_hub_and_pnode;
425}; 434};
426 435
427static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) 436static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
428{ 437{
429 return constant_test_bit(uvhub, &dstp->bits[0]); 438 return constant_test_bit(uvhub, &dstp->bits[0]);
430} 439}
431static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) 440static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
432{ 441{
433 __set_bit(uvhub, &dstp->bits[0]); 442 __set_bit(pnode, &dstp->bits[0]);
434} 443}
435static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, 444static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
436 int nbits) 445 int nbits)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c2335..4298002d0c83 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
398 unsigned short nr_online_cpus; 398 unsigned short nr_online_cpus;
399 unsigned short pnode; 399 unsigned short pnode;
400 short memory_nid; 400 short memory_nid;
401 spinlock_t nmi_lock;
402 unsigned long nmi_count;
401}; 403};
402extern struct uv_blade_info *uv_blade_info; 404extern struct uv_blade_info *uv_blade_info;
403extern short *uv_node_to_blade; 405extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac7455..f5bb64a823d7 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV MMR definitions 6 * SGI UV MMR definitions
7 * 7 *
8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef _ASM_X86_UV_UV_MMRS_H 11#ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
1099 } s; 1099 } s;
1100}; 1100};
1101 1101
1102/* ========================================================================= */
1103/* UVH_SCRATCH5 */
1104/* ========================================================================= */
1105#define UVH_SCRATCH5 0x2d0200UL
1106#define UVH_SCRATCH5_32 0x00778
1107
1108#define UVH_SCRATCH5_SCRATCH5_SHFT 0
1109#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
1110union uvh_scratch5_u {
1111 unsigned long v;
1112 struct uvh_scratch5_s {
1113 unsigned long scratch5 : 64; /* RW, W1CS */
1114 } s;
1115};
1102 1116
1103#endif /* __ASM_UV_MMRS_X86_H__ */ 1117#endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc095..7acd2d2ac965 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
37#include <asm/smp.h> 37#include <asm/smp.h>
38#include <asm/x86_init.h> 38#include <asm/x86_init.h>
39#include <asm/emergency-restart.h> 39#include <asm/emergency-restart.h>
40#include <asm/nmi.h>
41
42/* BMC sets a bit this MMR non-zero before sending an NMI */
43#define UVH_NMI_MMR UVH_SCRATCH5
44#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
45#define UV_NMI_PENDING_MASK (1UL << 63)
46DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
40 47
41DEFINE_PER_CPU(int, x2apic_extra_bits); 48DEFINE_PER_CPU(int, x2apic_extra_bits);
42 49
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
642 */ 649 */
643int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) 650int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
644{ 651{
652 unsigned long real_uv_nmi;
653 int bid;
654
645 if (reason != DIE_NMIUNKNOWN) 655 if (reason != DIE_NMIUNKNOWN)
646 return NOTIFY_OK; 656 return NOTIFY_OK;
647 657
648 if (in_crash_kexec) 658 if (in_crash_kexec)
649 /* do nothing if entering the crash kernel */ 659 /* do nothing if entering the crash kernel */
650 return NOTIFY_OK; 660 return NOTIFY_OK;
661
651 /* 662 /*
652 * Use a lock so only one cpu prints at a time 663 * Each blade has an MMR that indicates when an NMI has been sent
653 * to prevent intermixed output. 664 * to cpus on the blade. If an NMI is detected, atomically
665 * clear the MMR and update a per-blade NMI count used to
666 * cause each cpu on the blade to notice a new NMI.
667 */
668 bid = uv_numa_blade_id();
669 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
670
671 if (unlikely(real_uv_nmi)) {
672 spin_lock(&uv_blade_info[bid].nmi_lock);
673 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
674 if (real_uv_nmi) {
675 uv_blade_info[bid].nmi_count++;
676 uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
677 }
678 spin_unlock(&uv_blade_info[bid].nmi_lock);
679 }
680
681 if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
682 return NOTIFY_DONE;
683
684 __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
685
686 /*
687 * Use a lock so only one cpu prints at a time.
688 * This prevents intermixed output.
654 */ 689 */
655 spin_lock(&uv_nmi_lock); 690 spin_lock(&uv_nmi_lock);
656 pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); 691 pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
657 dump_stack(); 692 dump_stack();
658 spin_unlock(&uv_nmi_lock); 693 spin_unlock(&uv_nmi_lock);
659 694
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
661} 696}
662 697
663static struct notifier_block uv_dump_stack_nmi_nb = { 698static struct notifier_block uv_dump_stack_nmi_nb = {
664 .notifier_call = uv_handle_nmi 699 .notifier_call = uv_handle_nmi,
700 .priority = NMI_LOCAL_LOW_PRIOR - 1,
665}; 701};
666 702
667void uv_register_nmi_notifier(void) 703void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
720 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); 756 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
721 757
722 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 758 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
723 uv_blade_info = kmalloc(bytes, GFP_KERNEL); 759 uv_blade_info = kzalloc(bytes, GFP_KERNEL);
724 BUG_ON(!uv_blade_info); 760 BUG_ON(!uv_blade_info);
761
725 for (blade = 0; blade < uv_num_possible_blades(); blade++) 762 for (blade = 0; blade < uv_num_possible_blades(); blade++)
726 uv_blade_info[blade].memory_nid = -1; 763 uv_blade_info[blade].memory_nid = -1;
727 764
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
747 uv_blade_info[blade].pnode = pnode; 784 uv_blade_info[blade].pnode = pnode;
748 uv_blade_info[blade].nr_possible_cpus = 0; 785 uv_blade_info[blade].nr_possible_cpus = 0;
749 uv_blade_info[blade].nr_online_cpus = 0; 786 uv_blade_info[blade].nr_online_cpus = 0;
787 spin_lock_init(&uv_blade_info[blade].nmi_lock);
750 max_pnode = max(pnode, max_pnode); 788 max_pnode = max(pnode, max_pnode);
751 blade++; 789 blade++;
752 } 790 }
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb9eb29a52dd..6f9d1f6063e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
613#endif 613#endif
614 614
615 /* As a rule processors have APIC timer running in deep C states */ 615 /* As a rule processors have APIC timer running in deep C states */
616 if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) 616 if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
617 set_cpu_cap(c, X86_FEATURE_ARAT); 617 set_cpu_cap(c, X86_FEATURE_ARAT);
618 618
619 /* 619 /*
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
698 */ 698 */
699 699
700const int amd_erratum_400[] = 700const int amd_erratum_400[] =
701 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf), 701 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
702 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); 702 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
703EXPORT_SYMBOL_GPL(amd_erratum_400); 703EXPORT_SYMBOL_GPL(amd_erratum_400);
704 704
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596e..bb0adad35143 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
509out_free: 509out_free:
510 if (b) { 510 if (b) {
511 kobject_put(&b->kobj); 511 kobject_put(&b->kobj);
512 list_del(&b->miscj);
512 kfree(b); 513 kfree(b);
513 } 514 }
514 return err; 515 return err;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97f..0f034460260d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
446 */ 446 */
447 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 447 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
448 448
449 h = lvtthmr_init;
449 /* 450 /*
450 * The initial value of thermal LVT entries on all APs always reads 451 * The initial value of thermal LVT entries on all APs always reads
451 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI 452 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
452 * sequence to them and LVT registers are reset to 0s except for 453 * sequence to them and LVT registers are reset to 0s except for
453 * the mask bits which are set to 1s when APs receive INIT IPI. 454 * the mask bits which are set to 1s when APs receive INIT IPI.
454 * Always restore the value that BIOS has programmed on AP based on 455 * If BIOS takes over the thermal interrupt and sets its interrupt
455 * BSP's info we saved since BIOS is always setting the same value 456 * delivery mode to SMI (not fixed), it restores the value that the
456 * for all threads/cores 457 * BIOS has programmed on AP based on BSP's info we saved since BIOS
458 * is always setting the same value for all threads/cores.
457 */ 459 */
458 apic_write(APIC_LVTTHMR, lvtthmr_init); 460 if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
461 apic_write(APIC_LVTTHMR, lvtthmr_init);
459 462
460 h = lvtthmr_init;
461 463
462 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { 464 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
463 printk(KERN_DEBUG 465 printk(KERN_DEBUG
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f6..c58e0ea39ef5 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
699 struct mm_struct *mm, 699 struct mm_struct *mm,
700 unsigned long va, unsigned int cpu) 700 unsigned long va, unsigned int cpu)
701{ 701{
702 int tcpu;
703 int uvhub;
704 int locals = 0; 702 int locals = 0;
705 int remotes = 0; 703 int remotes = 0;
706 int hubs = 0; 704 int hubs = 0;
705 int tcpu;
706 int tpnode;
707 struct bau_desc *bau_desc; 707 struct bau_desc *bau_desc;
708 struct cpumask *flush_mask; 708 struct cpumask *flush_mask;
709 struct ptc_stats *stat; 709 struct ptc_stats *stat;
710 struct bau_control *bcp; 710 struct bau_control *bcp;
711 struct bau_control *tbcp; 711 struct bau_control *tbcp;
712 struct hub_and_pnode *hpp;
712 713
713 /* kernel was booted 'nobau' */ 714 /* kernel was booted 'nobau' */
714 if (nobau) 715 if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
750 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; 751 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
751 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 752 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
752 753
753 /* cpu statistics */
754 for_each_cpu(tcpu, flush_mask) { 754 for_each_cpu(tcpu, flush_mask) {
755 uvhub = uv_cpu_to_blade_id(tcpu); 755 /*
756 bau_uvhub_set(uvhub, &bau_desc->distribution); 756 * The distribution vector is a bit map of pnodes, relative
757 if (uvhub == bcp->uvhub) 757 * to the partition base pnode (and the partition base nasid
758 * in the header).
759 * Translate cpu to pnode and hub using an array stored
760 * in local memory.
761 */
762 hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
763 tpnode = hpp->pnode - bcp->partition_base_pnode;
764 bau_uvhub_set(tpnode, &bau_desc->distribution);
765 if (hpp->uvhub == bcp->uvhub)
758 locals++; 766 locals++;
759 else 767 else
760 remotes++; 768 remotes++;
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
855 * an interrupt, but causes an error message to be returned to 863 * an interrupt, but causes an error message to be returned to
856 * the sender. 864 * the sender.
857 */ 865 */
858static void uv_enable_timeouts(void) 866static void __init uv_enable_timeouts(void)
859{ 867{
860 int uvhub; 868 int uvhub;
861 int nuvhubs; 869 int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
1326} 1334}
1327 1335
1328/* 1336/*
1329 * initialize the sending side's sending buffers 1337 * Initialize the sending side's sending buffers.
1330 */ 1338 */
1331static void 1339static void
1332uv_activation_descriptor_init(int node, int pnode) 1340uv_activation_descriptor_init(int node, int pnode, int base_pnode)
1333{ 1341{
1334 int i; 1342 int i;
1335 int cpu; 1343 int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
1352 n = pa >> uv_nshift; 1360 n = pa >> uv_nshift;
1353 m = pa & uv_mmask; 1361 m = pa & uv_mmask;
1354 1362
1363 /* the 14-bit pnode */
1355 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, 1364 uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
1356 (n << UV_DESC_BASE_PNODE_SHIFT | m)); 1365 (n << UV_DESC_BASE_PNODE_SHIFT | m));
1357
1358 /* 1366 /*
1359 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each 1367 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
1360 * cpu even though we only use the first one; one descriptor can 1368 * cpu even though we only use the first one; one descriptor can
1361 * describe a broadcast to 256 uv hubs. 1369 * describe a broadcast to 256 uv hubs.
1362 */ 1370 */
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
1365 memset(bd2, 0, sizeof(struct bau_desc)); 1373 memset(bd2, 0, sizeof(struct bau_desc));
1366 bd2->header.sw_ack_flag = 1; 1374 bd2->header.sw_ack_flag = 1;
1367 /* 1375 /*
1368 * base_dest_nodeid is the nasid of the first uvhub 1376 * The base_dest_nasid set in the message header is the nasid
1369 * in the partition. The bit map will indicate uvhub numbers, 1377 * of the first uvhub in the partition. The bit map will
1370 * which are 0-N in a partition. Pnodes are unique system-wide. 1378 * indicate destination pnode numbers relative to that base.
1379 * They may not be consecutive if nasid striding is being used.
1371 */ 1380 */
1372 bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); 1381 bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
1373 bd2->header.dest_subnodeid = 0x10; /* the LB */ 1382 bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
1374 bd2->header.command = UV_NET_ENDPOINT_INTD; 1383 bd2->header.command = UV_NET_ENDPOINT_INTD;
1375 bd2->header.int_both = 1; 1384 bd2->header.int_both = 1;
1376 /* 1385 /*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
1442/* 1451/*
1443 * Initialization of each UV hub's structures 1452 * Initialization of each UV hub's structures
1444 */ 1453 */
1445static void __init uv_init_uvhub(int uvhub, int vector) 1454static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
1446{ 1455{
1447 int node; 1456 int node;
1448 int pnode; 1457 int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
1450 1459
1451 node = uvhub_to_first_node(uvhub); 1460 node = uvhub_to_first_node(uvhub);
1452 pnode = uv_blade_to_pnode(uvhub); 1461 pnode = uv_blade_to_pnode(uvhub);
1453 uv_activation_descriptor_init(node, pnode); 1462 uv_activation_descriptor_init(node, pnode, base_pnode);
1454 uv_payload_queue_init(node, pnode); 1463 uv_payload_queue_init(node, pnode);
1455 /* 1464 /*
1456 * the below initialization can't be in firmware because the 1465 * The below initialization can't be in firmware because the
1457 * messaging IRQ will be determined by the OS 1466 * messaging IRQ will be determined by the OS.
1458 */ 1467 */
1459 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; 1468 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
1460 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, 1469 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
1491/* 1500/*
1492 * initialize the bau_control structure for each cpu 1501 * initialize the bau_control structure for each cpu
1493 */ 1502 */
1494static int __init uv_init_per_cpu(int nuvhubs) 1503static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
1495{ 1504{
1496 int i; 1505 int i;
1497 int cpu; 1506 int cpu;
1507 int tcpu;
1498 int pnode; 1508 int pnode;
1499 int uvhub; 1509 int uvhub;
1500 int have_hmaster; 1510 int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
1528 bcp = &per_cpu(bau_control, cpu); 1538 bcp = &per_cpu(bau_control, cpu);
1529 memset(bcp, 0, sizeof(struct bau_control)); 1539 memset(bcp, 0, sizeof(struct bau_control));
1530 pnode = uv_cpu_hub_info(cpu)->pnode; 1540 pnode = uv_cpu_hub_info(cpu)->pnode;
1541 if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
1542 printk(KERN_EMERG
1543 "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
1544 cpu, pnode, base_part_pnode,
1545 UV_DISTRIBUTION_SIZE);
1546 return 1;
1547 }
1548 bcp->osnode = cpu_to_node(cpu);
1549 bcp->partition_base_pnode = uv_partition_base_pnode;
1531 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; 1550 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
1532 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); 1551 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
1533 bdp = &uvhub_descs[uvhub]; 1552 bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
1536 bdp->pnode = pnode; 1555 bdp->pnode = pnode;
1537 /* kludge: 'assuming' one node per socket, and assuming that 1556 /* kludge: 'assuming' one node per socket, and assuming that
1538 disabling a socket just leaves a gap in node numbers */ 1557 disabling a socket just leaves a gap in node numbers */
1539 socket = (cpu_to_node(cpu) & 1); 1558 socket = bcp->osnode & 1;
1540 bdp->socket_mask |= (1 << socket); 1559 bdp->socket_mask |= (1 << socket);
1541 sdp = &bdp->socket[socket]; 1560 sdp = &bdp->socket[socket];
1542 sdp->cpu_number[sdp->num_cpus] = cpu; 1561 sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
1585nextsocket: 1604nextsocket:
1586 socket++; 1605 socket++;
1587 socket_mask = (socket_mask >> 1); 1606 socket_mask = (socket_mask >> 1);
1607 /* each socket gets a local array of pnodes/hubs */
1608 bcp = smaster;
1609 bcp->target_hub_and_pnode = kmalloc_node(
1610 sizeof(struct hub_and_pnode) *
1611 num_possible_cpus(), GFP_KERNEL, bcp->osnode);
1612 memset(bcp->target_hub_and_pnode, 0,
1613 sizeof(struct hub_and_pnode) *
1614 num_possible_cpus());
1615 for_each_present_cpu(tcpu) {
1616 bcp->target_hub_and_pnode[tcpu].pnode =
1617 uv_cpu_hub_info(tcpu)->pnode;
1618 bcp->target_hub_and_pnode[tcpu].uvhub =
1619 uv_cpu_hub_info(tcpu)->numa_blade_id;
1620 }
1588 } 1621 }
1589 } 1622 }
1590 kfree(uvhub_descs); 1623 kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
1637 spin_lock_init(&disable_lock); 1670 spin_lock_init(&disable_lock);
1638 congested_cycles = microsec_2_cycles(congested_response_us); 1671 congested_cycles = microsec_2_cycles(congested_response_us);
1639 1672
1640 if (uv_init_per_cpu(nuvhubs)) {
1641 nobau = 1;
1642 return 0;
1643 }
1644
1645 uv_partition_base_pnode = 0x7fffffff; 1673 uv_partition_base_pnode = 0x7fffffff;
1646 for (uvhub = 0; uvhub < nuvhubs; uvhub++) 1674 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1647 if (uv_blade_nr_possible_cpus(uvhub) && 1675 if (uv_blade_nr_possible_cpus(uvhub) &&
1648 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) 1676 (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
1649 uv_partition_base_pnode = uv_blade_to_pnode(uvhub); 1677 uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
1678 }
1679
1680 if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
1681 nobau = 1;
1682 return 0;
1683 }
1650 1684
1651 vector = UV_BAU_MESSAGE; 1685 vector = UV_BAU_MESSAGE;
1652 for_each_possible_blade(uvhub) 1686 for_each_possible_blade(uvhub)
1653 if (uv_blade_nr_possible_cpus(uvhub)) 1687 if (uv_blade_nr_possible_cpus(uvhub))
1654 uv_init_uvhub(uvhub, vector); 1688 uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
1655 1689
1656 uv_enable_timeouts(); 1690 uv_enable_timeouts();
1657 alloc_intr_gate(vector, uv_bau_message_intr1); 1691 alloc_intr_gate(vector, uv_bau_message_intr1);