diff options
Diffstat (limited to 'arch/x86')
26 files changed, 352 insertions, 123 deletions
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index cae3feb1035..db75d07c364 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c | |||
@@ -91,7 +91,7 @@ static int detect_memory_e801(void) | |||
91 | if (oreg.ax > 15*1024) { | 91 | if (oreg.ax > 15*1024) { |
92 | return -1; /* Bogus! */ | 92 | return -1; /* Bogus! */ |
93 | } else if (oreg.ax == 15*1024) { | 93 | } else if (oreg.ax == 15*1024) { |
94 | boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax; | 94 | boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax; |
95 | } else { | 95 | } else { |
96 | /* | 96 | /* |
97 | * This ignores memory above 16MB if we have a memory | 97 | * This ignores memory above 16MB if we have a memory |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index d87988bacf3..34595d5e103 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -78,6 +78,7 @@ | |||
78 | #define APIC_DEST_LOGICAL 0x00800 | 78 | #define APIC_DEST_LOGICAL 0x00800 |
79 | #define APIC_DEST_PHYSICAL 0x00000 | 79 | #define APIC_DEST_PHYSICAL 0x00000 |
80 | #define APIC_DM_FIXED 0x00000 | 80 | #define APIC_DM_FIXED 0x00000 |
81 | #define APIC_DM_FIXED_MASK 0x00700 | ||
81 | #define APIC_DM_LOWEST 0x00100 | 82 | #define APIC_DM_LOWEST 0x00100 |
82 | #define APIC_DM_SMI 0x00200 | 83 | #define APIC_DM_SMI 0x00200 |
83 | #define APIC_DM_REMRD 0x00300 | 84 | #define APIC_DM_REMRD 0x00300 |
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index c4bd267dfc5..a97a240f67f 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -150,7 +150,7 @@ void setup_IO_APIC_irq_extra(u32 gsi); | |||
150 | extern void ioapic_and_gsi_init(void); | 150 | extern void ioapic_and_gsi_init(void); |
151 | extern void ioapic_insert_resources(void); | 151 | extern void ioapic_insert_resources(void); |
152 | 152 | ||
153 | int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr); | 153 | int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); |
154 | 154 | ||
155 | extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); | 155 | extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); |
156 | extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); | 156 | extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7db7723d1f3..d56187c6b83 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
299 | /* Install a pte for a particular vaddr in kernel space. */ | 299 | /* Install a pte for a particular vaddr in kernel space. */ |
300 | void set_pte_vaddr(unsigned long vaddr, pte_t pte); | 300 | void set_pte_vaddr(unsigned long vaddr, pte_t pte); |
301 | 301 | ||
302 | extern void native_pagetable_reserve(u64 start, u64 end); | ||
302 | #ifdef CONFIG_X86_32 | 303 | #ifdef CONFIG_X86_32 |
303 | extern void native_pagetable_setup_start(pgd_t *base); | 304 | extern void native_pagetable_setup_start(pgd_t *base); |
304 | extern void native_pagetable_setup_done(pgd_t *base); | 305 | extern void native_pagetable_setup_done(pgd_t *base); |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 3e094af443c..130f1eeee5f 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -94,6 +94,8 @@ | |||
94 | /* after this # consecutive successes, bump up the throttle if it was lowered */ | 94 | /* after this # consecutive successes, bump up the throttle if it was lowered */ |
95 | #define COMPLETE_THRESHOLD 5 | 95 | #define COMPLETE_THRESHOLD 5 |
96 | 96 | ||
97 | #define UV_LB_SUBNODEID 0x10 | ||
98 | |||
97 | /* | 99 | /* |
98 | * number of entries in the destination side payload queue | 100 | * number of entries in the destination side payload queue |
99 | */ | 101 | */ |
@@ -124,7 +126,7 @@ | |||
124 | * The distribution specification (32 bytes) is interpreted as a 256-bit | 126 | * The distribution specification (32 bytes) is interpreted as a 256-bit |
125 | * distribution vector. Adjacent bits correspond to consecutive even numbered | 127 | * distribution vector. Adjacent bits correspond to consecutive even numbered |
126 | * nodeIDs. The result of adding the index of a given bit to the 15-bit | 128 | * nodeIDs. The result of adding the index of a given bit to the 15-bit |
127 | * 'base_dest_nodeid' field of the header corresponds to the | 129 | * 'base_dest_nasid' field of the header corresponds to the |
128 | * destination nodeID associated with that specified bit. | 130 | * destination nodeID associated with that specified bit. |
129 | */ | 131 | */ |
130 | struct bau_target_uvhubmask { | 132 | struct bau_target_uvhubmask { |
@@ -176,7 +178,7 @@ struct bau_msg_payload { | |||
176 | struct bau_msg_header { | 178 | struct bau_msg_header { |
177 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ | 179 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ |
178 | /* bits 5:0 */ | 180 | /* bits 5:0 */ |
179 | unsigned int base_dest_nodeid:15; /* nasid of the */ | 181 | unsigned int base_dest_nasid:15; /* nasid of the */ |
180 | /* bits 20:6 */ /* first bit in uvhub map */ | 182 | /* bits 20:6 */ /* first bit in uvhub map */ |
181 | unsigned int command:8; /* message type */ | 183 | unsigned int command:8; /* message type */ |
182 | /* bits 28:21 */ | 184 | /* bits 28:21 */ |
@@ -378,6 +380,10 @@ struct ptc_stats { | |||
378 | unsigned long d_rcanceled; /* number of messages canceled by resets */ | 380 | unsigned long d_rcanceled; /* number of messages canceled by resets */ |
379 | }; | 381 | }; |
380 | 382 | ||
383 | struct hub_and_pnode { | ||
384 | short uvhub; | ||
385 | short pnode; | ||
386 | }; | ||
381 | /* | 387 | /* |
382 | * one per-cpu; to locate the software tables | 388 | * one per-cpu; to locate the software tables |
383 | */ | 389 | */ |
@@ -399,10 +405,12 @@ struct bau_control { | |||
399 | int baudisabled; | 405 | int baudisabled; |
400 | int set_bau_off; | 406 | int set_bau_off; |
401 | short cpu; | 407 | short cpu; |
408 | short osnode; | ||
402 | short uvhub_cpu; | 409 | short uvhub_cpu; |
403 | short uvhub; | 410 | short uvhub; |
404 | short cpus_in_socket; | 411 | short cpus_in_socket; |
405 | short cpus_in_uvhub; | 412 | short cpus_in_uvhub; |
413 | short partition_base_pnode; | ||
406 | unsigned short message_number; | 414 | unsigned short message_number; |
407 | unsigned short uvhub_quiesce; | 415 | unsigned short uvhub_quiesce; |
408 | short socket_acknowledge_count[DEST_Q_SIZE]; | 416 | short socket_acknowledge_count[DEST_Q_SIZE]; |
@@ -422,15 +430,16 @@ struct bau_control { | |||
422 | int congested_period; | 430 | int congested_period; |
423 | cycles_t period_time; | 431 | cycles_t period_time; |
424 | long period_requests; | 432 | long period_requests; |
433 | struct hub_and_pnode *target_hub_and_pnode; | ||
425 | }; | 434 | }; |
426 | 435 | ||
427 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) | 436 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) |
428 | { | 437 | { |
429 | return constant_test_bit(uvhub, &dstp->bits[0]); | 438 | return constant_test_bit(uvhub, &dstp->bits[0]); |
430 | } | 439 | } |
431 | static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) | 440 | static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) |
432 | { | 441 | { |
433 | __set_bit(uvhub, &dstp->bits[0]); | 442 | __set_bit(pnode, &dstp->bits[0]); |
434 | } | 443 | } |
435 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, | 444 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, |
436 | int nbits) | 445 | int nbits) |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a501741c233..4298002d0c8 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -398,6 +398,8 @@ struct uv_blade_info { | |||
398 | unsigned short nr_online_cpus; | 398 | unsigned short nr_online_cpus; |
399 | unsigned short pnode; | 399 | unsigned short pnode; |
400 | short memory_nid; | 400 | short memory_nid; |
401 | spinlock_t nmi_lock; | ||
402 | unsigned long nmi_count; | ||
401 | }; | 403 | }; |
402 | extern struct uv_blade_info *uv_blade_info; | 404 | extern struct uv_blade_info *uv_blade_info; |
403 | extern short *uv_node_to_blade; | 405 | extern short *uv_node_to_blade; |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 20cafeac745..f5bb64a823d 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * SGI UV MMR definitions | 6 | * SGI UV MMR definitions |
7 | * | 7 | * |
8 | * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #ifndef _ASM_X86_UV_UV_MMRS_H | 11 | #ifndef _ASM_X86_UV_UV_MMRS_H |
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u { | |||
1099 | } s; | 1099 | } s; |
1100 | }; | 1100 | }; |
1101 | 1101 | ||
1102 | /* ========================================================================= */ | ||
1103 | /* UVH_SCRATCH5 */ | ||
1104 | /* ========================================================================= */ | ||
1105 | #define UVH_SCRATCH5 0x2d0200UL | ||
1106 | #define UVH_SCRATCH5_32 0x00778 | ||
1107 | |||
1108 | #define UVH_SCRATCH5_SCRATCH5_SHFT 0 | ||
1109 | #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL | ||
1110 | union uvh_scratch5_u { | ||
1111 | unsigned long v; | ||
1112 | struct uvh_scratch5_s { | ||
1113 | unsigned long scratch5 : 64; /* RW, W1CS */ | ||
1114 | } s; | ||
1115 | }; | ||
1102 | 1116 | ||
1103 | #endif /* __ASM_UV_MMRS_X86_H__ */ | 1117 | #endif /* __ASM_UV_MMRS_X86_H__ */ |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 643ebf2e2ad..d3d859035af 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -68,6 +68,17 @@ struct x86_init_oem { | |||
68 | }; | 68 | }; |
69 | 69 | ||
70 | /** | 70 | /** |
71 | * struct x86_init_mapping - platform specific initial kernel pagetable setup | ||
72 | * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage | ||
73 | * | ||
74 | * For more details on the purpose of this hook, look in | ||
75 | * init_memory_mapping and the commit that added it. | ||
76 | */ | ||
77 | struct x86_init_mapping { | ||
78 | void (*pagetable_reserve)(u64 start, u64 end); | ||
79 | }; | ||
80 | |||
81 | /** | ||
71 | * struct x86_init_paging - platform specific paging functions | 82 | * struct x86_init_paging - platform specific paging functions |
72 | * @pagetable_setup_start: platform specific pre paging_init() call | 83 | * @pagetable_setup_start: platform specific pre paging_init() call |
73 | * @pagetable_setup_done: platform specific post paging_init() call | 84 | * @pagetable_setup_done: platform specific post paging_init() call |
@@ -123,6 +134,7 @@ struct x86_init_ops { | |||
123 | struct x86_init_mpparse mpparse; | 134 | struct x86_init_mpparse mpparse; |
124 | struct x86_init_irqs irqs; | 135 | struct x86_init_irqs irqs; |
125 | struct x86_init_oem oem; | 136 | struct x86_init_oem oem; |
137 | struct x86_init_mapping mapping; | ||
126 | struct x86_init_paging paging; | 138 | struct x86_init_paging paging; |
127 | struct x86_init_timers timers; | 139 | struct x86_init_timers timers; |
128 | struct x86_init_iommu iommu; | 140 | struct x86_init_iommu iommu; |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 68df09bba92..45fd33d1fd3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -128,8 +128,8 @@ static int __init parse_noapic(char *str) | |||
128 | } | 128 | } |
129 | early_param("noapic", parse_noapic); | 129 | early_param("noapic", parse_noapic); |
130 | 130 | ||
131 | static int io_apic_setup_irq_pin_once(unsigned int irq, int node, | 131 | static int io_apic_setup_irq_pin(unsigned int irq, int node, |
132 | struct io_apic_irq_attr *attr); | 132 | struct io_apic_irq_attr *attr); |
133 | 133 | ||
134 | /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ | 134 | /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ |
135 | void mp_save_irq(struct mpc_intsrc *m) | 135 | void mp_save_irq(struct mpc_intsrc *m) |
@@ -3570,7 +3570,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3570 | } | 3570 | } |
3571 | #endif /* CONFIG_HT_IRQ */ | 3571 | #endif /* CONFIG_HT_IRQ */ |
3572 | 3572 | ||
3573 | int | 3573 | static int |
3574 | io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) | 3574 | io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) |
3575 | { | 3575 | { |
3576 | struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); | 3576 | struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); |
@@ -3585,8 +3585,8 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) | |||
3585 | return ret; | 3585 | return ret; |
3586 | } | 3586 | } |
3587 | 3587 | ||
3588 | static int io_apic_setup_irq_pin_once(unsigned int irq, int node, | 3588 | int io_apic_setup_irq_pin_once(unsigned int irq, int node, |
3589 | struct io_apic_irq_attr *attr) | 3589 | struct io_apic_irq_attr *attr) |
3590 | { | 3590 | { |
3591 | unsigned int id = attr->ioapic, pin = attr->ioapic_pin; | 3591 | unsigned int id = attr->ioapic, pin = attr->ioapic_pin; |
3592 | int ret; | 3592 | int ret; |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 33b10a0fc09..7acd2d2ac96 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -37,6 +37,13 @@ | |||
37 | #include <asm/smp.h> | 37 | #include <asm/smp.h> |
38 | #include <asm/x86_init.h> | 38 | #include <asm/x86_init.h> |
39 | #include <asm/emergency-restart.h> | 39 | #include <asm/emergency-restart.h> |
40 | #include <asm/nmi.h> | ||
41 | |||
42 | /* BMC sets a bit this MMR non-zero before sending an NMI */ | ||
43 | #define UVH_NMI_MMR UVH_SCRATCH5 | ||
44 | #define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) | ||
45 | #define UV_NMI_PENDING_MASK (1UL << 63) | ||
46 | DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); | ||
40 | 47 | ||
41 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 48 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
42 | 49 | ||
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void) | |||
642 | */ | 649 | */ |
643 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | 650 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) |
644 | { | 651 | { |
652 | unsigned long real_uv_nmi; | ||
653 | int bid; | ||
654 | |||
645 | if (reason != DIE_NMIUNKNOWN) | 655 | if (reason != DIE_NMIUNKNOWN) |
646 | return NOTIFY_OK; | 656 | return NOTIFY_OK; |
647 | 657 | ||
648 | if (in_crash_kexec) | 658 | if (in_crash_kexec) |
649 | /* do nothing if entering the crash kernel */ | 659 | /* do nothing if entering the crash kernel */ |
650 | return NOTIFY_OK; | 660 | return NOTIFY_OK; |
661 | |||
651 | /* | 662 | /* |
652 | * Use a lock so only one cpu prints at a time | 663 | * Each blade has an MMR that indicates when an NMI has been sent |
653 | * to prevent intermixed output. | 664 | * to cpus on the blade. If an NMI is detected, atomically |
665 | * clear the MMR and update a per-blade NMI count used to | ||
666 | * cause each cpu on the blade to notice a new NMI. | ||
667 | */ | ||
668 | bid = uv_numa_blade_id(); | ||
669 | real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); | ||
670 | |||
671 | if (unlikely(real_uv_nmi)) { | ||
672 | spin_lock(&uv_blade_info[bid].nmi_lock); | ||
673 | real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); | ||
674 | if (real_uv_nmi) { | ||
675 | uv_blade_info[bid].nmi_count++; | ||
676 | uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); | ||
677 | } | ||
678 | spin_unlock(&uv_blade_info[bid].nmi_lock); | ||
679 | } | ||
680 | |||
681 | if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) | ||
682 | return NOTIFY_DONE; | ||
683 | |||
684 | __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; | ||
685 | |||
686 | /* | ||
687 | * Use a lock so only one cpu prints at a time. | ||
688 | * This prevents intermixed output. | ||
654 | */ | 689 | */ |
655 | spin_lock(&uv_nmi_lock); | 690 | spin_lock(&uv_nmi_lock); |
656 | pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); | 691 | pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); |
657 | dump_stack(); | 692 | dump_stack(); |
658 | spin_unlock(&uv_nmi_lock); | 693 | spin_unlock(&uv_nmi_lock); |
659 | 694 | ||
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | |||
661 | } | 696 | } |
662 | 697 | ||
663 | static struct notifier_block uv_dump_stack_nmi_nb = { | 698 | static struct notifier_block uv_dump_stack_nmi_nb = { |
664 | .notifier_call = uv_handle_nmi | 699 | .notifier_call = uv_handle_nmi, |
700 | .priority = NMI_LOCAL_LOW_PRIOR - 1, | ||
665 | }; | 701 | }; |
666 | 702 | ||
667 | void uv_register_nmi_notifier(void) | 703 | void uv_register_nmi_notifier(void) |
@@ -720,8 +756,9 @@ void __init uv_system_init(void) | |||
720 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); | 756 | printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); |
721 | 757 | ||
722 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); | 758 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); |
723 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); | 759 | uv_blade_info = kzalloc(bytes, GFP_KERNEL); |
724 | BUG_ON(!uv_blade_info); | 760 | BUG_ON(!uv_blade_info); |
761 | |||
725 | for (blade = 0; blade < uv_num_possible_blades(); blade++) | 762 | for (blade = 0; blade < uv_num_possible_blades(); blade++) |
726 | uv_blade_info[blade].memory_nid = -1; | 763 | uv_blade_info[blade].memory_nid = -1; |
727 | 764 | ||
@@ -747,6 +784,7 @@ void __init uv_system_init(void) | |||
747 | uv_blade_info[blade].pnode = pnode; | 784 | uv_blade_info[blade].pnode = pnode; |
748 | uv_blade_info[blade].nr_possible_cpus = 0; | 785 | uv_blade_info[blade].nr_possible_cpus = 0; |
749 | uv_blade_info[blade].nr_online_cpus = 0; | 786 | uv_blade_info[blade].nr_online_cpus = 0; |
787 | spin_lock_init(&uv_blade_info[blade].nmi_lock); | ||
750 | max_pnode = max(pnode, max_pnode); | 788 | max_pnode = max(pnode, max_pnode); |
751 | blade++; | 789 | blade++; |
752 | } | 790 | } |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3532d3bf810..6f9d1f6063e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
613 | #endif | 613 | #endif |
614 | 614 | ||
615 | /* As a rule processors have APIC timer running in deep C states */ | 615 | /* As a rule processors have APIC timer running in deep C states */ |
616 | if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) | 616 | if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) |
617 | set_cpu_cap(c, X86_FEATURE_ARAT); | 617 | set_cpu_cap(c, X86_FEATURE_ARAT); |
618 | 618 | ||
619 | /* | 619 | /* |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 167f97b5596..bb0adad3514 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -509,6 +509,7 @@ recurse: | |||
509 | out_free: | 509 | out_free: |
510 | if (b) { | 510 | if (b) { |
511 | kobject_put(&b->kobj); | 511 | kobject_put(&b->kobj); |
512 | list_del(&b->miscj); | ||
512 | kfree(b); | 513 | kfree(b); |
513 | } | 514 | } |
514 | return err; | 515 | return err; |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6f8c5e9da97..0f034460260 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
446 | */ | 446 | */ |
447 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 447 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
448 | 448 | ||
449 | h = lvtthmr_init; | ||
449 | /* | 450 | /* |
450 | * The initial value of thermal LVT entries on all APs always reads | 451 | * The initial value of thermal LVT entries on all APs always reads |
451 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | 452 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI |
452 | * sequence to them and LVT registers are reset to 0s except for | 453 | * sequence to them and LVT registers are reset to 0s except for |
453 | * the mask bits which are set to 1s when APs receive INIT IPI. | 454 | * the mask bits which are set to 1s when APs receive INIT IPI. |
454 | * Always restore the value that BIOS has programmed on AP based on | 455 | * If BIOS takes over the thermal interrupt and sets its interrupt |
455 | * BSP's info we saved since BIOS is always setting the same value | 456 | * delivery mode to SMI (not fixed), it restores the value that the |
456 | * for all threads/cores | 457 | * BIOS has programmed on AP based on BSP's info we saved since BIOS |
458 | * is always setting the same value for all threads/cores. | ||
457 | */ | 459 | */ |
458 | apic_write(APIC_LVTTHMR, lvtthmr_init); | 460 | if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) |
461 | apic_write(APIC_LVTTHMR, lvtthmr_init); | ||
459 | 462 | ||
460 | h = lvtthmr_init; | ||
461 | 463 | ||
462 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 464 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
463 | printk(KERN_DEBUG | 465 | printk(KERN_DEBUG |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 632e5dc9c9c..e638689279d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -613,8 +613,8 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
613 | /* | 613 | /* |
614 | * Branch tracing: | 614 | * Branch tracing: |
615 | */ | 615 | */ |
616 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 616 | if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && |
617 | (hwc->sample_period == 1)) { | 617 | !attr->freq && hwc->sample_period == 1) { |
618 | /* BTS is not supported by this architecture. */ | 618 | /* BTS is not supported by this architecture. */ |
619 | if (!x86_pmu.bts_active) | 619 | if (!x86_pmu.bts_active) |
620 | return -EOPNOTSUPP; | 620 | return -EOPNOTSUPP; |
@@ -1288,6 +1288,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1288 | 1288 | ||
1289 | cpuc = &__get_cpu_var(cpu_hw_events); | 1289 | cpuc = &__get_cpu_var(cpu_hw_events); |
1290 | 1290 | ||
1291 | /* | ||
1292 | * Some chipsets need to unmask the LVTPC in a particular spot | ||
1293 | * inside the nmi handler. As a result, the unmasking was pushed | ||
1294 | * into all the nmi handlers. | ||
1295 | * | ||
1296 | * This generic handler doesn't seem to have any issues where the | ||
1297 | * unmasking occurs so it was left at the top. | ||
1298 | */ | ||
1299 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1300 | |||
1291 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1301 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1292 | if (!test_bit(idx, cpuc->active_mask)) { | 1302 | if (!test_bit(idx, cpuc->active_mask)) { |
1293 | /* | 1303 | /* |
@@ -1374,8 +1384,6 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1374 | return NOTIFY_DONE; | 1384 | return NOTIFY_DONE; |
1375 | } | 1385 | } |
1376 | 1386 | ||
1377 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1378 | |||
1379 | handled = x86_pmu.handle_irq(args->regs); | 1387 | handled = x86_pmu.handle_irq(args->regs); |
1380 | if (!handled) | 1388 | if (!handled) |
1381 | return NOTIFY_DONE; | 1389 | return NOTIFY_DONE; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 43fa20b1381..447a28de6f0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -25,7 +25,7 @@ struct intel_percore { | |||
25 | /* | 25 | /* |
26 | * Intel PerfMon, used on Core and later. | 26 | * Intel PerfMon, used on Core and later. |
27 | */ | 27 | */ |
28 | static const u64 intel_perfmon_event_map[] = | 28 | static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = |
29 | { | 29 | { |
30 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | 30 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, |
31 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 31 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids | |||
184 | }, | 184 | }, |
185 | }, | 185 | }, |
186 | [ C(LL ) ] = { | 186 | [ C(LL ) ] = { |
187 | /* | ||
188 | * TBD: Need Off-core Response Performance Monitoring support | ||
189 | */ | ||
190 | [ C(OP_READ) ] = { | 187 | [ C(OP_READ) ] = { |
191 | /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ | 188 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
192 | [ C(RESULT_ACCESS) ] = 0x01b7, | 189 | [ C(RESULT_ACCESS) ] = 0x01b7, |
193 | /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ | 190 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
194 | [ C(RESULT_MISS) ] = 0x01bb, | 191 | [ C(RESULT_MISS) ] = 0x01b7, |
195 | }, | 192 | }, |
196 | [ C(OP_WRITE) ] = { | 193 | [ C(OP_WRITE) ] = { |
197 | /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ | 194 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
198 | [ C(RESULT_ACCESS) ] = 0x01b7, | 195 | [ C(RESULT_ACCESS) ] = 0x01b7, |
199 | /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ | 196 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
200 | [ C(RESULT_MISS) ] = 0x01bb, | 197 | [ C(RESULT_MISS) ] = 0x01b7, |
201 | }, | 198 | }, |
202 | [ C(OP_PREFETCH) ] = { | 199 | [ C(OP_PREFETCH) ] = { |
203 | /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ | 200 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
204 | [ C(RESULT_ACCESS) ] = 0x01b7, | 201 | [ C(RESULT_ACCESS) ] = 0x01b7, |
205 | /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ | 202 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
206 | [ C(RESULT_MISS) ] = 0x01bb, | 203 | [ C(RESULT_MISS) ] = 0x01b7, |
207 | }, | 204 | }, |
208 | }, | 205 | }, |
209 | [ C(DTLB) ] = { | 206 | [ C(DTLB) ] = { |
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
285 | }, | 282 | }, |
286 | [ C(LL ) ] = { | 283 | [ C(LL ) ] = { |
287 | [ C(OP_READ) ] = { | 284 | [ C(OP_READ) ] = { |
288 | /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ | 285 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
289 | [ C(RESULT_ACCESS) ] = 0x01b7, | 286 | [ C(RESULT_ACCESS) ] = 0x01b7, |
290 | /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ | 287 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
291 | [ C(RESULT_MISS) ] = 0x01bb, | 288 | [ C(RESULT_MISS) ] = 0x01b7, |
292 | }, | 289 | }, |
293 | /* | 290 | /* |
294 | * Use RFO, not WRITEBACK, because a write miss would typically occur | 291 | * Use RFO, not WRITEBACK, because a write miss would typically occur |
295 | * on RFO. | 292 | * on RFO. |
296 | */ | 293 | */ |
297 | [ C(OP_WRITE) ] = { | 294 | [ C(OP_WRITE) ] = { |
298 | /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ | 295 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
299 | [ C(RESULT_ACCESS) ] = 0x01bb, | 296 | [ C(RESULT_ACCESS) ] = 0x01b7, |
300 | /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ | 297 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
301 | [ C(RESULT_MISS) ] = 0x01b7, | 298 | [ C(RESULT_MISS) ] = 0x01b7, |
302 | }, | 299 | }, |
303 | [ C(OP_PREFETCH) ] = { | 300 | [ C(OP_PREFETCH) ] = { |
304 | /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ | 301 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
305 | [ C(RESULT_ACCESS) ] = 0x01b7, | 302 | [ C(RESULT_ACCESS) ] = 0x01b7, |
306 | /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ | 303 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
307 | [ C(RESULT_MISS) ] = 0x01bb, | 304 | [ C(RESULT_MISS) ] = 0x01b7, |
308 | }, | 305 | }, |
309 | }, | 306 | }, |
310 | [ C(DTLB) ] = { | 307 | [ C(DTLB) ] = { |
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
352 | }; | 349 | }; |
353 | 350 | ||
354 | /* | 351 | /* |
355 | * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 | 352 | * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; |
353 | * See IA32 SDM Vol 3B 30.6.1.3 | ||
356 | */ | 354 | */ |
357 | 355 | ||
358 | #define DMND_DATA_RD (1 << 0) | 356 | #define NHM_DMND_DATA_RD (1 << 0) |
359 | #define DMND_RFO (1 << 1) | 357 | #define NHM_DMND_RFO (1 << 1) |
360 | #define DMND_WB (1 << 3) | 358 | #define NHM_DMND_IFETCH (1 << 2) |
361 | #define PF_DATA_RD (1 << 4) | 359 | #define NHM_DMND_WB (1 << 3) |
362 | #define PF_DATA_RFO (1 << 5) | 360 | #define NHM_PF_DATA_RD (1 << 4) |
363 | #define RESP_UNCORE_HIT (1 << 8) | 361 | #define NHM_PF_DATA_RFO (1 << 5) |
364 | #define RESP_MISS (0xf600) /* non uncore hit */ | 362 | #define NHM_PF_IFETCH (1 << 6) |
363 | #define NHM_OFFCORE_OTHER (1 << 7) | ||
364 | #define NHM_UNCORE_HIT (1 << 8) | ||
365 | #define NHM_OTHER_CORE_HIT_SNP (1 << 9) | ||
366 | #define NHM_OTHER_CORE_HITM (1 << 10) | ||
367 | /* reserved */ | ||
368 | #define NHM_REMOTE_CACHE_FWD (1 << 12) | ||
369 | #define NHM_REMOTE_DRAM (1 << 13) | ||
370 | #define NHM_LOCAL_DRAM (1 << 14) | ||
371 | #define NHM_NON_DRAM (1 << 15) | ||
372 | |||
373 | #define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) | ||
374 | |||
375 | #define NHM_DMND_READ (NHM_DMND_DATA_RD) | ||
376 | #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) | ||
377 | #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) | ||
378 | |||
379 | #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) | ||
380 | #define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) | ||
381 | #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) | ||
365 | 382 | ||
366 | static __initconst const u64 nehalem_hw_cache_extra_regs | 383 | static __initconst const u64 nehalem_hw_cache_extra_regs |
367 | [PERF_COUNT_HW_CACHE_MAX] | 384 | [PERF_COUNT_HW_CACHE_MAX] |
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs | |||
370 | { | 387 | { |
371 | [ C(LL ) ] = { | 388 | [ C(LL ) ] = { |
372 | [ C(OP_READ) ] = { | 389 | [ C(OP_READ) ] = { |
373 | [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, | 390 | [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, |
374 | [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, | 391 | [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, |
375 | }, | 392 | }, |
376 | [ C(OP_WRITE) ] = { | 393 | [ C(OP_WRITE) ] = { |
377 | [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, | 394 | [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, |
378 | [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, | 395 | [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, |
379 | }, | 396 | }, |
380 | [ C(OP_PREFETCH) ] = { | 397 | [ C(OP_PREFETCH) ] = { |
381 | [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, | 398 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, |
382 | [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, | 399 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, |
383 | }, | 400 | }, |
384 | } | 401 | } |
385 | }; | 402 | }; |
@@ -933,6 +950,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
933 | 950 | ||
934 | cpuc = &__get_cpu_var(cpu_hw_events); | 951 | cpuc = &__get_cpu_var(cpu_hw_events); |
935 | 952 | ||
953 | /* | ||
954 | * Some chipsets need to unmask the LVTPC in a particular spot | ||
955 | * inside the nmi handler. As a result, the unmasking was pushed | ||
956 | * into all the nmi handlers. | ||
957 | * | ||
958 | * This handler doesn't seem to have any issues with the unmasking | ||
959 | * so it was left at the top. | ||
960 | */ | ||
961 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
962 | |||
936 | intel_pmu_disable_all(); | 963 | intel_pmu_disable_all(); |
937 | handled = intel_pmu_drain_bts_buffer(); | 964 | handled = intel_pmu_drain_bts_buffer(); |
938 | status = intel_pmu_get_status(); | 965 | status = intel_pmu_get_status(); |
@@ -998,6 +1025,9 @@ intel_bts_constraints(struct perf_event *event) | |||
998 | struct hw_perf_event *hwc = &event->hw; | 1025 | struct hw_perf_event *hwc = &event->hw; |
999 | unsigned int hw_event, bts_event; | 1026 | unsigned int hw_event, bts_event; |
1000 | 1027 | ||
1028 | if (event->attr.freq) | ||
1029 | return NULL; | ||
1030 | |||
1001 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; | 1031 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
1002 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); | 1032 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
1003 | 1033 | ||
@@ -1305,7 +1335,7 @@ static void intel_clovertown_quirks(void) | |||
1305 | * AJ106 could possibly be worked around by not allowing LBR | 1335 | * AJ106 could possibly be worked around by not allowing LBR |
1306 | * usage from PEBS, including the fixup. | 1336 | * usage from PEBS, including the fixup. |
1307 | * AJ68 could possibly be worked around by always programming | 1337 | * AJ68 could possibly be worked around by always programming |
1308 | * a pebs_event_reset[0] value and coping with the lost events. | 1338 | * a pebs_event_reset[0] value and coping with the lost events. |
1309 | * | 1339 | * |
1310 | * But taken together it might just make sense to not enable PEBS on | 1340 | * But taken together it might just make sense to not enable PEBS on |
1311 | * these chips. | 1341 | * these chips. |
@@ -1409,6 +1439,18 @@ static __init int intel_pmu_init(void) | |||
1409 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; | 1439 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; |
1410 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1440 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1411 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1441 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1442 | |||
1443 | if (ebx & 0x40) { | ||
1444 | /* | ||
1445 | * Erratum AAJ80 detected, we work it around by using | ||
1446 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
1447 | * branch-misses, but it's still much better than the | ||
1448 | * architectural event which is often completely bogus: | ||
1449 | */ | ||
1450 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
1451 | |||
1452 | pr_cont("erratum AAJ80 worked around, "); | ||
1453 | } | ||
1412 | pr_cont("Nehalem events, "); | 1454 | pr_cont("Nehalem events, "); |
1413 | break; | 1455 | break; |
1414 | 1456 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index d1f77e2934a..e93fcd55fae 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -950,11 +950,20 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
950 | x86_pmu_stop(event, 0); | 950 | x86_pmu_stop(event, 0); |
951 | } | 951 | } |
952 | 952 | ||
953 | if (handled) { | 953 | if (handled) |
954 | /* p4 quirk: unmask it again */ | ||
955 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
956 | inc_irq_stat(apic_perf_irqs); | 954 | inc_irq_stat(apic_perf_irqs); |
957 | } | 955 | |
956 | /* | ||
957 | * When dealing with the unmasking of the LVTPC on P4 perf hw, it has | ||
958 | * been observed that the OVF bit flag has to be cleared first _before_ | ||
959 | * the LVTPC can be unmasked. | ||
960 | * | ||
961 | * The reason is the NMI line will continue to be asserted while the OVF | ||
962 | * bit is set. This causes a second NMI to generate if the LVTPC is | ||
963 | * unmasked before the OVF bit is cleared, leading to unknown NMI | ||
964 | * messages. | ||
965 | */ | ||
966 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
958 | 967 | ||
959 | return handled; | 968 | return handled; |
960 | } | 969 | } |
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 706a9fb46a5..e90f08458e6 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -391,7 +391,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, | |||
391 | 391 | ||
392 | set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); | 392 | set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); |
393 | 393 | ||
394 | return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr); | 394 | return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr); |
395 | } | 395 | } |
396 | 396 | ||
397 | static void __init ioapic_add_ofnode(struct device_node *np) | 397 | static void __init ioapic_add_ofnode(struct device_node *np) |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index c969fd9d156..f1a6244d7d9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1183 | struct pt_regs *regs) | 1183 | struct pt_regs *regs) |
1184 | { | 1184 | { |
1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | 1185 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
1186 | unsigned long flags; | ||
1186 | 1187 | ||
1187 | /* This is possible if op is under delayed unoptimizing */ | 1188 | /* This is possible if op is under delayed unoptimizing */ |
1188 | if (kprobe_disabled(&op->kp)) | 1189 | if (kprobe_disabled(&op->kp)) |
1189 | return; | 1190 | return; |
1190 | 1191 | ||
1191 | preempt_disable(); | 1192 | local_irq_save(flags); |
1192 | if (kprobe_running()) { | 1193 | if (kprobe_running()) { |
1193 | kprobes_inc_nmissed_count(&op->kp); | 1194 | kprobes_inc_nmissed_count(&op->kp); |
1194 | } else { | 1195 | } else { |
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, | |||
1207 | opt_pre_handler(&op->kp, regs); | 1208 | opt_pre_handler(&op->kp, regs); |
1208 | __this_cpu_write(current_kprobe, NULL); | 1209 | __this_cpu_write(current_kprobe, NULL); |
1209 | } | 1210 | } |
1210 | preempt_enable_no_resched(); | 1211 | local_irq_restore(flags); |
1211 | } | 1212 | } |
1212 | 1213 | ||
1213 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | 1214 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 45892dc4b72..f65e5b521db 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | |||
608 | unsigned len, type; | 608 | unsigned len, type; |
609 | struct perf_event *bp; | 609 | struct perf_event *bp; |
610 | 610 | ||
611 | if (ptrace_get_breakpoints(tsk) < 0) | ||
612 | return -ESRCH; | ||
613 | |||
611 | data &= ~DR_CONTROL_RESERVED; | 614 | data &= ~DR_CONTROL_RESERVED; |
612 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | 615 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); |
613 | restore: | 616 | restore: |
@@ -655,6 +658,9 @@ restore: | |||
655 | } | 658 | } |
656 | goto restore; | 659 | goto restore; |
657 | } | 660 | } |
661 | |||
662 | ptrace_put_breakpoints(tsk); | ||
663 | |||
658 | return ((orig_ret < 0) ? orig_ret : rc); | 664 | return ((orig_ret < 0) ? orig_ret : rc); |
659 | } | 665 | } |
660 | 666 | ||
@@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | |||
668 | 674 | ||
669 | if (n < HBP_NUM) { | 675 | if (n < HBP_NUM) { |
670 | struct perf_event *bp; | 676 | struct perf_event *bp; |
677 | |||
678 | if (ptrace_get_breakpoints(tsk) < 0) | ||
679 | return -ESRCH; | ||
680 | |||
671 | bp = thread->ptrace_bps[n]; | 681 | bp = thread->ptrace_bps[n]; |
672 | if (!bp) | 682 | if (!bp) |
673 | return 0; | 683 | val = 0; |
674 | val = bp->hw.info.address; | 684 | else |
685 | val = bp->hw.info.address; | ||
686 | |||
687 | ptrace_put_breakpoints(tsk); | ||
675 | } else if (n == 6) { | 688 | } else if (n == 6) { |
676 | val = thread->debugreg6; | 689 | val = thread->debugreg6; |
677 | } else if (n == 7) { | 690 | } else if (n == 7) { |
@@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
686 | struct perf_event *bp; | 699 | struct perf_event *bp; |
687 | struct thread_struct *t = &tsk->thread; | 700 | struct thread_struct *t = &tsk->thread; |
688 | struct perf_event_attr attr; | 701 | struct perf_event_attr attr; |
702 | int err = 0; | ||
703 | |||
704 | if (ptrace_get_breakpoints(tsk) < 0) | ||
705 | return -ESRCH; | ||
689 | 706 | ||
690 | if (!t->ptrace_bps[nr]) { | 707 | if (!t->ptrace_bps[nr]) { |
691 | ptrace_breakpoint_init(&attr); | 708 | ptrace_breakpoint_init(&attr); |
@@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
709 | * writing for the user. And anyway this is the previous | 726 | * writing for the user. And anyway this is the previous |
710 | * behaviour. | 727 | * behaviour. |
711 | */ | 728 | */ |
712 | if (IS_ERR(bp)) | 729 | if (IS_ERR(bp)) { |
713 | return PTR_ERR(bp); | 730 | err = PTR_ERR(bp); |
731 | goto put; | ||
732 | } | ||
714 | 733 | ||
715 | t->ptrace_bps[nr] = bp; | 734 | t->ptrace_bps[nr] = bp; |
716 | } else { | 735 | } else { |
717 | int err; | ||
718 | |||
719 | bp = t->ptrace_bps[nr]; | 736 | bp = t->ptrace_bps[nr]; |
720 | 737 | ||
721 | attr = bp->attr; | 738 | attr = bp->attr; |
722 | attr.bp_addr = addr; | 739 | attr.bp_addr = addr; |
723 | err = modify_user_hw_breakpoint(bp, &attr); | 740 | err = modify_user_hw_breakpoint(bp, &attr); |
724 | if (err) | ||
725 | return err; | ||
726 | } | 741 | } |
727 | 742 | ||
728 | 743 | put: | |
729 | return 0; | 744 | ptrace_put_breakpoints(tsk); |
745 | return err; | ||
730 | } | 746 | } |
731 | 747 | ||
732 | /* | 748 | /* |
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S index 29092b38d81..1d5c46df0d7 100644 --- a/arch/x86/kernel/reboot_32.S +++ b/arch/x86/kernel/reboot_32.S | |||
@@ -21,26 +21,26 @@ r_base = . | |||
21 | /* Get our own relocated address */ | 21 | /* Get our own relocated address */ |
22 | call 1f | 22 | call 1f |
23 | 1: popl %ebx | 23 | 1: popl %ebx |
24 | subl $1b, %ebx | 24 | subl $(1b - r_base), %ebx |
25 | 25 | ||
26 | /* Compute the equivalent real-mode segment */ | 26 | /* Compute the equivalent real-mode segment */ |
27 | movl %ebx, %ecx | 27 | movl %ebx, %ecx |
28 | shrl $4, %ecx | 28 | shrl $4, %ecx |
29 | 29 | ||
30 | /* Patch post-real-mode segment jump */ | 30 | /* Patch post-real-mode segment jump */ |
31 | movw dispatch_table(%ebx,%eax,2),%ax | 31 | movw (dispatch_table - r_base)(%ebx,%eax,2),%ax |
32 | movw %ax, 101f(%ebx) | 32 | movw %ax, (101f - r_base)(%ebx) |
33 | movw %cx, 102f(%ebx) | 33 | movw %cx, (102f - r_base)(%ebx) |
34 | 34 | ||
35 | /* Set up the IDT for real mode. */ | 35 | /* Set up the IDT for real mode. */ |
36 | lidtl machine_real_restart_idt(%ebx) | 36 | lidtl (machine_real_restart_idt - r_base)(%ebx) |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Set up a GDT from which we can load segment descriptors for real | 39 | * Set up a GDT from which we can load segment descriptors for real |
40 | * mode. The GDT is not used in real mode; it is just needed here to | 40 | * mode. The GDT is not used in real mode; it is just needed here to |
41 | * prepare the descriptors. | 41 | * prepare the descriptors. |
42 | */ | 42 | */ |
43 | lgdtl machine_real_restart_gdt(%ebx) | 43 | lgdtl (machine_real_restart_gdt - r_base)(%ebx) |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Load the data segment registers with 16-bit compatible values | 46 | * Load the data segment registers with 16-bit compatible values |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c11514e9128..75ef4b18e9b 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = { | |||
61 | .banner = default_banner, | 61 | .banner = default_banner, |
62 | }, | 62 | }, |
63 | 63 | ||
64 | .mapping = { | ||
65 | .pagetable_reserve = native_pagetable_reserve, | ||
66 | }, | ||
67 | |||
64 | .paging = { | 68 | .paging = { |
65 | .pagetable_setup_start = native_pagetable_setup_start, | 69 | .pagetable_setup_start = native_pagetable_setup_start, |
66 | .pagetable_setup_done = native_pagetable_setup_done, | 70 | .pagetable_setup_done = native_pagetable_setup_done, |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 286d289b039..37b8b0fe832 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
81 | end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); | 81 | end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); |
82 | } | 82 | } |
83 | 83 | ||
84 | void __init native_pagetable_reserve(u64 start, u64 end) | ||
85 | { | ||
86 | memblock_x86_reserve_range(start, end, "PGTABLE"); | ||
87 | } | ||
88 | |||
84 | struct map_range { | 89 | struct map_range { |
85 | unsigned long start; | 90 | unsigned long start; |
86 | unsigned long end; | 91 | unsigned long end; |
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
272 | 277 | ||
273 | __flush_tlb_all(); | 278 | __flush_tlb_all(); |
274 | 279 | ||
280 | /* | ||
281 | * Reserve the kernel pagetable pages we used (pgt_buf_start - | ||
282 | * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) | ||
283 | * so that they can be reused for other purposes. | ||
284 | * | ||
285 | * On native it just means calling memblock_x86_reserve_range, on Xen it | ||
286 | * also means marking RW the pagetable pages that we allocated before | ||
287 | * but that haven't been used. | ||
288 | * | ||
289 | * In fact on xen we mark RO the whole range pgt_buf_start - | ||
290 | * pgt_buf_top, because we have to make sure that when | ||
291 | * init_memory_mapping reaches the pagetable pages area, it maps | ||
292 | * RO all the pagetable pages, including the ones that are beyond | ||
293 | * pgt_buf_end at that time. | ||
294 | */ | ||
275 | if (!after_bootmem && pgt_buf_end > pgt_buf_start) | 295 | if (!after_bootmem && pgt_buf_end > pgt_buf_start) |
276 | memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT, | 296 | x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), |
277 | pgt_buf_end << PAGE_SHIFT, "PGTABLE"); | 297 | PFN_PHYS(pgt_buf_end)); |
278 | 298 | ||
279 | if (!after_bootmem) | 299 | if (!after_bootmem) |
280 | early_memtest(start, end); | 300 | early_memtest(start, end); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index e8c00cc7203..85b52fc0308 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -306,7 +306,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) | |||
306 | bi->end = min(bi->end, high); | 306 | bi->end = min(bi->end, high); |
307 | 307 | ||
308 | /* and there's no empty block */ | 308 | /* and there's no empty block */ |
309 | if (bi->start == bi->end) { | 309 | if (bi->start >= bi->end) { |
310 | numa_remove_memblk_from(i--, mi); | 310 | numa_remove_memblk_from(i--, mi); |
311 | continue; | 311 | continue; |
312 | } | 312 | } |
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts index 2d6d226f2b1..e70be38ce03 100644 --- a/arch/x86/platform/ce4100/falconfalls.dts +++ b/arch/x86/platform/ce4100/falconfalls.dts | |||
@@ -347,7 +347,7 @@ | |||
347 | "pciclass0c03"; | 347 | "pciclass0c03"; |
348 | 348 | ||
349 | reg = <0x16800 0x0 0x0 0x0 0x0>; | 349 | reg = <0x16800 0x0 0x0 0x0 0x0>; |
350 | interrupts = <22 3>; | 350 | interrupts = <22 1>; |
351 | }; | 351 | }; |
352 | 352 | ||
353 | usb@d,1 { | 353 | usb@d,1 { |
@@ -357,7 +357,7 @@ | |||
357 | "pciclass0c03"; | 357 | "pciclass0c03"; |
358 | 358 | ||
359 | reg = <0x16900 0x0 0x0 0x0 0x0>; | 359 | reg = <0x16900 0x0 0x0 0x0 0x0>; |
360 | interrupts = <22 3>; | 360 | interrupts = <22 1>; |
361 | }; | 361 | }; |
362 | 362 | ||
363 | sata@e,0 { | 363 | sata@e,0 { |
@@ -367,7 +367,7 @@ | |||
367 | "pciclass0106"; | 367 | "pciclass0106"; |
368 | 368 | ||
369 | reg = <0x17000 0x0 0x0 0x0 0x0>; | 369 | reg = <0x17000 0x0 0x0 0x0 0x0>; |
370 | interrupts = <23 3>; | 370 | interrupts = <23 1>; |
371 | }; | 371 | }; |
372 | 372 | ||
373 | flash@f,0 { | 373 | flash@f,0 { |
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 7cb6424317f..c58e0ea39ef 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
699 | struct mm_struct *mm, | 699 | struct mm_struct *mm, |
700 | unsigned long va, unsigned int cpu) | 700 | unsigned long va, unsigned int cpu) |
701 | { | 701 | { |
702 | int tcpu; | ||
703 | int uvhub; | ||
704 | int locals = 0; | 702 | int locals = 0; |
705 | int remotes = 0; | 703 | int remotes = 0; |
706 | int hubs = 0; | 704 | int hubs = 0; |
705 | int tcpu; | ||
706 | int tpnode; | ||
707 | struct bau_desc *bau_desc; | 707 | struct bau_desc *bau_desc; |
708 | struct cpumask *flush_mask; | 708 | struct cpumask *flush_mask; |
709 | struct ptc_stats *stat; | 709 | struct ptc_stats *stat; |
710 | struct bau_control *bcp; | 710 | struct bau_control *bcp; |
711 | struct bau_control *tbcp; | 711 | struct bau_control *tbcp; |
712 | struct hub_and_pnode *hpp; | ||
712 | 713 | ||
713 | /* kernel was booted 'nobau' */ | 714 | /* kernel was booted 'nobau' */ |
714 | if (nobau) | 715 | if (nobau) |
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
750 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | 751 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; |
751 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 752 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
752 | 753 | ||
753 | /* cpu statistics */ | ||
754 | for_each_cpu(tcpu, flush_mask) { | 754 | for_each_cpu(tcpu, flush_mask) { |
755 | uvhub = uv_cpu_to_blade_id(tcpu); | 755 | /* |
756 | bau_uvhub_set(uvhub, &bau_desc->distribution); | 756 | * The distribution vector is a bit map of pnodes, relative |
757 | if (uvhub == bcp->uvhub) | 757 | * to the partition base pnode (and the partition base nasid |
758 | * in the header). | ||
759 | * Translate cpu to pnode and hub using an array stored | ||
760 | * in local memory. | ||
761 | */ | ||
762 | hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; | ||
763 | tpnode = hpp->pnode - bcp->partition_base_pnode; | ||
764 | bau_uvhub_set(tpnode, &bau_desc->distribution); | ||
765 | if (hpp->uvhub == bcp->uvhub) | ||
758 | locals++; | 766 | locals++; |
759 | else | 767 | else |
760 | remotes++; | 768 | remotes++; |
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) | |||
855 | * an interrupt, but causes an error message to be returned to | 863 | * an interrupt, but causes an error message to be returned to |
856 | * the sender. | 864 | * the sender. |
857 | */ | 865 | */ |
858 | static void uv_enable_timeouts(void) | 866 | static void __init uv_enable_timeouts(void) |
859 | { | 867 | { |
860 | int uvhub; | 868 | int uvhub; |
861 | int nuvhubs; | 869 | int nuvhubs; |
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void) | |||
1326 | } | 1334 | } |
1327 | 1335 | ||
1328 | /* | 1336 | /* |
1329 | * initialize the sending side's sending buffers | 1337 | * Initialize the sending side's sending buffers. |
1330 | */ | 1338 | */ |
1331 | static void | 1339 | static void |
1332 | uv_activation_descriptor_init(int node, int pnode) | 1340 | uv_activation_descriptor_init(int node, int pnode, int base_pnode) |
1333 | { | 1341 | { |
1334 | int i; | 1342 | int i; |
1335 | int cpu; | 1343 | int cpu; |
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode) | |||
1352 | n = pa >> uv_nshift; | 1360 | n = pa >> uv_nshift; |
1353 | m = pa & uv_mmask; | 1361 | m = pa & uv_mmask; |
1354 | 1362 | ||
1363 | /* the 14-bit pnode */ | ||
1355 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | 1364 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
1356 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | 1365 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); |
1357 | |||
1358 | /* | 1366 | /* |
1359 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | 1367 | * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each |
1360 | * cpu even though we only use the first one; one descriptor can | 1368 | * cpu even though we only use the first one; one descriptor can |
1361 | * describe a broadcast to 256 uv hubs. | 1369 | * describe a broadcast to 256 uv hubs. |
1362 | */ | 1370 | */ |
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode) | |||
1365 | memset(bd2, 0, sizeof(struct bau_desc)); | 1373 | memset(bd2, 0, sizeof(struct bau_desc)); |
1366 | bd2->header.sw_ack_flag = 1; | 1374 | bd2->header.sw_ack_flag = 1; |
1367 | /* | 1375 | /* |
1368 | * base_dest_nodeid is the nasid of the first uvhub | 1376 | * The base_dest_nasid set in the message header is the nasid |
1369 | * in the partition. The bit map will indicate uvhub numbers, | 1377 | * of the first uvhub in the partition. The bit map will |
1370 | * which are 0-N in a partition. Pnodes are unique system-wide. | 1378 | * indicate destination pnode numbers relative to that base. |
1379 | * They may not be consecutive if nasid striding is being used. | ||
1371 | */ | 1380 | */ |
1372 | bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); | 1381 | bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); |
1373 | bd2->header.dest_subnodeid = 0x10; /* the LB */ | 1382 | bd2->header.dest_subnodeid = UV_LB_SUBNODEID; |
1374 | bd2->header.command = UV_NET_ENDPOINT_INTD; | 1383 | bd2->header.command = UV_NET_ENDPOINT_INTD; |
1375 | bd2->header.int_both = 1; | 1384 | bd2->header.int_both = 1; |
1376 | /* | 1385 | /* |
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode) | |||
1442 | /* | 1451 | /* |
1443 | * Initialization of each UV hub's structures | 1452 | * Initialization of each UV hub's structures |
1444 | */ | 1453 | */ |
1445 | static void __init uv_init_uvhub(int uvhub, int vector) | 1454 | static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) |
1446 | { | 1455 | { |
1447 | int node; | 1456 | int node; |
1448 | int pnode; | 1457 | int pnode; |
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector) | |||
1450 | 1459 | ||
1451 | node = uvhub_to_first_node(uvhub); | 1460 | node = uvhub_to_first_node(uvhub); |
1452 | pnode = uv_blade_to_pnode(uvhub); | 1461 | pnode = uv_blade_to_pnode(uvhub); |
1453 | uv_activation_descriptor_init(node, pnode); | 1462 | uv_activation_descriptor_init(node, pnode, base_pnode); |
1454 | uv_payload_queue_init(node, pnode); | 1463 | uv_payload_queue_init(node, pnode); |
1455 | /* | 1464 | /* |
1456 | * the below initialization can't be in firmware because the | 1465 | * The below initialization can't be in firmware because the |
1457 | * messaging IRQ will be determined by the OS | 1466 | * messaging IRQ will be determined by the OS. |
1458 | */ | 1467 | */ |
1459 | apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; | 1468 | apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; |
1460 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | 1469 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void) | |||
1491 | /* | 1500 | /* |
1492 | * initialize the bau_control structure for each cpu | 1501 | * initialize the bau_control structure for each cpu |
1493 | */ | 1502 | */ |
1494 | static int __init uv_init_per_cpu(int nuvhubs) | 1503 | static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) |
1495 | { | 1504 | { |
1496 | int i; | 1505 | int i; |
1497 | int cpu; | 1506 | int cpu; |
1507 | int tcpu; | ||
1498 | int pnode; | 1508 | int pnode; |
1499 | int uvhub; | 1509 | int uvhub; |
1500 | int have_hmaster; | 1510 | int have_hmaster; |
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1528 | bcp = &per_cpu(bau_control, cpu); | 1538 | bcp = &per_cpu(bau_control, cpu); |
1529 | memset(bcp, 0, sizeof(struct bau_control)); | 1539 | memset(bcp, 0, sizeof(struct bau_control)); |
1530 | pnode = uv_cpu_hub_info(cpu)->pnode; | 1540 | pnode = uv_cpu_hub_info(cpu)->pnode; |
1541 | if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { | ||
1542 | printk(KERN_EMERG | ||
1543 | "cpu %d pnode %d-%d beyond %d; BAU disabled\n", | ||
1544 | cpu, pnode, base_part_pnode, | ||
1545 | UV_DISTRIBUTION_SIZE); | ||
1546 | return 1; | ||
1547 | } | ||
1548 | bcp->osnode = cpu_to_node(cpu); | ||
1549 | bcp->partition_base_pnode = uv_partition_base_pnode; | ||
1531 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | 1550 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; |
1532 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); | 1551 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); |
1533 | bdp = &uvhub_descs[uvhub]; | 1552 | bdp = &uvhub_descs[uvhub]; |
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1536 | bdp->pnode = pnode; | 1555 | bdp->pnode = pnode; |
1537 | /* kludge: 'assuming' one node per socket, and assuming that | 1556 | /* kludge: 'assuming' one node per socket, and assuming that |
1538 | disabling a socket just leaves a gap in node numbers */ | 1557 | disabling a socket just leaves a gap in node numbers */ |
1539 | socket = (cpu_to_node(cpu) & 1); | 1558 | socket = bcp->osnode & 1; |
1540 | bdp->socket_mask |= (1 << socket); | 1559 | bdp->socket_mask |= (1 << socket); |
1541 | sdp = &bdp->socket[socket]; | 1560 | sdp = &bdp->socket[socket]; |
1542 | sdp->cpu_number[sdp->num_cpus] = cpu; | 1561 | sdp->cpu_number[sdp->num_cpus] = cpu; |
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs) | |||
1585 | nextsocket: | 1604 | nextsocket: |
1586 | socket++; | 1605 | socket++; |
1587 | socket_mask = (socket_mask >> 1); | 1606 | socket_mask = (socket_mask >> 1); |
1607 | /* each socket gets a local array of pnodes/hubs */ | ||
1608 | bcp = smaster; | ||
1609 | bcp->target_hub_and_pnode = kmalloc_node( | ||
1610 | sizeof(struct hub_and_pnode) * | ||
1611 | num_possible_cpus(), GFP_KERNEL, bcp->osnode); | ||
1612 | memset(bcp->target_hub_and_pnode, 0, | ||
1613 | sizeof(struct hub_and_pnode) * | ||
1614 | num_possible_cpus()); | ||
1615 | for_each_present_cpu(tcpu) { | ||
1616 | bcp->target_hub_and_pnode[tcpu].pnode = | ||
1617 | uv_cpu_hub_info(tcpu)->pnode; | ||
1618 | bcp->target_hub_and_pnode[tcpu].uvhub = | ||
1619 | uv_cpu_hub_info(tcpu)->numa_blade_id; | ||
1620 | } | ||
1588 | } | 1621 | } |
1589 | } | 1622 | } |
1590 | kfree(uvhub_descs); | 1623 | kfree(uvhub_descs); |
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void) | |||
1637 | spin_lock_init(&disable_lock); | 1670 | spin_lock_init(&disable_lock); |
1638 | congested_cycles = microsec_2_cycles(congested_response_us); | 1671 | congested_cycles = microsec_2_cycles(congested_response_us); |
1639 | 1672 | ||
1640 | if (uv_init_per_cpu(nuvhubs)) { | ||
1641 | nobau = 1; | ||
1642 | return 0; | ||
1643 | } | ||
1644 | |||
1645 | uv_partition_base_pnode = 0x7fffffff; | 1673 | uv_partition_base_pnode = 0x7fffffff; |
1646 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) | 1674 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
1647 | if (uv_blade_nr_possible_cpus(uvhub) && | 1675 | if (uv_blade_nr_possible_cpus(uvhub) && |
1648 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) | 1676 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) |
1649 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); | 1677 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); |
1678 | } | ||
1679 | |||
1680 | if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { | ||
1681 | nobau = 1; | ||
1682 | return 0; | ||
1683 | } | ||
1650 | 1684 | ||
1651 | vector = UV_BAU_MESSAGE; | 1685 | vector = UV_BAU_MESSAGE; |
1652 | for_each_possible_blade(uvhub) | 1686 | for_each_possible_blade(uvhub) |
1653 | if (uv_blade_nr_possible_cpus(uvhub)) | 1687 | if (uv_blade_nr_possible_cpus(uvhub)) |
1654 | uv_init_uvhub(uvhub, vector); | 1688 | uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); |
1655 | 1689 | ||
1656 | uv_enable_timeouts(); | 1690 | uv_enable_timeouts(); |
1657 | alloc_intr_gate(vector, uv_bau_message_intr1); | 1691 | alloc_intr_gate(vector, uv_bau_message_intr1); |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index aef7af92b28..0684f3c74d5 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base) | |||
1275 | { | 1275 | { |
1276 | } | 1276 | } |
1277 | 1277 | ||
1278 | static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) | ||
1279 | { | ||
1280 | /* reserve the range used */ | ||
1281 | native_pagetable_reserve(start, end); | ||
1282 | |||
1283 | /* set as RW the rest */ | ||
1284 | printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, | ||
1285 | PFN_PHYS(pgt_buf_top)); | ||
1286 | while (end < PFN_PHYS(pgt_buf_top)) { | ||
1287 | make_lowmem_page_readwrite(__va(end)); | ||
1288 | end += PAGE_SIZE; | ||
1289 | } | ||
1290 | } | ||
1291 | |||
1278 | static void xen_post_allocator_init(void); | 1292 | static void xen_post_allocator_init(void); |
1279 | 1293 | ||
1280 | static __init void xen_pagetable_setup_done(pgd_t *base) | 1294 | static __init void xen_pagetable_setup_done(pgd_t *base) |
@@ -1495,7 +1509,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1495 | * it is RO. | 1509 | * it is RO. |
1496 | */ | 1510 | */ |
1497 | if (((!is_early_ioremap_ptep(ptep) && | 1511 | if (((!is_early_ioremap_ptep(ptep) && |
1498 | pfn >= pgt_buf_start && pfn < pgt_buf_end)) || | 1512 | pfn >= pgt_buf_start && pfn < pgt_buf_top)) || |
1499 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) | 1513 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) |
1500 | pte = pte_wrprotect(pte); | 1514 | pte = pte_wrprotect(pte); |
1501 | 1515 | ||
@@ -2105,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
2105 | 2119 | ||
2106 | void __init xen_init_mmu_ops(void) | 2120 | void __init xen_init_mmu_ops(void) |
2107 | { | 2121 | { |
2122 | x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; | ||
2108 | x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; | 2123 | x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; |
2109 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; | 2124 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; |
2110 | pv_mmu_ops = xen_mmu_ops; | 2125 | pv_mmu_ops = xen_mmu_ops; |