Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/boot/memory.c                      |    2
-rw-r--r--   arch/x86/include/asm/apicdef.h              |    1
-rw-r--r--   arch/x86/include/asm/io_apic.h              |    2
-rw-r--r--   arch/x86/include/asm/pgtable_types.h        |    1
-rw-r--r--   arch/x86/include/asm/uv/uv_bau.h            |   17
-rw-r--r--   arch/x86/include/asm/uv/uv_hub.h            |    2
-rw-r--r--   arch/x86/include/asm/uv/uv_mmrs.h           |   16
-rw-r--r--   arch/x86/include/asm/x86_init.h             |   12
-rw-r--r--   arch/x86/kernel/apic/io_apic.c              |   10
-rw-r--r--   arch/x86/kernel/apic/x2apic_uv_x.c          |   48
-rw-r--r--   arch/x86/kernel/cpu/amd.c                   |    2
-rw-r--r--   arch/x86/kernel/cpu/mcheck/mce_amd.c        |    1
-rw-r--r--   arch/x86/kernel/cpu/mcheck/therm_throt.c    |   12
-rw-r--r--   arch/x86/kernel/cpu/perf_event.c            |   16
-rw-r--r--   arch/x86/kernel/cpu/perf_event_intel.c      |  116
-rw-r--r--   arch/x86/kernel/cpu/perf_event_p4.c         |   17
-rw-r--r--   arch/x86/kernel/devicetree.c                |    2
-rw-r--r--   arch/x86/kernel/kprobes.c                   |    5
-rw-r--r--   arch/x86/kernel/ptrace.c                    |   36
-rw-r--r--   arch/x86/kernel/reboot_32.S                 |   12
-rw-r--r--   arch/x86/kernel/x86_init.c                  |    4
-rw-r--r--   arch/x86/mm/init.c                          |   24
-rw-r--r--   arch/x86/mm/numa_64.c                       |    2
-rw-r--r--   arch/x86/platform/ce4100/falconfalls.dts    |    6
-rw-r--r--   arch/x86/platform/uv/tlb_uv.c               |   92
-rw-r--r--   arch/x86/xen/mmu.c                          |   17
26 files changed, 352 insertions, 123 deletions
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index cae3feb1035..db75d07c364 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -91,7 +91,7 @@ static int detect_memory_e801(void)
 	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
 	} else if (oreg.ax == 15*1024) {
-		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+		boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
 	} else {
 		/*
 		 * This ignores memory above 16MB if we have a memory
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3..34595d5e103 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
 #define	APIC_DEST_LOGICAL	0x00800
 #define	APIC_DEST_PHYSICAL	0x00000
 #define	APIC_DM_FIXED		0x00000
+#define	APIC_DM_FIXED_MASK	0x00700
 #define	APIC_DM_LOWEST		0x00100
 #define	APIC_DM_SMI		0x00200
 #define	APIC_DM_REMRD		0x00300
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index c4bd267dfc5..a97a240f67f 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -150,7 +150,7 @@ void setup_IO_APIC_irq_extra(u32 gsi);
 extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
-int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
 
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723d1f3..d56187c6b83 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
+extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
 extern void native_pagetable_setup_start(pgd_t *base);
 extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c..130f1eeee5f 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD 5
 
+#define UV_LB_SUBNODEID 0x10
+
 /*
  * number of entries in the destination side payload queue
  */
@@ -124,7 +126,7 @@
  * The distribution specification (32 bytes) is interpreted as a 256-bit
  * distribution vector. Adjacent bits correspond to consecutive even numbered
  * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nodeid' field of the header corresponds to the
+ * 'base_dest_nasid' field of the header corresponds to the
  * destination nodeID associated with that specified bit.
  */
 struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
 struct bau_msg_header {
 	unsigned int dest_subnodeid:6;	/* must be 0x10, for the LB */
 	/* bits 5:0 */
-	unsigned int base_dest_nodeid:15; /* nasid of the */
+	unsigned int base_dest_nasid:15; /* nasid of the */
 	/* bits 20:6 */			  /* first bit in uvhub map */
 	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
 	unsigned long d_rcanceled; /* number of messages canceled by resets */
 };
 
+struct hub_and_pnode {
+	short uvhub;
+	short pnode;
+};
 /*
  * one per-cpu; to locate the software tables
  */
@@ -399,10 +405,12 @@ struct bau_control {
 	int baudisabled;
 	int set_bau_off;
 	short cpu;
+	short osnode;
 	short uvhub_cpu;
 	short uvhub;
 	short cpus_in_socket;
 	short cpus_in_uvhub;
+	short partition_base_pnode;
 	unsigned short message_number;
 	unsigned short uvhub_quiesce;
 	short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
 	int congested_period;
 	cycles_t period_time;
 	long period_requests;
+	struct hub_and_pnode *target_hub_and_pnode;
 };
 
 static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
 {
 	return constant_test_bit(uvhub, &dstp->bits[0]);
 }
-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
+static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
 {
-	__set_bit(uvhub, &dstp->bits[0]);
+	__set_bit(pnode, &dstp->bits[0]);
 }
 static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
 				    int nbits)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c233..4298002d0c8 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
 	unsigned short	nr_online_cpus;
 	unsigned short	pnode;
 	short		memory_nid;
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
 };
 extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac745..f5bb64a823d 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
 	} s;
 };
 
+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2e2ad..d3d859035af 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,17 @@ struct x86_init_oem {
 };
 
 /**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+	void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_setup_start:	platform specific pre paging_init() call
  * @pagetable_setup_done:	platform specific post paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
 	struct x86_init_mpparse		mpparse;
 	struct x86_init_irqs		irqs;
 	struct x86_init_oem		oem;
+	struct x86_init_mapping		mapping;
 	struct x86_init_paging		paging;
 	struct x86_init_timers		timers;
 	struct x86_init_iommu		iommu;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 68df09bba92..45fd33d1fd3 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -128,8 +128,8 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				       struct io_apic_irq_attr *attr);
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
+				 struct io_apic_irq_attr *attr);
 
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
@@ -3570,7 +3570,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-int
+static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
 	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
@@ -3585,8 +3585,8 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 	return ret;
 }
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr)
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+			       struct io_apic_irq_attr *attr)
 {
 	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
 	int ret;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc09..7acd2d2ac96 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
 #include <asm/smp.h>
 #include <asm/x86_init.h>
 #include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* BMC sets a bit this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR				UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR			(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK			(1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+	unsigned long real_uv_nmi;
+	int bid;
+
 	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
 		/* do nothing if entering the crash kernel */
 		return NOTIFY_OK;
+
 	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	bid = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_blade_info[bid].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_blade_info[bid].nmi_count++;
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_blade_info[bid].nmi_lock);
+	}
+
+	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+		return NOTIFY_DONE;
+
+	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+	/*
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
 	 */
 	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
 
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 }
 
 static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call = uv_handle_nmi
+	.notifier_call = uv_handle_nmi,
+	.priority = NMI_LOCAL_LOW_PRIOR - 1,
 };
 
 void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
 	BUG_ON(!uv_blade_info);
+
 	for (blade = 0; blade < uv_num_possible_blades(); blade++)
 		uv_blade_info[blade].memory_nid = -1;
 
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
 			uv_blade_info[blade].pnode = pnode;
 			uv_blade_info[blade].nr_possible_cpus = 0;
 			uv_blade_info[blade].nr_online_cpus = 0;
+			spin_lock_init(&uv_blade_info[blade].nmi_lock);
 			max_pnode = max(pnode, max_pnode);
 			blade++;
 		}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3532d3bf810..6f9d1f6063e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #endif
 
 	/* As a rule processors have APIC timer running in deep C states */
-	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+	if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
 		set_cpu_cap(c, X86_FEATURE_ARAT);
 
 	/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596..bb0adad3514 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
 out_free:
 	if (b) {
 		kobject_put(&b->kobj);
+		list_del(&b->miscj);
 		kfree(b);
 	}
 	return err;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97..0f034460260 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 
+	h = lvtthmr_init;
 	/*
 	 * The initial value of thermal LVT entries on all APs always reads
 	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
 	 * sequence to them and LVT registers are reset to 0s except for
 	 * the mask bits which are set to 1s when APs receive INIT IPI.
-	 * Always restore the value that BIOS has programmed on AP based on
-	 * BSP's info we saved since BIOS is always setting the same value
-	 * for all threads/cores
+	 * If BIOS takes over the thermal interrupt and sets its interrupt
+	 * delivery mode to SMI (not fixed), it restores the value that the
+	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
+	 * is always setting the same value for all threads/cores.
 	 */
-	apic_write(APIC_LVTTHMR, lvtthmr_init);
+	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+		apic_write(APIC_LVTTHMR, lvtthmr_init);
 
-	h = lvtthmr_init;
 
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 632e5dc9c9c..e638689279d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -613,8 +613,8 @@ static int x86_setup_perfctr(struct perf_event *event)
 	/*
 	 * Branch tracing:
 	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
@@ -1288,6 +1288,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler. As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This generic handler doesn't seem to have any issues where the
+	 * unmasking occurs so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask)) {
 			/*
@@ -1374,8 +1384,6 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 	}
 
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
 	handled = x86_pmu.handle_irq(args->regs);
 	if (!handled)
 		return NOTIFY_DONE;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 43fa20b1381..447a28de6f0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -25,7 +25,7 @@ struct intel_percore {
 /*
  * Intel PerfMon, used on Core and later.
  */
-static const u64 intel_perfmon_event_map[] =
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 {
   [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
   [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
 	},
  },
  [ C(LL  ) ] = {
-	/*
-	 * TBD: Need Off-core Response Performance Monitoring support
-	 */
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
 	},
  },
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	/*
 	 * Use RFO, not WRITEBACK, because a write miss would typically occur
 	 * on RFO.
 	 */
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
-		[ C(RESULT_ACCESS) ] = 0x01bb,
-		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
 };
 
 /*
- * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
  */
 
-#define DMND_DATA_RD     (1 << 0)
-#define DMND_RFO         (1 << 1)
-#define DMND_WB          (1 << 3)
-#define PF_DATA_RD       (1 << 4)
-#define PF_DATA_RFO      (1 << 5)
-#define RESP_UNCORE_HIT  (1 << 8)
-#define RESP_MISS        (0xf600)	/* non uncore hit */
+#define NHM_DMND_DATA_RD	(1 << 0)
+#define NHM_DMND_RFO		(1 << 1)
+#define NHM_DMND_IFETCH		(1 << 2)
+#define NHM_DMND_WB		(1 << 3)
+#define NHM_PF_DATA_RD		(1 << 4)
+#define NHM_PF_DATA_RFO		(1 << 5)
+#define NHM_PF_IFETCH		(1 << 6)
+#define NHM_OFFCORE_OTHER	(1 << 7)
+#define NHM_UNCORE_HIT		(1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
+#define NHM_OTHER_CORE_HITM	(1 << 10)
+				/* reserved */
+#define NHM_REMOTE_CACHE_FWD	(1 << 12)
+#define NHM_REMOTE_DRAM		(1 << 13)
+#define NHM_LOCAL_DRAM		(1 << 14)
+#define NHM_NON_DRAM		(1 << 15)
+
+#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+
+#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)
 
 static __initconst const u64 nehalem_hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 {
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
 	},
  }
 };
@@ -933,6 +950,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler. As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This handler doesn't seem to have any issues with the unmasking
+	 * so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -998,6 +1025,9 @@ intel_bts_constraints(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int hw_event, bts_event;
 
+	if (event->attr.freq)
+		return NULL;
+
 	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
 	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
@@ -1305,7 +1335,7 @@ static void intel_clovertown_quirks(void)
 	 * AJ106 could possibly be worked around by not allowing LBR
 	 *       usage from PEBS, including the fixup.
 	 * AJ68  could possibly be worked around by always programming
-	 * 	 a pebs_event_reset[0] value and coping with the lost events.
+	 *	 a pebs_event_reset[0] value and coping with the lost events.
 	 *
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
@@ -1409,6 +1439,18 @@ static __init int intel_pmu_init(void)
 		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+		if (ebx & 0x40) {
+			/*
+			 * Erratum AAJ80 detected, we work it around by using
+			 * the BR_MISP_EXEC.ANY event. This will over-count
+			 * branch-misses, but it's still much better than the
+			 * architectural event which is often completely bogus:
+			 */
+			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+
+			pr_cont("erratum AAJ80 worked around, ");
+		}
 		pr_cont("Nehalem events, ");
 		break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index d1f77e2934a..e93fcd55fae 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -950,11 +950,20 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		x86_pmu_stop(event, 0);
 	}
 
-	if (handled) {
-		/* p4 quirk: unmask it again */
-		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+	if (handled)
 		inc_irq_stat(apic_perf_irqs);
-	}
+
+	/*
+	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+	 * been observed that the OVF bit flag has to be cleared first _before_
+	 * the LVTPC can be unmasked.
+	 *
+	 * The reason is the NMI line will continue to be asserted while the OVF
+	 * bit is set. This causes a second NMI to generate if the LVTPC is
+	 * unmasked before the OVF bit is cleared, leading to unknown NMI
+	 * messages.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	return handled;
 }
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 706a9fb46a5..e90f08458e6 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -391,7 +391,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
 
 	set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
 
-	return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
+	return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
 }
 
 static void __init ioapic_add_ofnode(struct device_node *np)
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c969fd9d156..f1a6244d7d9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 					 struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
 
 	/* This is possible if op is under delayed unoptimizing */
 	if (kprobe_disabled(&op->kp))
 		return;
 
-	preempt_disable();
+	local_irq_save(flags);
 	if (kprobe_running()) {
 		kprobes_inc_nmissed_count(&op->kp);
 	} else {
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 		opt_pre_handler(&op->kp, regs);
 		__this_cpu_write(current_kprobe, NULL);
 	}
-	preempt_enable_no_resched();
+	local_irq_restore(flags);
 }
 
 static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72..f65e5b521db 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 	unsigned len, type;
 	struct perf_event *bp;
 
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
+
 	data &= ~DR_CONTROL_RESERVED;
 	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
@@ -655,6 +658,9 @@ restore:
 		}
 		goto restore;
 	}
+
+	ptrace_put_breakpoints(tsk);
+
 	return ((orig_ret < 0) ? orig_ret : rc);
 }
 
@@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 
 	if (n < HBP_NUM) {
 		struct perf_event *bp;
+
+		if (ptrace_get_breakpoints(tsk) < 0)
+			return -ESRCH;
+
 		bp = thread->ptrace_bps[n];
 		if (!bp)
-			return 0;
-		val = bp->hw.info.address;
+			val = 0;
+		else
+			val = bp->hw.info.address;
+
+		ptrace_put_breakpoints(tsk);
 	} else if (n == 6) {
 		val = thread->debugreg6;
 	} else if (n == 7) {
@@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
 	struct perf_event_attr attr;
+	int err = 0;
+
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
 
 	if (!t->ptrace_bps[nr]) {
 		ptrace_breakpoint_init(&attr);
@@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 		 * writing for the user. And anyway this is the previous
 		 * behaviour.
 		 */
-		if (IS_ERR(bp))
-			return PTR_ERR(bp);
+		if (IS_ERR(bp)) {
+			err = PTR_ERR(bp);
+			goto put;
+		}
 
 		t->ptrace_bps[nr] = bp;
 	} else {
-		int err;
-
 		bp = t->ptrace_bps[nr];
 
 		attr = bp->attr;
 		attr.bp_addr = addr;
 		err = modify_user_hw_breakpoint(bp, &attr);
-		if (err)
-			return err;
 	}
 
-
-	return 0;
+put:
+	ptrace_put_breakpoints(tsk);
+	return err;
 }
 
 /*
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S
index 29092b38d81..1d5c46df0d7 100644
--- a/arch/x86/kernel/reboot_32.S
+++ b/arch/x86/kernel/reboot_32.S
@@ -21,26 +21,26 @@ r_base = .
 	/* Get our own relocated address */
 	call	1f
 1:	popl	%ebx
-	subl	$1b, %ebx
+	subl	$(1b - r_base), %ebx
 
 	/* Compute the equivalent real-mode segment */
 	movl	%ebx, %ecx
 	shrl	$4, %ecx
 
 	/* Patch post-real-mode segment jump */
-	movw	dispatch_table(%ebx,%eax,2),%ax
-	movw	%ax, 101f(%ebx)
-	movw	%cx, 102f(%ebx)
+	movw	(dispatch_table - r_base)(%ebx,%eax,2),%ax
+	movw	%ax, (101f - r_base)(%ebx)
+	movw	%cx, (102f - r_base)(%ebx)
 
 	/* Set up the IDT for real mode. */
-	lidtl	machine_real_restart_idt(%ebx)
+	lidtl	(machine_real_restart_idt - r_base)(%ebx)
 
 	/*
 	 * Set up a GDT from which we can load segment descriptors for real
 	 * mode.  The GDT is not used in real mode; it is just needed here to
 	 * prepare the descriptors.
 	 */
-	lgdtl	machine_real_restart_gdt(%ebx)
+	lgdtl	(machine_real_restart_gdt - r_base)(%ebx)
 
 	/*
 	 * Load the data segment registers with 16-bit compatible values
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e9128..75ef4b18e9b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
 		.banner			= default_banner,
 	},
 
+	.mapping = {
+		.pagetable_reserve		= native_pagetable_reserve,
+	},
+
 	.paging = {
 		.pagetable_setup_start	= native_pagetable_setup_start,
 		.pagetable_setup_done	= native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289b039..37b8b0fe832 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 		end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
 }
 
+void __init native_pagetable_reserve(u64 start, u64 end)
+{
+	memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
 struct map_range {
 	unsigned long start;
 	unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
 	__flush_tlb_all();
 
+	/*
+	 * Reserve the kernel pagetable pages we used (pgt_buf_start -
+	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+	 * so that they can be reused for other purposes.
+	 *
+	 * On native it just means calling memblock_x86_reserve_range, on Xen it
+	 * also means marking RW the pagetable pages that we allocated before
+	 * but that haven't been used.
+	 *
+	 * In fact on xen we mark RO the whole range pgt_buf_start -
+	 * pgt_buf_top, because we have to make sure that when
+	 * init_memory_mapping reaches the pagetable pages area, it maps
+	 * RO all the pagetable pages, including the ones that are beyond
+	 * pgt_buf_end at that time.
+	 */
 	if (!after_bootmem && pgt_buf_end > pgt_buf_start)
-		memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
-				pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+		x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+				PFN_PHYS(pgt_buf_end));
 
 	if (!after_bootmem)
 		early_memtest(start, end);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc7203..85b52fc0308 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -306,7 +306,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 		bi->end = min(bi->end, high);
 
 		/* and there's no empty block */
-		if (bi->start == bi->end) {
+		if (bi->start >= bi->end) {
 			numa_remove_memblk_from(i--, mi);
 			continue;
 		}
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index 2d6d226f2b1..e70be38ce03 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -347,7 +347,7 @@
347 "pciclass0c03"; 347 "pciclass0c03";
348 348
349 reg = <0x16800 0x0 0x0 0x0 0x0>; 349 reg = <0x16800 0x0 0x0 0x0 0x0>;
350 interrupts = <22 3>; 350 interrupts = <22 1>;
351 }; 351 };
352 352
353 usb@d,1 { 353 usb@d,1 {
@@ -357,7 +357,7 @@
357 "pciclass0c03"; 357 "pciclass0c03";
358 358
359 reg = <0x16900 0x0 0x0 0x0 0x0>; 359 reg = <0x16900 0x0 0x0 0x0 0x0>;
360 interrupts = <22 3>; 360 interrupts = <22 1>;
361 }; 361 };
362 362
363 sata@e,0 { 363 sata@e,0 {
@@ -367,7 +367,7 @@
367 "pciclass0106"; 367 "pciclass0106";
368 368
369 reg = <0x17000 0x0 0x0 0x0 0x0>; 369 reg = <0x17000 0x0 0x0 0x0 0x0>;
370 interrupts = <23 3>; 370 interrupts = <23 1>;
371 }; 371 };
372 372
373 flash@f,0 { 373 flash@f,0 {
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f..c58e0ea39ef 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 					  struct mm_struct *mm,
 					  unsigned long va, unsigned int cpu)
 {
-	int tcpu;
-	int uvhub;
 	int locals = 0;
 	int remotes = 0;
 	int hubs = 0;
+	int tcpu;
+	int tpnode;
 	struct bau_desc *bau_desc;
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
 	struct bau_control *tbcp;
+	struct hub_and_pnode *hpp;
 
 	/* kernel was booted 'nobau' */
 	if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
-	/* cpu statistics */
 	for_each_cpu(tcpu, flush_mask) {
-		uvhub = uv_cpu_to_blade_id(tcpu);
-		bau_uvhub_set(uvhub, &bau_desc->distribution);
-		if (uvhub == bcp->uvhub)
+		/*
+		 * The distribution vector is a bit map of pnodes, relative
+		 * to the partition base pnode (and the partition base nasid
+		 * in the header).
+		 * Translate cpu to pnode and hub using an array stored
+		 * in local memory.
+		 */
+		hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
+		tpnode = hpp->pnode - bcp->partition_base_pnode;
+		bau_uvhub_set(tpnode, &bau_desc->distribution);
+		if (hpp->uvhub == bcp->uvhub)
 			locals++;
 		else
 			remotes++;
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
  * an interrupt, but causes an error message to be returned to
  * the sender.
  */
-static void uv_enable_timeouts(void)
+static void __init uv_enable_timeouts(void)
 {
 	int uvhub;
 	int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
 }
 
 /*
- * initialize the sending side's sending buffers
+ * Initialize the sending side's sending buffers.
  */
 static void
-uv_activation_descriptor_init(int node, int pnode)
+uv_activation_descriptor_init(int node, int pnode, int base_pnode)
 {
 	int i;
 	int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
 	n = pa >> uv_nshift;
 	m = pa & uv_mmask;
 
+	/* the 14-bit pnode */
 	uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
 			      (n << UV_DESC_BASE_PNODE_SHIFT | m));
-
 	/*
-	 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+	 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
 	 * cpu even though we only use the first one; one descriptor can
 	 * describe a broadcast to 256 uv hubs.
 	 */
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
 		memset(bd2, 0, sizeof(struct bau_desc));
 		bd2->header.sw_ack_flag = 1;
 		/*
-		 * base_dest_nodeid is the nasid of the first uvhub
-		 * in the partition. The bit map will indicate uvhub numbers,
-		 * which are 0-N in a partition. Pnodes are unique system-wide.
+		 * The base_dest_nasid set in the message header is the nasid
+		 * of the first uvhub in the partition. The bit map will
+		 * indicate destination pnode numbers relative to that base.
+		 * They may not be consecutive if nasid striding is being used.
 		 */
-		bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
-		bd2->header.dest_subnodeid = 0x10;	/* the LB */
+		bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
+		bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
 		bd2->header.command = UV_NET_ENDPOINT_INTD;
 		bd2->header.int_both = 1;
 		/*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
 /*
  * Initialization of each UV hub's structures
  */
-static void __init uv_init_uvhub(int uvhub, int vector)
+static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
 {
 	int node;
 	int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
 
 	node = uvhub_to_first_node(uvhub);
 	pnode = uv_blade_to_pnode(uvhub);
-	uv_activation_descriptor_init(node, pnode);
+	uv_activation_descriptor_init(node, pnode, base_pnode);
 	uv_payload_queue_init(node, pnode);
 	/*
-	 * the below initialization can't be in firmware because the
-	 * messaging IRQ will be determined by the OS
+	 * The below initialization can't be in firmware because the
+	 * messaging IRQ will be determined by the OS.
 	 */
 	apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
 	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
 /*
  * initialize the bau_control structure for each cpu
  */
-static int __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
 {
 	int i;
 	int cpu;
+	int tcpu;
 	int pnode;
 	int uvhub;
 	int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
 		bcp = &per_cpu(bau_control, cpu);
 		memset(bcp, 0, sizeof(struct bau_control));
 		pnode = uv_cpu_hub_info(cpu)->pnode;
+		if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
+			printk(KERN_EMERG
+				"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
+				cpu, pnode, base_part_pnode,
+				UV_DISTRIBUTION_SIZE);
+			return 1;
+		}
+		bcp->osnode = cpu_to_node(cpu);
+		bcp->partition_base_pnode = uv_partition_base_pnode;
 		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
 		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
 		bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
 		bdp->pnode = pnode;
 		/* kludge: 'assuming' one node per socket, and assuming that
 		   disabling a socket just leaves a gap in node numbers */
-		socket = (cpu_to_node(cpu) & 1);
+		socket = bcp->osnode & 1;
 		bdp->socket_mask |= (1 << socket);
 		sdp = &bdp->socket[socket];
 		sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
 nextsocket:
 			socket++;
 			socket_mask = (socket_mask >> 1);
+			/* each socket gets a local array of pnodes/hubs */
+			bcp = smaster;
+			bcp->target_hub_and_pnode = kmalloc_node(
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus(), GFP_KERNEL, bcp->osnode);
+			memset(bcp->target_hub_and_pnode, 0,
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus());
+			for_each_present_cpu(tcpu) {
+				bcp->target_hub_and_pnode[tcpu].pnode =
+					uv_cpu_hub_info(tcpu)->pnode;
+				bcp->target_hub_and_pnode[tcpu].uvhub =
+					uv_cpu_hub_info(tcpu)->numa_blade_id;
+			}
 		}
 	}
 	kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
 	spin_lock_init(&disable_lock);
 	congested_cycles = microsec_2_cycles(congested_response_us);
 
-	if (uv_init_per_cpu(nuvhubs)) {
-		nobau = 1;
-		return 0;
-	}
-
 	uv_partition_base_pnode = 0x7fffffff;
-	for (uvhub = 0; uvhub < nuvhubs; uvhub++)
+	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		if (uv_blade_nr_possible_cpus(uvhub) &&
 			(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
 			uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
+	}
+
+	if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
+		nobau = 1;
+		return 0;
+	}
 
 	vector = UV_BAU_MESSAGE;
 	for_each_possible_blade(uvhub)
 		if (uv_blade_nr_possible_cpus(uvhub))
-			uv_init_uvhub(uvhub, vector);
+			uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
 
 	uv_enable_timeouts();
 	alloc_intr_gate(vector, uv_bau_message_intr1);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aef7af92b28..0684f3c74d5 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 {
 }
 
+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+	/* reserve the range used */
+	native_pagetable_reserve(start, end);
+
+	/* set as RW the rest */
+	printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+			PFN_PHYS(pgt_buf_top));
+	while (end < PFN_PHYS(pgt_buf_top)) {
+		make_lowmem_page_readwrite(__va(end));
+		end += PAGE_SIZE;
+	}
+}
+
 static void xen_post_allocator_init(void);
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -1495,7 +1509,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 	 * it is RO.
 	 */
 	if (((!is_early_ioremap_ptep(ptep) &&
-			pfn >= pgt_buf_start && pfn < pgt_buf_end)) ||
+			pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
 			(is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
 		pte = pte_wrprotect(pte);
 
@@ -2105,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 void __init xen_init_mmu_ops(void)
 {
+	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
 	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
 	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;