aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/include/asm/efi.h5
-rw-r--r--arch/x86/include/asm/irqflags.h8
-rw-r--r--arch/x86/include/asm/pgtable.h12
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h9
-rw-r--r--arch/x86/kernel/apic/io_apic.c3
-rw-r--r--arch/x86/kernel/apic/ipi.c3
-rw-r--r--arch/x86/kernel/apic/probe_64.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c42
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c19
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c23
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c40
-rw-r--r--arch/x86/kernel/efi.c4
-rw-r--r--arch/x86/kernel/efi_64.c6
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/reboot.c42
-rw-r--r--arch/x86/kernel/setup_percpu.c14
-rw-r--r--arch/x86/kernel/tlb_uv.c1
-rw-r--r--arch/x86/kernel/tsc.c29
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S142
-rw-r--r--arch/x86/kvm/i8254.c3
-rw-r--r--arch/x86/kvm/mmu.c48
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c44
-rw-r--r--arch/x86/lib/msr.c26
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/pageattr.c39
-rw-r--r--arch/x86/mm/pat.c3
-rw-r--r--arch/x86/mm/pgtable.c1
-rw-r--r--arch/x86/mm/tlb.c21
-rw-r--r--arch/x86/xen/Makefile4
-rw-r--r--arch/x86/xen/enlighten.c24
43 files changed, 456 insertions, 284 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8b..13ffa5df37d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
24 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_COUNTERS if (!M386 && !M486)
27 select HAVE_IOREMAP_PROT 28 select HAVE_IOREMAP_PROT
28 select HAVE_KPROBES 29 select HAVE_KPROBES
29 select ARCH_WANT_OPTIONAL_GPIOLIB 30 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
742config X86_LOCAL_APIC 743config X86_LOCAL_APIC
743 def_bool y 744 def_bool y
744 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 745 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
745 select HAVE_PERF_COUNTERS if (!M386 && !M486)
746 746
747config X86_IO_APIC 747config X86_IO_APIC
748 def_bool y 748 def_bool y
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e2ff504b4ddc..f8ed0658404c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
4# create a compressed vmlinux image from the original vmlinux 4# create a compressed vmlinux image from the original vmlinux
5# 5#
6 6
7targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o 7targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o
8 8
9KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 9KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
10KBUILD_CFLAGS += -fno-strict-aliasing -fPIC 10KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index edc90f23e708..8406ed7f9926 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -33,7 +33,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
33#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ 33#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
34 efi_call_virt(f, a1, a2, a3, a4, a5, a6) 34 efi_call_virt(f, a1, a2, a3, a4, a5, a6)
35 35
36#define efi_ioremap(addr, size) ioremap_cache(addr, size) 36#define efi_ioremap(addr, size, type) ioremap_cache(addr, size)
37 37
38#else /* !CONFIG_X86_32 */ 38#else /* !CONFIG_X86_32 */
39 39
@@ -84,7 +84,8 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
84 efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 84 efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
85 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) 85 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
86 86
87extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); 87extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
88 u32 type);
88 89
89#endif /* CONFIG_X86_32 */ 90#endif /* CONFIG_X86_32 */
90 91
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 2bdab21f0898..c6ccbe7e81ad 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -12,9 +12,15 @@ static inline unsigned long native_save_fl(void)
12{ 12{
13 unsigned long flags; 13 unsigned long flags;
14 14
15 /*
16 * Note: this needs to be "=r" not "=rm", because we have the
17 * stack offset from what gcc expects at the time the "pop" is
18 * executed, and so a memory reference with respect to the stack
19 * would end up using the wrong address.
20 */
15 asm volatile("# __raw_save_flags\n\t" 21 asm volatile("# __raw_save_flags\n\t"
16 "pushf ; pop %0" 22 "pushf ; pop %0"
17 : "=g" (flags) 23 : "=r" (flags)
18 : /* no input */ 24 : /* no input */
19 : "memory"); 25 : "memory");
20 26
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 3cc06e3fceb8..16748077559a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_PGTABLE_H 2#define _ASM_X86_PGTABLE_H
3 3
4#include <asm/page.h> 4#include <asm/page.h>
5#include <asm/e820.h>
5 6
6#include <asm/pgtable_types.h> 7#include <asm/pgtable_types.h>
7 8
@@ -269,10 +270,17 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
269 270
270#define canon_pgprot(p) __pgprot(massage_pgprot(p)) 271#define canon_pgprot(p) __pgprot(massage_pgprot(p))
271 272
272static inline int is_new_memtype_allowed(unsigned long flags, 273static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
273 unsigned long new_flags) 274 unsigned long flags,
275 unsigned long new_flags)
274{ 276{
275 /* 277 /*
278 * PAT type is always WB for ISA. So no need to check.
279 */
280 if (is_ISA_range(paddr, paddr + size - 1))
281 return 1;
282
283 /*
276 * Certain new memtypes are not allowed with certain 284 * Certain new memtypes are not allowed with certain
277 * requested memtype: 285 * requested memtype:
278 * - request is uncached, return cannot be write-back 286 * - request is uncached, return cannot be write-back
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index bddd44f2f0ab..80e2984f521c 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,7 +133,7 @@ struct bau_msg_payload {
133 * see table 4.2.3.0.1 in broacast_assist spec. 133 * see table 4.2.3.0.1 in broacast_assist spec.
134 */ 134 */
135struct bau_msg_header { 135struct bau_msg_header {
136 unsigned int dest_subnodeid:6; /* must be zero */ 136 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
137 /* bits 5:0 */ 137 /* bits 5:0 */
138 unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ 138 unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
139 /* bits 20:6 */ /* first bit in node_map */ 139 /* bits 20:6 */ /* first bit in node_map */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 341070f7ad5c..77a68505419a 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -175,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
175#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) 175#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
176 176
177#define UV_GLOBAL_MMR64_PNODE_BITS(p) \ 177#define UV_GLOBAL_MMR64_PNODE_BITS(p) \
178 ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) 178 (((unsigned long)(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
179 179
180#define UV_APIC_PNODE_SHIFT 6 180#define UV_APIC_PNODE_SHIFT 6
181 181
@@ -327,6 +327,7 @@ struct uv_blade_info {
327 unsigned short nr_possible_cpus; 327 unsigned short nr_possible_cpus;
328 unsigned short nr_online_cpus; 328 unsigned short nr_online_cpus;
329 unsigned short pnode; 329 unsigned short pnode;
330 short memory_nid;
330}; 331};
331extern struct uv_blade_info *uv_blade_info; 332extern struct uv_blade_info *uv_blade_info;
332extern short *uv_node_to_blade; 333extern short *uv_node_to_blade;
@@ -363,6 +364,12 @@ static inline int uv_blade_to_pnode(int bid)
363 return uv_blade_info[bid].pnode; 364 return uv_blade_info[bid].pnode;
364} 365}
365 366
367/* Nid of memory node on blade. -1 if no blade-local memory */
368static inline int uv_blade_to_memory_nid(int bid)
369{
370 return uv_blade_info[bid].memory_nid;
371}
372
366/* Determine the number of possible cpus on a blade */ 373/* Determine the number of possible cpus on a blade */
367static inline int uv_blade_nr_possible_cpus(int bid) 374static inline int uv_blade_nr_possible_cpus(int bid)
368{ 375{
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2284a4812b68..d2ed6c5ddc80 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3793,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3793 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3793 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3794 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3794 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3795 3795
3796 if (cfg->move_in_progress)
3797 send_cleanup_vector(cfg);
3798
3796 return irq; 3799 return irq;
3797} 3800}
3798 3801
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index dbf5445727a9..6ef00ba4c886 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
106 unsigned long mask = cpumask_bits(cpumask)[0]; 106 unsigned long mask = cpumask_bits(cpumask)[0];
107 unsigned long flags; 107 unsigned long flags;
108 108
109 if (WARN_ONCE(!mask, "empty IPI mask"))
110 return;
111
109 local_irq_save(flags); 112 local_irq_save(flags);
110 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); 113 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
111 __default_send_IPI_dest_field(mask, vector, apic->dest_logical); 114 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index bc3e880f9b82..fcec2f1d34a1 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -44,6 +44,11 @@ static struct apic *apic_probe[] __initdata = {
44 NULL, 44 NULL,
45}; 45};
46 46
47static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
48{
49 return hard_smp_processor_id() >> index_msb;
50}
51
47/* 52/*
48 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 53 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
49 */ 54 */
@@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void)
69 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); 74 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
70 } 75 }
71 76
77 if (is_vsmp_box()) {
78 /* need to update phys_pkg_id */
79 apic->phys_pkg_id = apicid_phys_pkg_id;
80 }
81
72 /* 82 /*
73 * Now that apic routing model is selected, configure the 83 * Now that apic routing model is selected, configure the
74 * fault handling for intr remapping. 84 * fault handling for intr remapping.
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8e4cbb255c38..a5371ec36776 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
17 return x2apic_enabled(); 17 return x2apic_enabled();
18} 18}
19 19
20/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 20/*
21 21 * need to use more than cpu 0, because we need more vectors when
22 * MSI-X are used.
23 */
22static const struct cpumask *x2apic_target_cpus(void) 24static const struct cpumask *x2apic_target_cpus(void)
23{ 25{
24 return cpumask_of(0); 26 return cpu_online_mask;
25} 27}
26 28
27/* 29/*
@@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id)
170 172
171static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) 173static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
172{ 174{
173 return current_cpu_data.initial_apicid >> index_msb; 175 return initial_apicid >> index_msb;
174} 176}
175 177
176static void x2apic_send_IPI_self(int vector) 178static void x2apic_send_IPI_self(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index a284359627e7..a8989aadc99a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
27 return 0; 27 return 0;
28} 28}
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/*
31 31 * need to use more than cpu 0, because we need more vectors when
32 * MSI-X are used.
33 */
32static const struct cpumask *x2apic_target_cpus(void) 34static const struct cpumask *x2apic_target_cpus(void)
33{ 35{
34 return cpumask_of(0); 36 return cpu_online_mask;
35} 37}
36 38
37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) 39static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id)
162 164
163static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) 165static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
164{ 166{
165 return current_cpu_data.initial_apicid >> index_msb; 167 return initial_apicid >> index_msb;
166} 168}
167 169
168static void x2apic_send_IPI_self(int vector) 170static void x2apic_send_IPI_self(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 096d19aea2f7..601159374e87 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -46,7 +46,7 @@ static int early_get_nodeid(void)
46 return node_id.s.node_id; 46 return node_id.s.node_id;
47} 47}
48 48
49static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 49static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
50{ 50{
51 if (!strcmp(oem_id, "SGI")) { 51 if (!strcmp(oem_id, "SGI")) {
52 if (!strcmp(oem_table_id, "UVL")) 52 if (!strcmp(oem_table_id, "UVL"))
@@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector)
253 apic_write(APIC_SELF_IPI, vector); 253 apic_write(APIC_SELF_IPI, vector);
254} 254}
255 255
256struct apic apic_x2apic_uv_x = { 256struct apic __refdata apic_x2apic_uv_x = {
257 257
258 .name = "UV large system", 258 .name = "UV large system",
259 .probe = NULL, 259 .probe = NULL,
@@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = {
261 .apic_id_registered = uv_apic_id_registered, 261 .apic_id_registered = uv_apic_id_registered,
262 262
263 .irq_delivery_mode = dest_Fixed, 263 .irq_delivery_mode = dest_Fixed,
264 .irq_dest_mode = 1, /* logical */ 264 .irq_dest_mode = 0, /* physical */
265 265
266 .target_cpus = uv_target_cpus, 266 .target_cpus = uv_target_cpus,
267 .disable_esr = 0, 267 .disable_esr = 0,
@@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
362 BUG(); 362 BUG();
363} 363}
364 364
365static __init void map_low_mmrs(void)
366{
367 init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
368 init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
369}
370
371enum map_type {map_wb, map_uc}; 365enum map_type {map_wb, map_uc};
372 366
373static __init void map_high(char *id, unsigned long base, int shift, 367static __init void map_high(char *id, unsigned long base, int shift,
@@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode)
395 map_high("GRU", gru.s.base, shift, max_pnode, map_wb); 389 map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
396} 390}
397 391
398static __init void map_config_high(int max_pnode)
399{
400 union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
401 int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
402
403 cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
404 if (cfg.s.enable)
405 map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
406}
407
408static __init void map_mmr_high(int max_pnode)
409{
410 union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
411 int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
412
413 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
414 if (mmr.s.enable)
415 map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
416}
417
418static __init void map_mmioh_high(int max_pnode) 392static __init void map_mmioh_high(int max_pnode)
419{ 393{
420 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; 394 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
@@ -566,8 +540,6 @@ void __init uv_system_init(void)
566 unsigned long mmr_base, present, paddr; 540 unsigned long mmr_base, present, paddr;
567 unsigned short pnode_mask; 541 unsigned short pnode_mask;
568 542
569 map_low_mmrs();
570
571 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); 543 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
572 m_val = m_n_config.s.m_skt; 544 m_val = m_n_config.s.m_skt;
573 n_val = m_n_config.s.n_skt; 545 n_val = m_n_config.s.n_skt;
@@ -591,6 +563,8 @@ void __init uv_system_init(void)
591 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 563 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
592 uv_blade_info = kmalloc(bytes, GFP_KERNEL); 564 uv_blade_info = kmalloc(bytes, GFP_KERNEL);
593 BUG_ON(!uv_blade_info); 565 BUG_ON(!uv_blade_info);
566 for (blade = 0; blade < uv_num_possible_blades(); blade++)
567 uv_blade_info[blade].memory_nid = -1;
594 568
595 get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); 569 get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
596 570
@@ -629,6 +603,9 @@ void __init uv_system_init(void)
629 lcpu = uv_blade_info[blade].nr_possible_cpus; 603 lcpu = uv_blade_info[blade].nr_possible_cpus;
630 uv_blade_info[blade].nr_possible_cpus++; 604 uv_blade_info[blade].nr_possible_cpus++;
631 605
606 /* Any node on the blade, else will contain -1. */
607 uv_blade_info[blade].memory_nid = nid;
608
632 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; 609 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
633 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; 610 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
634 uv_cpu_hub_info(cpu)->m_val = m_val; 611 uv_cpu_hub_info(cpu)->m_val = m_val;
@@ -662,11 +639,10 @@ void __init uv_system_init(void)
662 pnode = (paddr >> m_val) & pnode_mask; 639 pnode = (paddr >> m_val) & pnode_mask;
663 blade = boot_pnode_to_blade(pnode); 640 blade = boot_pnode_to_blade(pnode);
664 uv_node_to_blade[nid] = blade; 641 uv_node_to_blade[nid] = blade;
642 max_pnode = max(pnode, max_pnode);
665 } 643 }
666 644
667 map_gru_high(max_pnode); 645 map_gru_high(max_pnode);
668 map_mmr_high(max_pnode);
669 map_config_high(max_pnode);
670 map_mmioh_high(max_pnode); 646 map_mmioh_high(max_pnode);
671 647
672 uv_cpu_init(); 648 uv_cpu_init();
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 79302e9a33a4..442b5508893f 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -811,7 +811,7 @@ static int apm_do_idle(void)
811 u8 ret = 0; 811 u8 ret = 0;
812 int idled = 0; 812 int idled = 0;
813 int polling; 813 int polling;
814 int err; 814 int err = 0;
815 815
816 polling = !!(current_thread_info()->status & TS_POLLING); 816 polling = !!(current_thread_info()->status & TS_POLLING);
817 if (polling) { 817 if (polling) {
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3efcb2b96a15..c1f253dac155 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER
7CFLAGS_REMOVE_common.o = -pg 7CFLAGS_REMOVE_common.o = -pg
8endif 8endif
9 9
10# Make sure load_percpu_segment has no stackprotector
11nostackp := $(call cc-option, -fno-stack-protector)
12CFLAGS_common.o := $(nostackp)
13
10obj-y := intel_cacheinfo.o addon_cpuid_features.o 14obj-y := intel_cacheinfo.o addon_cpuid_features.o
11obj-y += proc.o capflags.o powerflags.o common.o 15obj-y += proc.o capflags.o powerflags.o common.o
12obj-y += vmware.o hypervisor.o 16obj-y += vmware.o hypervisor.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e2485b03f1cf..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
400 level = cpuid_eax(1); 400 level = cpuid_eax(1);
401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
402 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 402 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403
404 /*
405 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it.
407 */
408 if (c->x86_model < 0x14)
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
403 } 410 }
404 if (c->x86 == 0x10 || c->x86 == 0x11) 411 if (c->x86 == 0x10 || c->x86 == 0x11)
405 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 412 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
60} 60}
61 61
62static const struct cpu_dev *this_cpu __cpuinitdata; 62static void __cpuinit default_init(struct cpuinfo_x86 *c)
63{
64#ifdef CONFIG_X86_64
65 display_cacheinfo(c);
66#else
67 /* Not much we can do here... */
68 /* Check if at least it has cpuid */
69 if (c->cpuid_level == -1) {
70 /* No cpuid. It must be an ancient CPU */
71 if (c->x86 == 4)
72 strcpy(c->x86_model_id, "486");
73 else if (c->x86 == 3)
74 strcpy(c->x86_model_id, "386");
75 }
76#endif
77}
78
79static const struct cpu_dev __cpuinitconst default_cpu = {
80 .c_init = default_init,
81 .c_vendor = "Unknown",
82 .c_x86_vendor = X86_VENDOR_UNKNOWN,
83};
84
85static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
63 86
64DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 87DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 88#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
332 355
333static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 356static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
334 357
335static void __cpuinit default_init(struct cpuinfo_x86 *c)
336{
337#ifdef CONFIG_X86_64
338 display_cacheinfo(c);
339#else
340 /* Not much we can do here... */
341 /* Check if at least it has cpuid */
342 if (c->cpuid_level == -1) {
343 /* No cpuid. It must be an ancient CPU */
344 if (c->x86 == 4)
345 strcpy(c->x86_model_id, "486");
346 else if (c->x86 == 3)
347 strcpy(c->x86_model_id, "386");
348 }
349#endif
350}
351
352static const struct cpu_dev __cpuinitconst default_cpu = {
353 .c_init = default_init,
354 .c_vendor = "Unknown",
355 .c_x86_vendor = X86_VENDOR_UNKNOWN,
356};
357
358static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 358static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
359{ 359{
360 unsigned int *v; 360 unsigned int *v;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1cfb623ce11c..01213048f62f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1226,8 +1226,13 @@ static void mce_init(void)
1226} 1226}
1227 1227
1228/* Add per CPU specific workarounds here */ 1228/* Add per CPU specific workarounds here */
1229static void mce_cpu_quirks(struct cpuinfo_x86 *c) 1229static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1230{ 1230{
1231 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1232 pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
1233 return -EOPNOTSUPP;
1234 }
1235
1231 /* This should be disabled by the BIOS, but isn't always */ 1236 /* This should be disabled by the BIOS, but isn't always */
1232 if (c->x86_vendor == X86_VENDOR_AMD) { 1237 if (c->x86_vendor == X86_VENDOR_AMD) {
1233 if (c->x86 == 15 && banks > 4) { 1238 if (c->x86 == 15 && banks > 4) {
@@ -1273,11 +1278,20 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
1273 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && 1278 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
1274 monarch_timeout < 0) 1279 monarch_timeout < 0)
1275 monarch_timeout = USEC_PER_SEC; 1280 monarch_timeout = USEC_PER_SEC;
1281
1282 /*
1283 * There are also broken BIOSes on some Pentium M and
1284 * earlier systems:
1285 */
1286 if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
1287 mce_bootlog = 0;
1276 } 1288 }
1277 if (monarch_timeout < 0) 1289 if (monarch_timeout < 0)
1278 monarch_timeout = 0; 1290 monarch_timeout = 0;
1279 if (mce_bootlog != 0) 1291 if (mce_bootlog != 0)
1280 mce_panic_timeout = 30; 1292 mce_panic_timeout = 30;
1293
1294 return 0;
1281} 1295}
1282 1296
1283static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) 1297static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
@@ -1338,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
1338 if (!mce_available(c)) 1352 if (!mce_available(c))
1339 return; 1353 return;
1340 1354
1341 if (mce_cap_init() < 0) { 1355 if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
1342 mce_disabled = 1; 1356 mce_disabled = 1;
1343 return; 1357 return;
1344 } 1358 }
1345 mce_cpu_quirks(c);
1346 1359
1347 machine_check_vector = do_machine_check; 1360 machine_check_vector = do_machine_check;
1348 1361
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd5..5957a93e5173 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
36 36
37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
39static DEFINE_PER_CPU(bool, thermal_throttle_active);
39 40
40static atomic_t therm_throt_en = ATOMIC_INIT(0); 41static atomic_t therm_throt_en = ATOMIC_INIT(0);
41 42
@@ -96,27 +97,33 @@ static int therm_throt_process(int curr)
96{ 97{
97 unsigned int cpu = smp_processor_id(); 98 unsigned int cpu = smp_processor_id();
98 __u64 tmp_jiffs = get_jiffies_64(); 99 __u64 tmp_jiffs = get_jiffies_64();
100 bool was_throttled = __get_cpu_var(thermal_throttle_active);
101 bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
99 102
100 if (curr) 103 if (is_throttled)
101 __get_cpu_var(thermal_throttle_count)++; 104 __get_cpu_var(thermal_throttle_count)++;
102 105
103 if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) 106 if (!(was_throttled ^ is_throttled) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check)))
104 return 0; 108 return 0;
105 109
106 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
107 111
108 /* if we just entered the thermal event */ 112 /* if we just entered the thermal event */
109 if (curr) { 113 if (is_throttled) {
110 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 114 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
111 "cpu clock throttled (total events = %lu)\n", cpu, 115 "cpu clock throttled (total events = %lu)\n",
112 __get_cpu_var(thermal_throttle_count)); 116 cpu, __get_cpu_var(thermal_throttle_count));
113 117
114 add_taint(TAINT_MACHINE_CHECK); 118 add_taint(TAINT_MACHINE_CHECK);
115 } else { 119 return 1;
116 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); 120 }
121 if (was_throttled) {
122 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
123 return 1;
117 } 124 }
118 125
119 return 1; 126 return 0;
120} 127}
121 128
122#ifdef CONFIG_SYSFS 129#ifdef CONFIG_SYSFS
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a7aa8f900954..900332b800f8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
55 int num_counters_fixed; 55 int num_counters_fixed;
56 int counter_bits; 56 int counter_bits;
57 u64 counter_mask; 57 u64 counter_mask;
58 int apic;
58 u64 max_period; 59 u64 max_period;
59 u64 intel_ctrl; 60 u64 intel_ctrl;
60}; 61};
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
72{ 73{
73 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, 74 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
74 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 75 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
75 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, 76 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
76 [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, 77 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
77 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 78 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
78 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 79 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
79 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, 80 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
613 614
614static bool reserve_pmc_hardware(void) 615static bool reserve_pmc_hardware(void)
615{ 616{
617#ifdef CONFIG_X86_LOCAL_APIC
616 int i; 618 int i;
617 619
618 if (nmi_watchdog == NMI_LOCAL_APIC) 620 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
627 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 629 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
628 goto eventsel_fail; 630 goto eventsel_fail;
629 } 631 }
632#endif
630 633
631 return true; 634 return true;
632 635
636#ifdef CONFIG_X86_LOCAL_APIC
633eventsel_fail: 637eventsel_fail:
634 for (i--; i >= 0; i--) 638 for (i--; i >= 0; i--)
635 release_evntsel_nmi(x86_pmu.eventsel + i); 639 release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
644 enable_lapic_nmi_watchdog(); 648 enable_lapic_nmi_watchdog();
645 649
646 return false; 650 return false;
651#endif
647} 652}
648 653
649static void release_pmc_hardware(void) 654static void release_pmc_hardware(void)
650{ 655{
656#ifdef CONFIG_X86_LOCAL_APIC
651 int i; 657 int i;
652 658
653 for (i = 0; i < x86_pmu.num_counters; i++) { 659 for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
657 663
658 if (nmi_watchdog == NMI_LOCAL_APIC) 664 if (nmi_watchdog == NMI_LOCAL_APIC)
659 enable_lapic_nmi_watchdog(); 665 enable_lapic_nmi_watchdog();
666#endif
660} 667}
661 668
662static void hw_perf_counter_destroy(struct perf_counter *counter) 669static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
748 hwc->sample_period = x86_pmu.max_period; 755 hwc->sample_period = x86_pmu.max_period;
749 hwc->last_period = hwc->sample_period; 756 hwc->last_period = hwc->sample_period;
750 atomic64_set(&hwc->period_left, hwc->sample_period); 757 atomic64_set(&hwc->period_left, hwc->sample_period);
758 } else {
759 /*
760 * If we have a PMU initialized but no APIC
761 * interrupts, we cannot sample hardware
762 * counters (user-space has to fall back and
763 * sample via a hrtimer based software counter):
764 */
765 if (!x86_pmu.apic)
766 return -EOPNOTSUPP;
751 } 767 }
752 768
753 counter->destroy = hw_perf_counter_destroy; 769 counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1449 1465
1450void set_perf_counter_pending(void) 1466void set_perf_counter_pending(void)
1451{ 1467{
1468#ifdef CONFIG_X86_LOCAL_APIC
1452 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1469 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1470#endif
1453} 1471}
1454 1472
1455void perf_counters_lapic_init(void) 1473void perf_counters_lapic_init(void)
1456{ 1474{
1457 if (!x86_pmu_initialized()) 1475#ifdef CONFIG_X86_LOCAL_APIC
1476 if (!x86_pmu.apic || !x86_pmu_initialized())
1458 return; 1477 return;
1459 1478
1460 /* 1479 /*
1461 * Always use NMI for PMU 1480 * Always use NMI for PMU
1462 */ 1481 */
1463 apic_write(APIC_LVTPC, APIC_DM_NMI); 1482 apic_write(APIC_LVTPC, APIC_DM_NMI);
1483#endif
1464} 1484}
1465 1485
1466static int __kprobes 1486static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
1484 1504
1485 regs = args->regs; 1505 regs = args->regs;
1486 1506
1507#ifdef CONFIG_X86_LOCAL_APIC
1487 apic_write(APIC_LVTPC, APIC_DM_NMI); 1508 apic_write(APIC_LVTPC, APIC_DM_NMI);
1509#endif
1488 /* 1510 /*
1489 * Can't rely on the handled return value to say it was our NMI, two 1511 * Can't rely on the handled return value to say it was our NMI, two
1490 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1512 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
1515 .event_map = p6_pmu_event_map, 1537 .event_map = p6_pmu_event_map,
1516 .raw_event = p6_pmu_raw_event, 1538 .raw_event = p6_pmu_raw_event,
1517 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 1539 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1540 .apic = 1,
1518 .max_period = (1ULL << 31) - 1, 1541 .max_period = (1ULL << 31) - 1,
1519 .version = 0, 1542 .version = 0,
1520 .num_counters = 2, 1543 .num_counters = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
1541 .event_map = intel_pmu_event_map, 1564 .event_map = intel_pmu_event_map,
1542 .raw_event = intel_pmu_raw_event, 1565 .raw_event = intel_pmu_raw_event,
1543 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 1566 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1567 .apic = 1,
1544 /* 1568 /*
1545 * Intel PMCs cannot be accessed sanely above 32 bit width, 1569 * Intel PMCs cannot be accessed sanely above 32 bit width,
1546 * so we install an artificial 1<<31 period regardless of 1570 * so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
1564 .num_counters = 4, 1588 .num_counters = 4,
1565 .counter_bits = 48, 1589 .counter_bits = 48,
1566 .counter_mask = (1ULL << 48) - 1, 1590 .counter_mask = (1ULL << 48) - 1,
1591 .apic = 1,
1567 /* use highest bit to detect overflow */ 1592 /* use highest bit to detect overflow */
1568 .max_period = (1ULL << 47) - 1, 1593 .max_period = (1ULL << 47) - 1,
1569}; 1594};
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
1589 return -ENODEV; 1614 return -ENODEV;
1590 } 1615 }
1591 1616
1617 x86_pmu = p6_pmu;
1618
1592 if (!cpu_has_apic) { 1619 if (!cpu_has_apic) {
1593 pr_info("no Local APIC, try rebooting with lapic"); 1620 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1594 return -ENODEV; 1621 pr_info("no hardware sampling interrupt available.\n");
1622 x86_pmu.apic = 0;
1595 } 1623 }
1596 1624
1597 x86_pmu = p6_pmu;
1598
1599 return 0; 1625 return 0;
1600} 1626}
1601 1627
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 96f7ac0bbf01..fe26ba3e3451 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -354,7 +354,7 @@ void __init efi_init(void)
354 */ 354 */
355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); 355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
356 if (c16) { 356 if (c16) {
357 for (i = 0; i < sizeof(vendor) && *c16; ++i) 357 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
358 vendor[i] = *c16++; 358 vendor[i] = *c16++;
359 vendor[i] = '\0'; 359 vendor[i] = '\0';
360 } else 360 } else
@@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void)
512 && end_pfn <= max_pfn_mapped)) 512 && end_pfn <= max_pfn_mapped))
513 va = __va(md->phys_addr); 513 va = __va(md->phys_addr);
514 else 514 else
515 va = efi_ioremap(md->phys_addr, size); 515 va = efi_ioremap(md->phys_addr, size, md->type);
516 516
517 md->virt_addr = (u64) (unsigned long) va; 517 md->virt_addr = (u64) (unsigned long) va;
518 518
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 22c3b7828c50..ac0621a7ac3d 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void)
98 early_runtime_code_mapping_set_exec(0); 98 early_runtime_code_mapping_set_exec(0);
99} 99}
100 100
101void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) 101void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
102 u32 type)
102{ 103{
103 unsigned long last_map_pfn; 104 unsigned long last_map_pfn;
104 105
106 if (type == EFI_MEMORY_MAPPED_IO)
107 return ioremap(phys_addr, size);
108
105 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); 109 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
106 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) 110 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
107 return NULL; 111 return NULL;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 8663afb56535..cc827ac9e8d3 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -261,9 +261,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
261 * which will be freed later 261 * which will be freed later
262 */ 262 */
263 263
264#ifndef CONFIG_HOTPLUG_CPU 264__CPUINIT
265.section .init.text,"ax",@progbits
266#endif
267 265
268#ifdef CONFIG_SMP 266#ifdef CONFIG_SMP
269ENTRY(startup_32_smp) 267ENTRY(startup_32_smp)
@@ -602,7 +600,7 @@ ignore_int:
602#endif 600#endif
603 iret 601 iret
604 602
605.section .cpuinit.data,"wa" 603 __REFDATA
606.align 4 604.align 4
607ENTRY(initial_code) 605ENTRY(initial_code)
608 .long i386_start_kernel 606 .long i386_start_kernel
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 994dd6a4a2a0..071166a4ba83 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -519,16 +519,12 @@ static void c1e_idle(void)
519 if (!cpumask_test_cpu(cpu, c1e_mask)) { 519 if (!cpumask_test_cpu(cpu, c1e_mask)) {
520 cpumask_set_cpu(cpu, c1e_mask); 520 cpumask_set_cpu(cpu, c1e_mask);
521 /* 521 /*
522 * Force broadcast so ACPI can not interfere. Needs 522 * Force broadcast so ACPI can not interfere.
523 * to run with interrupts enabled as it uses
524 * smp_function_call.
525 */ 523 */
526 local_irq_enable();
527 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, 524 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
528 &cpu); 525 &cpu);
529 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", 526 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
530 cpu); 527 cpu);
531 local_irq_disable();
532 } 528 }
533 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); 529 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
534 530
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 508e982dd072..a06e8d101844 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -3,6 +3,7 @@
3#include <linux/init.h> 3#include <linux/init.h>
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/efi.h> 5#include <linux/efi.h>
6#include <linux/dmi.h>
6#include <acpi/reboot.h> 7#include <acpi/reboot.h>
7#include <asm/io.h> 8#include <asm/io.h>
8#include <asm/apic.h> 9#include <asm/apic.h>
@@ -17,7 +18,6 @@
17#include <asm/cpu.h> 18#include <asm/cpu.h>
18 19
19#ifdef CONFIG_X86_32 20#ifdef CONFIG_X86_32
20# include <linux/dmi.h>
21# include <linux/ctype.h> 21# include <linux/ctype.h>
22# include <linux/mc146818rtc.h> 22# include <linux/mc146818rtc.h>
23#else 23#else
@@ -404,6 +404,46 @@ EXPORT_SYMBOL(machine_real_restart);
404 404
405#endif /* CONFIG_X86_32 */ 405#endif /* CONFIG_X86_32 */
406 406
407/*
408 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
409 */
410static int __init set_pci_reboot(const struct dmi_system_id *d)
411{
412 if (reboot_type != BOOT_CF9) {
413 reboot_type = BOOT_CF9;
414 printk(KERN_INFO "%s series board detected. "
415 "Selecting PCI-method for reboots.\n", d->ident);
416 }
417 return 0;
418}
419
420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
421 { /* Handle problems with rebooting on Apple MacBook5 */
422 .callback = set_pci_reboot,
423 .ident = "Apple MacBook5",
424 .matches = {
425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
427 },
428 },
429 { /* Handle problems with rebooting on Apple MacBookPro5 */
430 .callback = set_pci_reboot,
431 .ident = "Apple MacBookPro5",
432 .matches = {
433 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
434 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
435 },
436 },
437 { }
438};
439
440static int __init pci_reboot_init(void)
441{
442 dmi_check_system(pci_reboot_dmi_table);
443 return 0;
444}
445core_initcall(pci_reboot_init);
446
407static inline void kb_wait(void) 447static inline void kb_wait(void)
408{ 448{
409 int i; 449 int i;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 29a3eef7cf4a..07d81916f212 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -165,7 +165,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
165 165
166 if (!chosen) { 166 if (!chosen) {
167 size_t vm_size = VMALLOC_END - VMALLOC_START; 167 size_t vm_size = VMALLOC_END - VMALLOC_START;
168 size_t tot_size = num_possible_cpus() * PMD_SIZE; 168 size_t tot_size = nr_cpu_ids * PMD_SIZE;
169 169
170 /* on non-NUMA, embedding is better */ 170 /* on non-NUMA, embedding is better */
171 if (!pcpu_need_numa()) 171 if (!pcpu_need_numa())
@@ -199,7 +199,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
199 dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; 199 dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
200 200
201 /* allocate pointer array and alloc large pages */ 201 /* allocate pointer array and alloc large pages */
202 map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); 202 map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
203 pcpul_map = alloc_bootmem(map_size); 203 pcpul_map = alloc_bootmem(map_size);
204 204
205 for_each_possible_cpu(cpu) { 205 for_each_possible_cpu(cpu) {
@@ -228,7 +228,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
228 228
229 /* allocate address and map */ 229 /* allocate address and map */
230 pcpul_vm.flags = VM_ALLOC; 230 pcpul_vm.flags = VM_ALLOC;
231 pcpul_vm.size = num_possible_cpus() * PMD_SIZE; 231 pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
232 vm_area_register_early(&pcpul_vm, PMD_SIZE); 232 vm_area_register_early(&pcpul_vm, PMD_SIZE);
233 233
234 for_each_possible_cpu(cpu) { 234 for_each_possible_cpu(cpu) {
@@ -250,8 +250,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
250 PMD_SIZE, pcpul_vm.addr, NULL); 250 PMD_SIZE, pcpul_vm.addr, NULL);
251 251
252 /* sort pcpul_map array for pcpu_lpage_remapped() */ 252 /* sort pcpul_map array for pcpu_lpage_remapped() */
253 for (i = 0; i < num_possible_cpus() - 1; i++) 253 for (i = 0; i < nr_cpu_ids - 1; i++)
254 for (j = i + 1; j < num_possible_cpus(); j++) 254 for (j = i + 1; j < nr_cpu_ids; j++)
255 if (pcpul_map[i].ptr > pcpul_map[j].ptr) { 255 if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
256 struct pcpul_ent tmp = pcpul_map[i]; 256 struct pcpul_ent tmp = pcpul_map[i];
257 pcpul_map[i] = pcpul_map[j]; 257 pcpul_map[i] = pcpul_map[j];
@@ -288,7 +288,7 @@ void *pcpu_lpage_remapped(void *kaddr)
288{ 288{
289 void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); 289 void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
290 unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; 290 unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
291 int left = 0, right = num_possible_cpus() - 1; 291 int left = 0, right = nr_cpu_ids - 1;
292 int pos; 292 int pos;
293 293
294 /* pcpul in use at all? */ 294 /* pcpul in use at all? */
@@ -377,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
377 pcpu4k_nr_static_pages = PFN_UP(static_size); 377 pcpu4k_nr_static_pages = PFN_UP(static_size);
378 378
379 /* unaligned allocations can't be freed, round up to page size */ 379 /* unaligned allocations can't be freed, round up to page size */
380 pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() 380 pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
381 * sizeof(pcpu4k_pages[0])); 381 * sizeof(pcpu4k_pages[0]));
382 pcpu4k_pages = alloc_bootmem(pages_size); 382 pcpu4k_pages = alloc_bootmem(pages_size);
383 383
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 8ccabb8a2f6a..77b9689f8edb 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -744,6 +744,7 @@ uv_activation_descriptor_init(int node, int pnode)
744 * note that base_dest_nodeid is actually a nasid. 744 * note that base_dest_nodeid is actually a nasid.
745 */ 745 */
746 ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; 746 ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
747 ad2->header.dest_subnodeid = 0x10; /* the LB */
747 ad2->header.command = UV_NET_ENDPOINT_INTD; 748 ad2->header.command = UV_NET_ENDPOINT_INTD;
748 ad2->header.int_both = 1; 749 ad2->header.int_both = 1;
749 /* 750 /*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6e1a368d21d4..71f4368b357e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
275 * use the TSC value at the transitions to calculate a pretty 275 * use the TSC value at the transitions to calculate a pretty
276 * good value for the TSC frequencty. 276 * good value for the TSC frequencty.
277 */ 277 */
278static inline int pit_verify_msb(unsigned char val)
279{
280 /* Ignore LSB */
281 inb(0x42);
282 return inb(0x42) == val;
283}
284
278static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 285static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
279{ 286{
280 int count; 287 int count;
281 u64 tsc = 0; 288 u64 tsc = 0;
282 289
283 for (count = 0; count < 50000; count++) { 290 for (count = 0; count < 50000; count++) {
284 /* Ignore LSB */ 291 if (!pit_verify_msb(val))
285 inb(0x42);
286 if (inb(0x42) != val)
287 break; 292 break;
288 tsc = get_cycles(); 293 tsc = get_cycles();
289 } 294 }
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void)
336 * to do that is to just read back the 16-bit counter 341 * to do that is to just read back the 16-bit counter
337 * once from the PIT. 342 * once from the PIT.
338 */ 343 */
339 inb(0x42); 344 pit_verify_msb(0);
340 inb(0x42);
341 345
342 if (pit_expect_msb(0xff, &tsc, &d1)) { 346 if (pit_expect_msb(0xff, &tsc, &d1)) {
343 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { 347 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void)
348 * Iterate until the error is less than 500 ppm 352 * Iterate until the error is less than 500 ppm
349 */ 353 */
350 delta -= tsc; 354 delta -= tsc;
351 if (d1+d2 < delta >> 11) 355 if (d1+d2 >= delta >> 11)
352 goto success; 356 continue;
357
358 /*
359 * Check the PIT one more time to verify that
360 * all TSC reads were stable wrt the PIT.
361 *
362 * This also guarantees serialization of the
363 * last cycle read ('d2') in pit_expect_msb.
364 */
365 if (!pit_verify_msb(0xfe - i))
366 break;
367 goto success;
353 } 368 }
354 } 369 }
355 printk("Fast TSC calibration failed\n"); 370 printk("Fast TSC calibration failed\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b263423fbe2a..95a7289e4b0c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
441 ap.ds = __USER_DS; 441 ap.ds = __USER_DS;
442 ap.es = __USER_DS; 442 ap.es = __USER_DS;
443 ap.fs = __KERNEL_PERCPU; 443 ap.fs = __KERNEL_PERCPU;
444 ap.gs = 0; 444 ap.gs = __KERNEL_STACK_CANARY;
445 445
446 ap.eflags = 0; 446 ap.eflags = 0;
447 447
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 59f31d2dd435..9fc178255c04 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -46,11 +46,10 @@ PHDRS {
46 data PT_LOAD FLAGS(7); /* RWE */ 46 data PT_LOAD FLAGS(7); /* RWE */
47#ifdef CONFIG_X86_64 47#ifdef CONFIG_X86_64
48 user PT_LOAD FLAGS(7); /* RWE */ 48 user PT_LOAD FLAGS(7); /* RWE */
49 data.init PT_LOAD FLAGS(7); /* RWE */
50#ifdef CONFIG_SMP 49#ifdef CONFIG_SMP
51 percpu PT_LOAD FLAGS(7); /* RWE */ 50 percpu PT_LOAD FLAGS(7); /* RWE */
52#endif 51#endif
53 data.init2 PT_LOAD FLAGS(7); /* RWE */ 52 init PT_LOAD FLAGS(7); /* RWE */
54#endif 53#endif
55 note PT_NOTE FLAGS(0); /* ___ */ 54 note PT_NOTE FLAGS(0); /* ___ */
56} 55}
@@ -103,65 +102,43 @@ SECTIONS
103 __stop___ex_table = .; 102 __stop___ex_table = .;
104 } :text = 0x9090 103 } :text = 0x9090
105 104
106 RODATA 105 RO_DATA(PAGE_SIZE)
107 106
108 /* Data */ 107 /* Data */
109 . = ALIGN(PAGE_SIZE);
110 .data : AT(ADDR(.data) - LOAD_OFFSET) { 108 .data : AT(ADDR(.data) - LOAD_OFFSET) {
111 /* Start of data section */ 109 /* Start of data section */
112 _sdata = .; 110 _sdata = .;
113 DATA_DATA 111
114 CONSTRUCTORS 112 /* init_task */
115 } :data 113 INIT_TASK_DATA(THREAD_SIZE)
116 114
117#ifdef CONFIG_X86_32 115#ifdef CONFIG_X86_32
118 /* 32 bit has nosave before _edata */ 116 /* 32 bit has nosave before _edata */
119 . = ALIGN(PAGE_SIZE); 117 NOSAVE_DATA
120 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
121 __nosave_begin = .;
122 *(.data.nosave)
123 . = ALIGN(PAGE_SIZE);
124 __nosave_end = .;
125 }
126#endif 118#endif
127 119
128 . = ALIGN(PAGE_SIZE); 120 PAGE_ALIGNED_DATA(PAGE_SIZE)
129 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
130 *(.data.page_aligned)
131 *(.data.idt) 121 *(.data.idt)
132 }
133 122
134#ifdef CONFIG_X86_32 123 CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
135 . = ALIGN(32);
136#else
137 . = ALIGN(PAGE_SIZE);
138 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
139#endif
140 .data.cacheline_aligned :
141 AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
142 *(.data.cacheline_aligned)
143 }
144 124
145 /* rarely changed data like cpu maps */ 125 DATA_DATA
146#ifdef CONFIG_X86_32 126 CONSTRUCTORS
147 . = ALIGN(32); 127
148#else 128 /* rarely changed data like cpu maps */
149 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); 129 READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)
150#endif
151 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
152 *(.data.read_mostly)
153 130
154 /* End of data section */ 131 /* End of data section */
155 _edata = .; 132 _edata = .;
156 } 133 } :data
157 134
158#ifdef CONFIG_X86_64 135#ifdef CONFIG_X86_64
159 136
160#define VSYSCALL_ADDR (-10*1024*1024) 137#define VSYSCALL_ADDR (-10*1024*1024)
161#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ 138#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
162 SIZEOF(.data.read_mostly) + 4095) & ~(4095)) 139 PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
163#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ 140#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
164 SIZEOF(.data.read_mostly) + 4095) & ~(4095)) 141 PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
165 142
166#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) 143#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
167#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) 144#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
@@ -227,35 +204,29 @@ SECTIONS
227 204
228#endif /* CONFIG_X86_64 */ 205#endif /* CONFIG_X86_64 */
229 206
230 /* init_task */ 207 /* Init code and data - will be freed after init */
231 . = ALIGN(THREAD_SIZE); 208 . = ALIGN(PAGE_SIZE);
232 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 209 .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
233 *(.data.init_task) 210 __init_begin = .; /* paired with __init_end */
234 } 211 }
235#ifdef CONFIG_X86_64
236 :data.init
237#endif
238 212
213#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
239 /* 214 /*
240 * smp_locks might be freed after init 215 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
241 * start/end must be page aligned 216 * output PHDR, so the next output section - .init.text - should
217 * start another segment - init.
242 */ 218 */
243 . = ALIGN(PAGE_SIZE); 219 PERCPU_VADDR(0, :percpu)
244 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { 220#endif
245 __smp_locks = .;
246 *(.smp_locks)
247 __smp_locks_end = .;
248 . = ALIGN(PAGE_SIZE);
249 }
250 221
251 /* Init code and data - will be freed after init */
252 . = ALIGN(PAGE_SIZE);
253 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 222 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
254 __init_begin = .; /* paired with __init_end */
255 _sinittext = .; 223 _sinittext = .;
256 INIT_TEXT 224 INIT_TEXT
257 _einittext = .; 225 _einittext = .;
258 } 226 }
227#ifdef CONFIG_X86_64
228 :init
229#endif
259 230
260 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { 231 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
261 INIT_DATA 232 INIT_DATA
@@ -326,17 +297,7 @@ SECTIONS
326 } 297 }
327#endif 298#endif
328 299
329#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) 300#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
330 /*
331 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
332 * output PHDR, so the next output section - __data_nosave - should
333 * start another section data.init2. Also, pda should be at the head of
334 * percpu area. Preallocate it and define the percpu offset symbol
335 * so that it can be accessed as a percpu variable.
336 */
337 . = ALIGN(PAGE_SIZE);
338 PERCPU_VADDR(0, :percpu)
339#else
340 PERCPU(PAGE_SIZE) 301 PERCPU(PAGE_SIZE)
341#endif 302#endif
342 303
@@ -347,15 +308,22 @@ SECTIONS
347 __init_end = .; 308 __init_end = .;
348 } 309 }
349 310
311 /*
312 * smp_locks might be freed after init
313 * start/end must be page aligned
314 */
315 . = ALIGN(PAGE_SIZE);
316 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
317 __smp_locks = .;
318 *(.smp_locks)
319 __smp_locks_end = .;
320 . = ALIGN(PAGE_SIZE);
321 }
322
350#ifdef CONFIG_X86_64 323#ifdef CONFIG_X86_64
351 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { 324 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
352 . = ALIGN(PAGE_SIZE); 325 NOSAVE_DATA
353 __nosave_begin = .; 326 }
354 *(.data.nosave)
355 . = ALIGN(PAGE_SIZE);
356 __nosave_end = .;
357 } :data.init2
358 /* use another section data.init2, see PERCPU_VADDR() above */
359#endif 327#endif
360 328
361 /* BSS */ 329 /* BSS */
@@ -393,8 +361,8 @@ SECTIONS
393 361
394 362
395#ifdef CONFIG_X86_32 363#ifdef CONFIG_X86_32
396ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), 364. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
397 "kernel image bigger than KERNEL_IMAGE_SIZE") 365 "kernel image bigger than KERNEL_IMAGE_SIZE");
398#else 366#else
399/* 367/*
400 * Per-cpu symbols which need to be offset from __per_cpu_load 368 * Per-cpu symbols which need to be offset from __per_cpu_load
@@ -407,12 +375,12 @@ INIT_PER_CPU(irq_stack_union);
407/* 375/*
408 * Build-time check on the image size: 376 * Build-time check on the image size:
409 */ 377 */
410ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 378. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
411 "kernel image bigger than KERNEL_IMAGE_SIZE") 379 "kernel image bigger than KERNEL_IMAGE_SIZE");
412 380
413#ifdef CONFIG_SMP 381#ifdef CONFIG_SMP
414ASSERT((per_cpu__irq_stack_union == 0), 382. = ASSERT((per_cpu__irq_stack_union == 0),
415 "irq_stack_union is not at start of per-cpu area"); 383 "irq_stack_union is not at start of per-cpu area");
416#endif 384#endif
417 385
418#endif /* CONFIG_X86_32 */ 386#endif /* CONFIG_X86_32 */
@@ -420,7 +388,7 @@ ASSERT((per_cpu__irq_stack_union == 0),
420#ifdef CONFIG_KEXEC 388#ifdef CONFIG_KEXEC
421#include <asm/kexec.h> 389#include <asm/kexec.h>
422 390
423ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, 391. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
424 "kexec control code size is too big") 392 "kexec control code size is too big");
425#endif 393#endif
426 394
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4d6f0d293ee2..21f68e00524f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
104 ktime_t remaining; 104 ktime_t remaining;
105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; 105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
106 106
107 if (!ps->pit_timer.period)
108 return 0;
109
107 /* 110 /*
108 * The Counter does not stop when it reaches zero. In 111 * The Counter does not stop when it reaches zero. In
109 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to 112 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7030b5f911bf..0ef5bb2b4043 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
489 * 489 *
490 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc 490 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
491 * containing more mappings. 491 * containing more mappings.
492 *
493 * Returns the number of rmap entries before the spte was added or zero if
494 * the spte was not added.
495 *
492 */ 496 */
493static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) 497static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
494{ 498{
495 struct kvm_mmu_page *sp; 499 struct kvm_mmu_page *sp;
496 struct kvm_rmap_desc *desc; 500 struct kvm_rmap_desc *desc;
497 unsigned long *rmapp; 501 unsigned long *rmapp;
498 int i; 502 int i, count = 0;
499 503
500 if (!is_rmap_pte(*spte)) 504 if (!is_rmap_pte(*spte))
501 return; 505 return count;
502 gfn = unalias_gfn(vcpu->kvm, gfn); 506 gfn = unalias_gfn(vcpu->kvm, gfn);
503 sp = page_header(__pa(spte)); 507 sp = page_header(__pa(spte));
504 sp->gfns[spte - sp->spt] = gfn; 508 sp->gfns[spte - sp->spt] = gfn;
@@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
515 } else { 519 } else {
516 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); 520 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
517 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); 521 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
518 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) 522 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
519 desc = desc->more; 523 desc = desc->more;
524 count += RMAP_EXT;
525 }
520 if (desc->shadow_ptes[RMAP_EXT-1]) { 526 if (desc->shadow_ptes[RMAP_EXT-1]) {
521 desc->more = mmu_alloc_rmap_desc(vcpu); 527 desc->more = mmu_alloc_rmap_desc(vcpu);
522 desc = desc->more; 528 desc = desc->more;
@@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
525 ; 531 ;
526 desc->shadow_ptes[i] = spte; 532 desc->shadow_ptes[i] = spte;
527 } 533 }
534 return count;
528} 535}
529 536
530static void rmap_desc_remove_entry(unsigned long *rmapp, 537static void rmap_desc_remove_entry(unsigned long *rmapp,
@@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
754 return young; 761 return young;
755} 762}
756 763
764#define RMAP_RECYCLE_THRESHOLD 1000
765
766static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
767{
768 unsigned long *rmapp;
769
770 gfn = unalias_gfn(vcpu->kvm, gfn);
771 rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
772
773 kvm_unmap_rmapp(vcpu->kvm, rmapp);
774 kvm_flush_remote_tlbs(vcpu->kvm);
775}
776
757int kvm_age_hva(struct kvm *kvm, unsigned long hva) 777int kvm_age_hva(struct kvm *kvm, unsigned long hva)
758{ 778{
759 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 779 return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
@@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1407 */ 1427 */
1408void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) 1428void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1409{ 1429{
1430 int used_pages;
1431
1432 used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
1433 used_pages = max(0, used_pages);
1434
1410 /* 1435 /*
1411 * If we set the number of mmu pages to be smaller be than the 1436 * If we set the number of mmu pages to be smaller be than the
1412 * number of actived pages , we must to free some mmu pages before we 1437 * number of actived pages , we must to free some mmu pages before we
1413 * change the value 1438 * change the value
1414 */ 1439 */
1415 1440
1416 if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > 1441 if (used_pages > kvm_nr_mmu_pages) {
1417 kvm_nr_mmu_pages) { 1442 while (used_pages > kvm_nr_mmu_pages) {
1418 int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
1419 - kvm->arch.n_free_mmu_pages;
1420
1421 while (n_used_mmu_pages > kvm_nr_mmu_pages) {
1422 struct kvm_mmu_page *page; 1443 struct kvm_mmu_page *page;
1423 1444
1424 page = container_of(kvm->arch.active_mmu_pages.prev, 1445 page = container_of(kvm->arch.active_mmu_pages.prev,
1425 struct kvm_mmu_page, link); 1446 struct kvm_mmu_page, link);
1426 kvm_mmu_zap_page(kvm, page); 1447 kvm_mmu_zap_page(kvm, page);
1427 n_used_mmu_pages--; 1448 used_pages--;
1428 } 1449 }
1429 kvm->arch.n_free_mmu_pages = 0; 1450 kvm->arch.n_free_mmu_pages = 0;
1430 } 1451 }
@@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1740{ 1761{
1741 int was_rmapped = 0; 1762 int was_rmapped = 0;
1742 int was_writeble = is_writeble_pte(*shadow_pte); 1763 int was_writeble = is_writeble_pte(*shadow_pte);
1764 int rmap_count;
1743 1765
1744 pgprintk("%s: spte %llx access %x write_fault %d" 1766 pgprintk("%s: spte %llx access %x write_fault %d"
1745 " user_fault %d gfn %lx\n", 1767 " user_fault %d gfn %lx\n",
@@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1781 1803
1782 page_header_update_slot(vcpu->kvm, shadow_pte, gfn); 1804 page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
1783 if (!was_rmapped) { 1805 if (!was_rmapped) {
1784 rmap_add(vcpu, shadow_pte, gfn, largepage); 1806 rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
1785 if (!is_rmap_pte(*shadow_pte)) 1807 if (!is_rmap_pte(*shadow_pte))
1786 kvm_release_pfn_clean(pfn); 1808 kvm_release_pfn_clean(pfn);
1809 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1810 rmap_recycle(vcpu, gfn, largepage);
1787 } else { 1811 } else {
1788 if (was_writeble) 1812 if (was_writeble)
1789 kvm_release_pfn_dirty(pfn); 1813 kvm_release_pfn_dirty(pfn);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 71510e07e69e..b1f658ad2f06 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
711 svm->vmcb->control.tsc_offset += delta; 711 svm->vmcb->control.tsc_offset += delta;
712 vcpu->cpu = cpu; 712 vcpu->cpu = cpu;
713 kvm_migrate_timers(vcpu); 713 kvm_migrate_timers(vcpu);
714 svm->asid_generation = 0;
714 } 715 }
715 716
716 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 717 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
@@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
1031 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; 1032 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1032 } 1033 }
1033 1034
1034 svm->vcpu.cpu = svm_data->cpu;
1035 svm->asid_generation = svm_data->asid_generation; 1035 svm->asid_generation = svm_data->asid_generation;
1036 svm->vmcb->control.asid = svm_data->next_asid++; 1036 svm->vmcb->control.asid = svm_data->next_asid++;
1037} 1037}
@@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm)
2300 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 2300 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
2301 2301
2302 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 2302 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
2303 if (svm->vcpu.cpu != cpu || 2303 /* FIXME: handle wraparound of asid_generation */
2304 svm->asid_generation != svm_data->asid_generation) 2304 if (svm->asid_generation != svm_data->asid_generation)
2305 new_asid(svm, svm_data); 2305 new_asid(svm, svm_data);
2306} 2306}
2307 2307
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 356a0ce85c68..29f912927a58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3157 struct vcpu_vmx *vmx = to_vmx(vcpu); 3157 struct vcpu_vmx *vmx = to_vmx(vcpu);
3158 enum emulation_result err = EMULATE_DONE; 3158 enum emulation_result err = EMULATE_DONE;
3159 3159
3160 preempt_enable();
3161 local_irq_enable(); 3160 local_irq_enable();
3161 preempt_enable();
3162 3162
3163 while (!guest_state_valid(vcpu)) { 3163 while (!guest_state_valid(vcpu)) {
3164 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3164 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3168 3168
3169 if (err != EMULATE_DONE) { 3169 if (err != EMULATE_DONE) {
3170 kvm_report_emulation_failure(vcpu, "emulation failure"); 3170 kvm_report_emulation_failure(vcpu, "emulation failure");
3171 return; 3171 break;
3172 } 3172 }
3173 3173
3174 if (signal_pending(current)) 3174 if (signal_pending(current))
@@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3177 schedule(); 3177 schedule();
3178 } 3178 }
3179 3179
3180 local_irq_disable();
3181 preempt_disable(); 3180 preempt_disable();
3181 local_irq_disable();
3182 3182
3183 vmx->invalid_state_emulation_result = err; 3183 vmx->invalid_state_emulation_result = err;
3184} 3184}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe5474aec41a..3d4529011828 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr)
704 return false; 704 return false;
705} 705}
706 706
707static bool valid_pat_type(unsigned t)
708{
709 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
710}
711
712static bool valid_mtrr_type(unsigned t)
713{
714 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
715}
716
717static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
718{
719 int i;
720
721 if (!msr_mtrr_valid(msr))
722 return false;
723
724 if (msr == MSR_IA32_CR_PAT) {
725 for (i = 0; i < 8; i++)
726 if (!valid_pat_type((data >> (i * 8)) & 0xff))
727 return false;
728 return true;
729 } else if (msr == MSR_MTRRdefType) {
730 if (data & ~0xcff)
731 return false;
732 return valid_mtrr_type(data & 0xff);
733 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
734 for (i = 0; i < 8 ; i++)
735 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
736 return false;
737 return true;
738 }
739
740 /* variable MTRRs */
741 return valid_mtrr_type(data & 0xff);
742}
743
707static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 744static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
708{ 745{
709 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 746 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
710 747
711 if (!msr_mtrr_valid(msr)) 748 if (!mtrr_valid(vcpu, msr, data))
712 return 1; 749 return 1;
713 750
714 if (msr == MSR_MTRRdefType) { 751 if (msr == MSR_MTRRdefType) {
@@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
1079 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) 1116 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
1080 goto out; 1117 goto out;
1081 r = -E2BIG; 1118 r = -E2BIG;
1082 if (n < num_msrs_to_save) 1119 if (n < msr_list.nmsrs)
1083 goto out; 1120 goto out;
1084 r = -EFAULT; 1121 r = -EFAULT;
1085 if (copy_to_user(user_msr_list->indices, &msrs_to_save, 1122 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
1086 num_msrs_to_save * sizeof(u32))) 1123 num_msrs_to_save * sizeof(u32)))
1087 goto out; 1124 goto out;
1088 if (copy_to_user(user_msr_list->indices 1125 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
1089 + num_msrs_to_save * sizeof(u32),
1090 &emulated_msrs, 1126 &emulated_msrs,
1091 ARRAY_SIZE(emulated_msrs) * sizeof(u32))) 1127 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
1092 goto out; 1128 goto out;
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 1440b9c0547e..caa24aca8115 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -89,16 +89,13 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs)
89 rv.msrs = msrs; 89 rv.msrs = msrs;
90 rv.msr_no = msr_no; 90 rv.msr_no = msr_no;
91 91
92 preempt_disable(); 92 this_cpu = get_cpu();
93 /* 93
94 * FIXME: handle the CPU we're executing on separately for now until 94 if (cpumask_test_cpu(this_cpu, mask))
95 * smp_call_function_many has been fixed to not skip it. 95 __rdmsr_on_cpu(&rv);
96 */
97 this_cpu = raw_smp_processor_id();
98 smp_call_function_single(this_cpu, __rdmsr_on_cpu, &rv, 1);
99 96
100 smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); 97 smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1);
101 preempt_enable(); 98 put_cpu();
102} 99}
103EXPORT_SYMBOL(rdmsr_on_cpus); 100EXPORT_SYMBOL(rdmsr_on_cpus);
104 101
@@ -121,16 +118,13 @@ void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs)
121 rv.msrs = msrs; 118 rv.msrs = msrs;
122 rv.msr_no = msr_no; 119 rv.msr_no = msr_no;
123 120
124 preempt_disable(); 121 this_cpu = get_cpu();
125 /* 122
126 * FIXME: handle the CPU we're executing on separately for now until 123 if (cpumask_test_cpu(this_cpu, mask))
127 * smp_call_function_many has been fixed to not skip it. 124 __wrmsr_on_cpu(&rv);
128 */
129 this_cpu = raw_smp_processor_id();
130 smp_call_function_single(this_cpu, __wrmsr_on_cpu, &rv, 1);
131 125
132 smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); 126 smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1);
133 preempt_enable(); 127 put_cpu();
134} 128}
135EXPORT_SYMBOL(wrmsr_on_cpus); 129EXPORT_SYMBOL(wrmsr_on_cpus);
136 130
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6176fe8f29e0..ea56b8cbb6a6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -796,7 +796,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
796 return ret; 796 return ret;
797 797
798#else 798#else
799 reserve_bootmem(phys, len, BOOTMEM_DEFAULT); 799 reserve_bootmem(phys, len, flags);
800#endif 800#endif
801 801
802 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { 802 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1b734d7a8966..7e600c1962db 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -591,9 +591,12 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
591 unsigned int level; 591 unsigned int level;
592 pte_t *kpte, old_pte; 592 pte_t *kpte, old_pte;
593 593
594 if (cpa->flags & CPA_PAGES_ARRAY) 594 if (cpa->flags & CPA_PAGES_ARRAY) {
595 address = (unsigned long)page_address(cpa->pages[cpa->curpage]); 595 struct page *page = cpa->pages[cpa->curpage];
596 else if (cpa->flags & CPA_ARRAY) 596 if (unlikely(PageHighMem(page)))
597 return 0;
598 address = (unsigned long)page_address(page);
599 } else if (cpa->flags & CPA_ARRAY)
597 address = cpa->vaddr[cpa->curpage]; 600 address = cpa->vaddr[cpa->curpage];
598 else 601 else
599 address = *cpa->vaddr; 602 address = *cpa->vaddr;
@@ -697,9 +700,12 @@ static int cpa_process_alias(struct cpa_data *cpa)
697 * No need to redo, when the primary call touched the direct 700 * No need to redo, when the primary call touched the direct
698 * mapping already: 701 * mapping already:
699 */ 702 */
700 if (cpa->flags & CPA_PAGES_ARRAY) 703 if (cpa->flags & CPA_PAGES_ARRAY) {
701 vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); 704 struct page *page = cpa->pages[cpa->curpage];
702 else if (cpa->flags & CPA_ARRAY) 705 if (unlikely(PageHighMem(page)))
706 return 0;
707 vaddr = (unsigned long)page_address(page);
708 } else if (cpa->flags & CPA_ARRAY)
703 vaddr = cpa->vaddr[cpa->curpage]; 709 vaddr = cpa->vaddr[cpa->curpage];
704 else 710 else
705 vaddr = *cpa->vaddr; 711 vaddr = *cpa->vaddr;
@@ -997,12 +1003,15 @@ EXPORT_SYMBOL(set_memory_array_uc);
997int _set_memory_wc(unsigned long addr, int numpages) 1003int _set_memory_wc(unsigned long addr, int numpages)
998{ 1004{
999 int ret; 1005 int ret;
1006 unsigned long addr_copy = addr;
1007
1000 ret = change_page_attr_set(&addr, numpages, 1008 ret = change_page_attr_set(&addr, numpages,
1001 __pgprot(_PAGE_CACHE_UC_MINUS), 0); 1009 __pgprot(_PAGE_CACHE_UC_MINUS), 0);
1002
1003 if (!ret) { 1010 if (!ret) {
1004 ret = change_page_attr_set(&addr, numpages, 1011 ret = change_page_attr_set_clr(&addr_copy, numpages,
1005 __pgprot(_PAGE_CACHE_WC), 0); 1012 __pgprot(_PAGE_CACHE_WC),
1013 __pgprot(_PAGE_CACHE_MASK),
1014 0, 0, NULL);
1006 } 1015 }
1007 return ret; 1016 return ret;
1008} 1017}
@@ -1119,7 +1128,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray)
1119 int free_idx; 1128 int free_idx;
1120 1129
1121 for (i = 0; i < addrinarray; i++) { 1130 for (i = 0; i < addrinarray; i++) {
1122 start = (unsigned long)page_address(pages[i]); 1131 if (PageHighMem(pages[i]))
1132 continue;
1133 start = page_to_pfn(pages[i]) << PAGE_SHIFT;
1123 end = start + PAGE_SIZE; 1134 end = start + PAGE_SIZE;
1124 if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) 1135 if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
1125 goto err_out; 1136 goto err_out;
@@ -1132,7 +1143,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray)
1132err_out: 1143err_out:
1133 free_idx = i; 1144 free_idx = i;
1134 for (i = 0; i < free_idx; i++) { 1145 for (i = 0; i < free_idx; i++) {
1135 start = (unsigned long)page_address(pages[i]); 1146 if (PageHighMem(pages[i]))
1147 continue;
1148 start = page_to_pfn(pages[i]) << PAGE_SHIFT;
1136 end = start + PAGE_SIZE; 1149 end = start + PAGE_SIZE;
1137 free_memtype(start, end); 1150 free_memtype(start, end);
1138 } 1151 }
@@ -1161,7 +1174,9 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
1161 return retval; 1174 return retval;
1162 1175
1163 for (i = 0; i < addrinarray; i++) { 1176 for (i = 0; i < addrinarray; i++) {
1164 start = (unsigned long)page_address(pages[i]); 1177 if (PageHighMem(pages[i]))
1178 continue;
1179 start = page_to_pfn(pages[i]) << PAGE_SHIFT;
1165 end = start + PAGE_SIZE; 1180 end = start + PAGE_SIZE;
1166 free_memtype(start, end); 1181 free_memtype(start, end);
1167 } 1182 }
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e6718bb28065..352aa9e927e2 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -623,7 +623,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
623 return ret; 623 return ret;
624 624
625 if (flags != want_flags) { 625 if (flags != want_flags) {
626 if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) { 626 if (strict_prot ||
627 !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
627 free_memtype(paddr, paddr + size); 628 free_memtype(paddr, paddr + size);
628 printk(KERN_ERR "%s:%d map pfn expected mapping type %s" 629 printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
629 " for %Lx-%Lx, got %s\n", 630 " for %Lx-%Lx, got %s\n",
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index af8f9650058c..ed34f5e35999 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -329,7 +329,6 @@ void __init reserve_top_address(unsigned long reserve)
329 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", 329 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
330 (int)-reserve); 330 (int)-reserve);
331 __FIXADDR_TOP = -reserve - PAGE_SIZE; 331 __FIXADDR_TOP = -reserve - PAGE_SIZE;
332 __VMALLOC_RESERVE += reserve;
333#endif 332#endif
334} 333}
335 334
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 821e97017e95..c814e144a3f0 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -183,18 +183,17 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
183 183
184 f->flush_mm = mm; 184 f->flush_mm = mm;
185 f->flush_va = va; 185 f->flush_va = va;
186 cpumask_andnot(to_cpumask(f->flush_cpumask), 186 if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
187 cpumask, cpumask_of(smp_processor_id())); 187 /*
188 188 * We have to send the IPI only to
189 /* 189 * CPUs affected.
190 * We have to send the IPI only to 190 */
191 * CPUs affected. 191 apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
192 */ 192 INVALIDATE_TLB_VECTOR_START + sender);
193 apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
194 INVALIDATE_TLB_VECTOR_START + sender);
195 193
196 while (!cpumask_empty(to_cpumask(f->flush_cpumask))) 194 while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
197 cpu_relax(); 195 cpu_relax();
196 }
198 197
199 f->flush_mm = NULL; 198 f->flush_mm = NULL;
200 f->flush_va = 0; 199 f->flush_va = 0;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 172438f86a02..7410640db173 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -5,6 +5,10 @@ CFLAGS_REMOVE_time.o = -pg
5CFLAGS_REMOVE_irq.o = -pg 5CFLAGS_REMOVE_irq.o = -pg
6endif 6endif
7 7
8# Make sure early boot has no stackprotector
9nostackp := $(call cc-option, -fno-stack-protector)
10CFLAGS_enlighten.o := $(nostackp)
11
8obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ 12obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
9 time.o xen-asm.o xen-asm_$(BITS).o \ 13 time.o xen-asm.o xen-asm_$(BITS).o \
10 grant-table.o suspend.o 14 grant-table.o suspend.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0a1700a2be9c..eb33aaa8415d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -215,6 +215,7 @@ static __init void xen_init_cpuid_mask(void)
215 (1 << X86_FEATURE_ACPI)); /* disable ACPI */ 215 (1 << X86_FEATURE_ACPI)); /* disable ACPI */
216 216
217 ax = 1; 217 ax = 1;
218 cx = 0;
218 xen_cpuid(&ax, &bx, &cx, &dx); 219 xen_cpuid(&ax, &bx, &cx, &dx);
219 220
220 /* cpuid claims we support xsave; try enabling it to see what happens */ 221 /* cpuid claims we support xsave; try enabling it to see what happens */
@@ -974,10 +975,6 @@ asmlinkage void __init xen_start_kernel(void)
974 975
975 xen_domain_type = XEN_PV_DOMAIN; 976 xen_domain_type = XEN_PV_DOMAIN;
976 977
977 BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
978
979 xen_setup_features();
980
981 /* Install Xen paravirt ops */ 978 /* Install Xen paravirt ops */
982 pv_info = xen_info; 979 pv_info = xen_info;
983 pv_init_ops = xen_init_ops; 980 pv_init_ops = xen_init_ops;
@@ -986,8 +983,15 @@ asmlinkage void __init xen_start_kernel(void)
986 pv_apic_ops = xen_apic_ops; 983 pv_apic_ops = xen_apic_ops;
987 pv_mmu_ops = xen_mmu_ops; 984 pv_mmu_ops = xen_mmu_ops;
988 985
989 xen_init_irq_ops(); 986#ifdef CONFIG_X86_64
987 /*
988 * Setup percpu state. We only need to do this for 64-bit
989 * because 32-bit already has %fs set properly.
990 */
991 load_percpu_segment(0);
992#endif
990 993
994 xen_init_irq_ops();
991 xen_init_cpuid_mask(); 995 xen_init_cpuid_mask();
992 996
993#ifdef CONFIG_X86_LOCAL_APIC 997#ifdef CONFIG_X86_LOCAL_APIC
@@ -997,6 +1001,8 @@ asmlinkage void __init xen_start_kernel(void)
997 set_xen_basic_apic_ops(); 1001 set_xen_basic_apic_ops();
998#endif 1002#endif
999 1003
1004 xen_setup_features();
1005
1000 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1006 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1001 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1007 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1002 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1008 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
@@ -1004,13 +1010,6 @@ asmlinkage void __init xen_start_kernel(void)
1004 1010
1005 machine_ops = xen_machine_ops; 1011 machine_ops = xen_machine_ops;
1006 1012
1007#ifdef CONFIG_X86_64
1008 /*
1009 * Setup percpu state. We only need to do this for 64-bit
1010 * because 32-bit already has %fs set properly.
1011 */
1012 load_percpu_segment(0);
1013#endif
1014 /* 1013 /*
1015 * The only reliable way to retain the initial address of the 1014 * The only reliable way to retain the initial address of the
1016 * percpu gdt_page is to remember it here, so we can go and 1015 * percpu gdt_page is to remember it here, so we can go and
@@ -1061,6 +1060,7 @@ asmlinkage void __init xen_start_kernel(void)
1061 /* set up basic CPUID stuff */ 1060 /* set up basic CPUID stuff */
1062 cpu_detect(&new_cpu_data); 1061 cpu_detect(&new_cpu_data);
1063 new_cpu_data.hard_math = 1; 1062 new_cpu_data.hard_math = 1;
1063 new_cpu_data.wp_works_ok = 1;
1064 new_cpu_data.x86_capability[0] = cpuid_edx(1); 1064 new_cpu_data.x86_capability[0] = cpuid_edx(1);
1065#endif 1065#endif
1066 1066