author    Ingo Molnar <mingo@elte.hu>    2009-09-02 02:17:56 -0400
committer Ingo Molnar <mingo@elte.hu>    2009-09-02 02:17:56 -0400
commit    936e894a976dd3b0f07f1f6f43c17b77b7e6146d (patch)
tree      5ed5c1f6735dcd26550594df23c8f7fe2aa21a15 /arch/x86/kernel
parent    69575d388603365f2afbf4166df93152df59b165 (diff)
parent    326ba5010a5429a5a528b268b36a5900d4ab0eba (diff)
Merge commit 'v2.6.31-rc8' into x86/txt

Conflicts:
	arch/x86/kernel/reboot.c
	security/Kconfig

Merge reason: resolve the conflicts, bump up from rc3 to rc8.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/apic/io_apic.c            |   3
-rw-r--r--  arch/x86/kernel/apic/ipi.c                |   3
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c     |  10
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c        |  10
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c        |  42
-rw-r--r--  arch/x86/kernel/apm_32.c                  |   2
-rw-r--r--  arch/x86/kernel/cpu/Makefile              |   4
-rw-r--r--  arch/x86/kernel/cpu/amd.c                 |   9
-rw-r--r--  arch/x86/kernel/cpu/common.c              |  48
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c          |  25
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c  |  23
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c        | 281
-rw-r--r--  arch/x86/kernel/efi.c                     |   4
-rw-r--r--  arch/x86/kernel/efi_64.c                  |   6
-rw-r--r--  arch/x86/kernel/head_32.S                 |   6
-rw-r--r--  arch/x86/kernel/irqinit.c                 |   2
-rw-r--r--  arch/x86/kernel/mfgpt_32.c                |   2
-rw-r--r--  arch/x86/kernel/process.c                 |   6
-rw-r--r--  arch/x86/kernel/reboot.c                  |  50
-rw-r--r--  arch/x86/kernel/setup.c                   |  13
-rw-r--r--  arch/x86/kernel/setup_percpu.c            |  14
-rw-r--r--  arch/x86/kernel/tlb_uv.c                  |   1
-rw-r--r--  arch/x86/kernel/tsc.c                     |  29
-rw-r--r--  arch/x86/kernel/vmi_32.c                  |   2
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S             | 147
25 files changed, 516 insertions(+), 226 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 2284a4812b68..d2ed6c5ddc80 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3793,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3793 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3793 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3794 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3794 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3795 3795
3796 if (cfg->move_in_progress)
3797 send_cleanup_vector(cfg);
3798
3796 return irq; 3799 return irq;
3797} 3800}
3798 3801
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index dbf5445727a9..6ef00ba4c886 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
106 unsigned long mask = cpumask_bits(cpumask)[0]; 106 unsigned long mask = cpumask_bits(cpumask)[0];
107 unsigned long flags; 107 unsigned long flags;
108 108
109 if (WARN_ONCE(!mask, "empty IPI mask"))
110 return;
111
109 local_irq_save(flags); 112 local_irq_save(flags);
110 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); 113 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
111 __default_send_IPI_dest_field(mask, vector, apic->dest_logical); 114 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8e4cbb255c38..a5371ec36776 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
17 return x2apic_enabled(); 17 return x2apic_enabled();
18} 18}
19 19
20/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 20/*
21 21 * need to use more than cpu 0, because we need more vectors when
22 * MSI-X are used.
23 */
22static const struct cpumask *x2apic_target_cpus(void) 24static const struct cpumask *x2apic_target_cpus(void)
23{ 25{
24 return cpumask_of(0); 26 return cpu_online_mask;
25} 27}
26 28
27/* 29/*
@@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id)
170 172
171static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) 173static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
172{ 174{
173 return current_cpu_data.initial_apicid >> index_msb; 175 return initial_apicid >> index_msb;
174} 176}
175 177
176static void x2apic_send_IPI_self(int vector) 178static void x2apic_send_IPI_self(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index a284359627e7..a8989aadc99a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
27 return 0; 27 return 0;
28} 28}
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/*
31 31 * need to use more than cpu 0, because we need more vectors when
32 * MSI-X are used.
33 */
32static const struct cpumask *x2apic_target_cpus(void) 34static const struct cpumask *x2apic_target_cpus(void)
33{ 35{
34 return cpumask_of(0); 36 return cpu_online_mask;
35} 37}
36 38
37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) 39static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id)
162 164
163static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) 165static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
164{ 166{
165 return current_cpu_data.initial_apicid >> index_msb; 167 return initial_apicid >> index_msb;
166} 168}
167 169
168static void x2apic_send_IPI_self(int vector) 170static void x2apic_send_IPI_self(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 096d19aea2f7..601159374e87 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -46,7 +46,7 @@ static int early_get_nodeid(void)
46 return node_id.s.node_id; 46 return node_id.s.node_id;
47} 47}
48 48
49static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 49static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
50{ 50{
51 if (!strcmp(oem_id, "SGI")) { 51 if (!strcmp(oem_id, "SGI")) {
52 if (!strcmp(oem_table_id, "UVL")) 52 if (!strcmp(oem_table_id, "UVL"))
@@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector)
253 apic_write(APIC_SELF_IPI, vector); 253 apic_write(APIC_SELF_IPI, vector);
254} 254}
255 255
256struct apic apic_x2apic_uv_x = { 256struct apic __refdata apic_x2apic_uv_x = {
257 257
258 .name = "UV large system", 258 .name = "UV large system",
259 .probe = NULL, 259 .probe = NULL,
@@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = {
261 .apic_id_registered = uv_apic_id_registered, 261 .apic_id_registered = uv_apic_id_registered,
262 262
263 .irq_delivery_mode = dest_Fixed, 263 .irq_delivery_mode = dest_Fixed,
264 .irq_dest_mode = 1, /* logical */ 264 .irq_dest_mode = 0, /* physical */
265 265
266 .target_cpus = uv_target_cpus, 266 .target_cpus = uv_target_cpus,
267 .disable_esr = 0, 267 .disable_esr = 0,
@@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
362 BUG(); 362 BUG();
363} 363}
364 364
365static __init void map_low_mmrs(void)
366{
367 init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
368 init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
369}
370
371enum map_type {map_wb, map_uc}; 365enum map_type {map_wb, map_uc};
372 366
373static __init void map_high(char *id, unsigned long base, int shift, 367static __init void map_high(char *id, unsigned long base, int shift,
@@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode)
395 map_high("GRU", gru.s.base, shift, max_pnode, map_wb); 389 map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
396} 390}
397 391
398static __init void map_config_high(int max_pnode)
399{
400 union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
401 int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
402
403 cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
404 if (cfg.s.enable)
405 map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
406}
407
408static __init void map_mmr_high(int max_pnode)
409{
410 union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
411 int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
412
413 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
414 if (mmr.s.enable)
415 map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
416}
417
418static __init void map_mmioh_high(int max_pnode) 392static __init void map_mmioh_high(int max_pnode)
419{ 393{
420 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; 394 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
@@ -566,8 +540,6 @@ void __init uv_system_init(void)
566 unsigned long mmr_base, present, paddr; 540 unsigned long mmr_base, present, paddr;
567 unsigned short pnode_mask; 541 unsigned short pnode_mask;
568 542
569 map_low_mmrs();
570
571 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); 543 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
572 m_val = m_n_config.s.m_skt; 544 m_val = m_n_config.s.m_skt;
573 n_val = m_n_config.s.n_skt; 545 n_val = m_n_config.s.n_skt;
@@ -591,6 +563,8 @@ void __init uv_system_init(void)
591 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 563 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
592 uv_blade_info = kmalloc(bytes, GFP_KERNEL); 564 uv_blade_info = kmalloc(bytes, GFP_KERNEL);
593 BUG_ON(!uv_blade_info); 565 BUG_ON(!uv_blade_info);
566 for (blade = 0; blade < uv_num_possible_blades(); blade++)
567 uv_blade_info[blade].memory_nid = -1;
594 568
595 get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); 569 get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
596 570
@@ -629,6 +603,9 @@ void __init uv_system_init(void)
629 lcpu = uv_blade_info[blade].nr_possible_cpus; 603 lcpu = uv_blade_info[blade].nr_possible_cpus;
630 uv_blade_info[blade].nr_possible_cpus++; 604 uv_blade_info[blade].nr_possible_cpus++;
631 605
606 /* Any node on the blade, else will contain -1. */
607 uv_blade_info[blade].memory_nid = nid;
608
632 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; 609 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
633 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; 610 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
634 uv_cpu_hub_info(cpu)->m_val = m_val; 611 uv_cpu_hub_info(cpu)->m_val = m_val;
@@ -662,11 +639,10 @@ void __init uv_system_init(void)
662 pnode = (paddr >> m_val) & pnode_mask; 639 pnode = (paddr >> m_val) & pnode_mask;
663 blade = boot_pnode_to_blade(pnode); 640 blade = boot_pnode_to_blade(pnode);
664 uv_node_to_blade[nid] = blade; 641 uv_node_to_blade[nid] = blade;
642 max_pnode = max(pnode, max_pnode);
665 } 643 }
666 644
667 map_gru_high(max_pnode); 645 map_gru_high(max_pnode);
668 map_mmr_high(max_pnode);
669 map_config_high(max_pnode);
670 map_mmioh_high(max_pnode); 646 map_mmioh_high(max_pnode);
671 647
672 uv_cpu_init(); 648 uv_cpu_init();
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 79302e9a33a4..442b5508893f 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -811,7 +811,7 @@ static int apm_do_idle(void)
811 u8 ret = 0; 811 u8 ret = 0;
812 int idled = 0; 812 int idled = 0;
813 int polling; 813 int polling;
814 int err; 814 int err = 0;
815 815
816 polling = !!(current_thread_info()->status & TS_POLLING); 816 polling = !!(current_thread_info()->status & TS_POLLING);
817 if (polling) { 817 if (polling) {
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3efcb2b96a15..c1f253dac155 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER
7CFLAGS_REMOVE_common.o = -pg 7CFLAGS_REMOVE_common.o = -pg
8endif 8endif
9 9
10# Make sure load_percpu_segment has no stackprotector
11nostackp := $(call cc-option, -fno-stack-protector)
12CFLAGS_common.o := $(nostackp)
13
10obj-y := intel_cacheinfo.o addon_cpuid_features.o 14obj-y := intel_cacheinfo.o addon_cpuid_features.o
11obj-y += proc.o capflags.o powerflags.o common.o 15obj-y += proc.o capflags.o powerflags.o common.o
12obj-y += vmware.o hypervisor.o 16obj-y += vmware.o hypervisor.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 28e5f5956042..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -356,7 +356,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
356#endif 356#endif
357#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) 357#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
358 /* check CPU config space for extended APIC ID */ 358 /* check CPU config space for extended APIC ID */
359 if (c->x86 >= 0xf) { 359 if (cpu_has_apic && c->x86 >= 0xf) {
360 unsigned int val; 360 unsigned int val;
361 val = read_pci_config(0, 24, 0, 0x68); 361 val = read_pci_config(0, 24, 0, 0x68);
362 if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) 362 if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
400 level = cpuid_eax(1); 400 level = cpuid_eax(1);
401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
402 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 402 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403
404 /*
405 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it.
407 */
408 if (c->x86_model < 0x14)
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
403 } 410 }
404 if (c->x86 == 0x10 || c->x86 == 0x11) 411 if (c->x86 == 0x10 || c->x86 == 0x11)
405 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 412 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
60} 60}
61 61
62static const struct cpu_dev *this_cpu __cpuinitdata; 62static void __cpuinit default_init(struct cpuinfo_x86 *c)
63{
64#ifdef CONFIG_X86_64
65 display_cacheinfo(c);
66#else
67 /* Not much we can do here... */
68 /* Check if at least it has cpuid */
69 if (c->cpuid_level == -1) {
70 /* No cpuid. It must be an ancient CPU */
71 if (c->x86 == 4)
72 strcpy(c->x86_model_id, "486");
73 else if (c->x86 == 3)
74 strcpy(c->x86_model_id, "386");
75 }
76#endif
77}
78
79static const struct cpu_dev __cpuinitconst default_cpu = {
80 .c_init = default_init,
81 .c_vendor = "Unknown",
82 .c_x86_vendor = X86_VENDOR_UNKNOWN,
83};
84
85static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
63 86
64DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 87DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 88#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
332 355
333static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 356static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
334 357
335static void __cpuinit default_init(struct cpuinfo_x86 *c)
336{
337#ifdef CONFIG_X86_64
338 display_cacheinfo(c);
339#else
340 /* Not much we can do here... */
341 /* Check if at least it has cpuid */
342 if (c->cpuid_level == -1) {
343 /* No cpuid. It must be an ancient CPU */
344 if (c->x86 == 4)
345 strcpy(c->x86_model_id, "486");
346 else if (c->x86 == 3)
347 strcpy(c->x86_model_id, "386");
348 }
349#endif
350}
351
352static const struct cpu_dev __cpuinitconst default_cpu = {
353 .c_init = default_init,
354 .c_vendor = "Unknown",
355 .c_x86_vendor = X86_VENDOR_UNKNOWN,
356};
357
358static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 358static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
359{ 359{
360 unsigned int *v; 360 unsigned int *v;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 484c1e5f658e..01213048f62f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1226,8 +1226,13 @@ static void mce_init(void)
1226} 1226}
1227 1227
1228/* Add per CPU specific workarounds here */ 1228/* Add per CPU specific workarounds here */
1229static void mce_cpu_quirks(struct cpuinfo_x86 *c) 1229static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1230{ 1230{
1231 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1232 pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
1233 return -EOPNOTSUPP;
1234 }
1235
1231 /* This should be disabled by the BIOS, but isn't always */ 1236 /* This should be disabled by the BIOS, but isn't always */
1232 if (c->x86_vendor == X86_VENDOR_AMD) { 1237 if (c->x86_vendor == X86_VENDOR_AMD) {
1233 if (c->x86 == 15 && banks > 4) { 1238 if (c->x86 == 15 && banks > 4) {
@@ -1273,11 +1278,20 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
1273 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && 1278 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
1274 monarch_timeout < 0) 1279 monarch_timeout < 0)
1275 monarch_timeout = USEC_PER_SEC; 1280 monarch_timeout = USEC_PER_SEC;
1281
1282 /*
1283 * There are also broken BIOSes on some Pentium M and
1284 * earlier systems:
1285 */
1286 if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
1287 mce_bootlog = 0;
1276 } 1288 }
1277 if (monarch_timeout < 0) 1289 if (monarch_timeout < 0)
1278 monarch_timeout = 0; 1290 monarch_timeout = 0;
1279 if (mce_bootlog != 0) 1291 if (mce_bootlog != 0)
1280 mce_panic_timeout = 30; 1292 mce_panic_timeout = 30;
1293
1294 return 0;
1281} 1295}
1282 1296
1283static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) 1297static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
@@ -1338,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
1338 if (!mce_available(c)) 1352 if (!mce_available(c))
1339 return; 1353 return;
1340 1354
1341 if (mce_cap_init() < 0) { 1355 if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
1342 mce_disabled = 1; 1356 mce_disabled = 1;
1343 return; 1357 return;
1344 } 1358 }
1345 mce_cpu_quirks(c);
1346 1359
1347 machine_check_vector = do_machine_check; 1360 machine_check_vector = do_machine_check;
1348 1361
@@ -1692,17 +1705,15 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
1692 const char *buf, size_t siz) 1705 const char *buf, size_t siz)
1693{ 1706{
1694 char *p; 1707 char *p;
1695 int len;
1696 1708
1697 strncpy(mce_helper, buf, sizeof(mce_helper)); 1709 strncpy(mce_helper, buf, sizeof(mce_helper));
1698 mce_helper[sizeof(mce_helper)-1] = 0; 1710 mce_helper[sizeof(mce_helper)-1] = 0;
1699 len = strlen(mce_helper);
1700 p = strchr(mce_helper, '\n'); 1711 p = strchr(mce_helper, '\n');
1701 1712
1702 if (*p) 1713 if (p)
1703 *p = 0; 1714 *p = 0;
1704 1715
1705 return len; 1716 return strlen(mce_helper) + !!p;
1706} 1717}
1707 1718
1708static ssize_t set_ignore_ce(struct sys_device *s, 1719static ssize_t set_ignore_ce(struct sys_device *s,
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd5..5957a93e5173 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
36 36
37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
39static DEFINE_PER_CPU(bool, thermal_throttle_active);
39 40
40static atomic_t therm_throt_en = ATOMIC_INIT(0); 41static atomic_t therm_throt_en = ATOMIC_INIT(0);
41 42
@@ -96,27 +97,33 @@ static int therm_throt_process(int curr)
96{ 97{
97 unsigned int cpu = smp_processor_id(); 98 unsigned int cpu = smp_processor_id();
98 __u64 tmp_jiffs = get_jiffies_64(); 99 __u64 tmp_jiffs = get_jiffies_64();
100 bool was_throttled = __get_cpu_var(thermal_throttle_active);
101 bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
99 102
100 if (curr) 103 if (is_throttled)
101 __get_cpu_var(thermal_throttle_count)++; 104 __get_cpu_var(thermal_throttle_count)++;
102 105
103 if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) 106 if (!(was_throttled ^ is_throttled) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check)))
104 return 0; 108 return 0;
105 109
106 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
107 111
108 /* if we just entered the thermal event */ 112 /* if we just entered the thermal event */
109 if (curr) { 113 if (is_throttled) {
110 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 114 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
111 "cpu clock throttled (total events = %lu)\n", cpu, 115 "cpu clock throttled (total events = %lu)\n",
112 __get_cpu_var(thermal_throttle_count)); 116 cpu, __get_cpu_var(thermal_throttle_count));
113 117
114 add_taint(TAINT_MACHINE_CHECK); 118 add_taint(TAINT_MACHINE_CHECK);
115 } else { 119 return 1;
116 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); 120 }
121 if (was_throttled) {
122 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
123 return 1;
117 } 124 }
118 125
119 return 1; 126 return 0;
120} 127}
121 128
122#ifdef CONFIG_SYSFS 129#ifdef CONFIG_SYSFS
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 36c3dc7b8991..900332b800f8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
55 int num_counters_fixed; 55 int num_counters_fixed;
56 int counter_bits; 56 int counter_bits;
57 u64 counter_mask; 57 u64 counter_mask;
58 int apic;
58 u64 max_period; 59 u64 max_period;
59 u64 intel_ctrl; 60 u64 intel_ctrl;
60}; 61};
@@ -66,6 +67,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
66}; 67};
67 68
68/* 69/*
70 * Not sure about some of these
71 */
72static const u64 p6_perfmon_event_map[] =
73{
74 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
75 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
76 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
77 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
78 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
79 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
80 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
81};
82
83static u64 p6_pmu_event_map(int event)
84{
85 return p6_perfmon_event_map[event];
86}
87
88/*
89 * Counter setting that is specified not to count anything.
90 * We use this to effectively disable a counter.
91 *
92 * L2_RQSTS with 0 MESI unit mask.
93 */
94#define P6_NOP_COUNTER 0x0000002EULL
95
96static u64 p6_pmu_raw_event(u64 event)
97{
98#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
99#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
100#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
101#define P6_EVNTSEL_INV_MASK 0x00800000ULL
102#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL
103
104#define P6_EVNTSEL_MASK \
105 (P6_EVNTSEL_EVENT_MASK | \
106 P6_EVNTSEL_UNIT_MASK | \
107 P6_EVNTSEL_EDGE_MASK | \
108 P6_EVNTSEL_INV_MASK | \
109 P6_EVNTSEL_COUNTER_MASK)
110
111 return event & P6_EVNTSEL_MASK;
112}
113
114
115/*
69 * Intel PerfMon v3. Used on Core2 and later. 116 * Intel PerfMon v3. Used on Core2 and later.
70 */ 117 */
71static const u64 intel_perfmon_event_map[] = 118static const u64 intel_perfmon_event_map[] =
@@ -567,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
567 614
568static bool reserve_pmc_hardware(void) 615static bool reserve_pmc_hardware(void)
569{ 616{
617#ifdef CONFIG_X86_LOCAL_APIC
570 int i; 618 int i;
571 619
572 if (nmi_watchdog == NMI_LOCAL_APIC) 620 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -581,9 +629,11 @@ static bool reserve_pmc_hardware(void)
581 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 629 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
582 goto eventsel_fail; 630 goto eventsel_fail;
583 } 631 }
632#endif
584 633
585 return true; 634 return true;
586 635
636#ifdef CONFIG_X86_LOCAL_APIC
587eventsel_fail: 637eventsel_fail:
588 for (i--; i >= 0; i--) 638 for (i--; i >= 0; i--)
589 release_evntsel_nmi(x86_pmu.eventsel + i); 639 release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -598,10 +648,12 @@ perfctr_fail:
598 enable_lapic_nmi_watchdog(); 648 enable_lapic_nmi_watchdog();
599 649
600 return false; 650 return false;
651#endif
601} 652}
602 653
603static void release_pmc_hardware(void) 654static void release_pmc_hardware(void)
604{ 655{
656#ifdef CONFIG_X86_LOCAL_APIC
605 int i; 657 int i;
606 658
607 for (i = 0; i < x86_pmu.num_counters; i++) { 659 for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -611,6 +663,7 @@ static void release_pmc_hardware(void)
611 663
612 if (nmi_watchdog == NMI_LOCAL_APIC) 664 if (nmi_watchdog == NMI_LOCAL_APIC)
613 enable_lapic_nmi_watchdog(); 665 enable_lapic_nmi_watchdog();
666#endif
614} 667}
615 668
616static void hw_perf_counter_destroy(struct perf_counter *counter) 669static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -666,6 +719,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
666{ 719{
667 struct perf_counter_attr *attr = &counter->attr; 720 struct perf_counter_attr *attr = &counter->attr;
668 struct hw_perf_counter *hwc = &counter->hw; 721 struct hw_perf_counter *hwc = &counter->hw;
722 u64 config;
669 int err; 723 int err;
670 724
671 if (!x86_pmu_initialized()) 725 if (!x86_pmu_initialized())
@@ -701,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
701 hwc->sample_period = x86_pmu.max_period; 755 hwc->sample_period = x86_pmu.max_period;
702 hwc->last_period = hwc->sample_period; 756 hwc->last_period = hwc->sample_period;
703 atomic64_set(&hwc->period_left, hwc->sample_period); 757 atomic64_set(&hwc->period_left, hwc->sample_period);
758 } else {
759 /*
760 * If we have a PMU initialized but no APIC
761 * interrupts, we cannot sample hardware
762 * counters (user-space has to fall back and
763 * sample via a hrtimer based software counter):
764 */
765 if (!x86_pmu.apic)
766 return -EOPNOTSUPP;
704 } 767 }
705 768
706 counter->destroy = hw_perf_counter_destroy; 769 counter->destroy = hw_perf_counter_destroy;
@@ -718,14 +781,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
718 781
719 if (attr->config >= x86_pmu.max_events) 782 if (attr->config >= x86_pmu.max_events)
720 return -EINVAL; 783 return -EINVAL;
784
721 /* 785 /*
722 * The generic map: 786 * The generic map:
723 */ 787 */
724 hwc->config |= x86_pmu.event_map(attr->config); 788 config = x86_pmu.event_map(attr->config);
789
790 if (config == 0)
791 return -ENOENT;
792
793 if (config == -1LL)
794 return -EINVAL;
795
796 hwc->config |= config;
725 797
726 return 0; 798 return 0;
727} 799}
728 800
801static void p6_pmu_disable_all(void)
802{
803 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
804 u64 val;
805
806 if (!cpuc->enabled)
807 return;
808
809 cpuc->enabled = 0;
810 barrier();
811
812 /* p6 only has one enable register */
813 rdmsrl(MSR_P6_EVNTSEL0, val);
814 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
815 wrmsrl(MSR_P6_EVNTSEL0, val);
816}
817
729static void intel_pmu_disable_all(void) 818static void intel_pmu_disable_all(void)
730{ 819{
731 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 820 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -767,6 +856,23 @@ void hw_perf_disable(void)
767 return x86_pmu.disable_all(); 856 return x86_pmu.disable_all();
768} 857}
769 858
859static void p6_pmu_enable_all(void)
860{
861 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
862 unsigned long val;
863
864 if (cpuc->enabled)
865 return;
866
867 cpuc->enabled = 1;
868 barrier();
869
870 /* p6 only has one enable register */
871 rdmsrl(MSR_P6_EVNTSEL0, val);
872 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
873 wrmsrl(MSR_P6_EVNTSEL0, val);
874}
875
770static void intel_pmu_enable_all(void) 876static void intel_pmu_enable_all(void)
771{ 877{
772 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 878 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
@@ -784,13 +890,13 @@ static void amd_pmu_enable_all(void)
784 barrier(); 890 barrier();
785 891
786 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 892 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
893 struct perf_counter *counter = cpuc->counters[idx];
787 u64 val; 894 u64 val;
788 895
789 if (!test_bit(idx, cpuc->active_mask)) 896 if (!test_bit(idx, cpuc->active_mask))
790 continue; 897 continue;
791 rdmsrl(MSR_K7_EVNTSEL0 + idx, val); 898
792 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) 899 val = counter->hw.config;
793 continue;
794 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 900 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
795 wrmsrl(MSR_K7_EVNTSEL0 + idx, val); 901 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
796 } 902 }
@@ -819,16 +925,13 @@ static inline void intel_pmu_ack_status(u64 ack)
819 925
820static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 926static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
821{ 927{
822 int err; 928 (void)checking_wrmsrl(hwc->config_base + idx,
823 err = checking_wrmsrl(hwc->config_base + idx,
824 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); 929 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
825} 930}
826 931
827static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) 932static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
828{ 933{
829 int err; 934 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
830 err = checking_wrmsrl(hwc->config_base + idx,
831 hwc->config);
832} 935}
833 936
834static inline void 937static inline void
@@ -836,13 +939,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
836{ 939{
837 int idx = __idx - X86_PMC_IDX_FIXED; 940 int idx = __idx - X86_PMC_IDX_FIXED;
838 u64 ctrl_val, mask; 941 u64 ctrl_val, mask;
839 int err;
840 942
841 mask = 0xfULL << (idx * 4); 943 mask = 0xfULL << (idx * 4);
842 944
843 rdmsrl(hwc->config_base, ctrl_val); 945 rdmsrl(hwc->config_base, ctrl_val);
844 ctrl_val &= ~mask; 946 ctrl_val &= ~mask;
845 err = checking_wrmsrl(hwc->config_base, ctrl_val); 947 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
948}
949
950static inline void
951p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
952{
953 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
954 u64 val = P6_NOP_COUNTER;
955
956 if (cpuc->enabled)
957 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
958
959 (void)checking_wrmsrl(hwc->config_base + idx, val);
846} 960}
847 961
848static inline void 962static inline void
@@ -943,6 +1057,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
943 err = checking_wrmsrl(hwc->config_base, ctrl_val); 1057 err = checking_wrmsrl(hwc->config_base, ctrl_val);
944} 1058}
945 1059
1060static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1061{
1062 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1063 u64 val;
1064
1065 val = hwc->config;
1066 if (cpuc->enabled)
1067 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1068
1069 (void)checking_wrmsrl(hwc->config_base + idx, val);
1070}
1071
1072
946static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) 1073static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
947{ 1074{
948 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 1075 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -959,8 +1086,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
959 1086
960 if (cpuc->enabled) 1087 if (cpuc->enabled)
961 x86_pmu_enable_counter(hwc, idx); 1088 x86_pmu_enable_counter(hwc, idx);
962 else
963 x86_pmu_disable_counter(hwc, idx);
964} 1089}
965 1090
966static int 1091static int
@@ -1176,6 +1301,49 @@ static void intel_pmu_reset(void)
1176 local_irq_restore(flags); 1301 local_irq_restore(flags);
1177} 1302}
1178 1303
1304static int p6_pmu_handle_irq(struct pt_regs *regs)
1305{
1306 struct perf_sample_data data;
1307 struct cpu_hw_counters *cpuc;
1308 struct perf_counter *counter;
1309 struct hw_perf_counter *hwc;
1310 int idx, handled = 0;
1311 u64 val;
1312
1313 data.regs = regs;
1314 data.addr = 0;
1315
1316 cpuc = &__get_cpu_var(cpu_hw_counters);
1317
1318 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1319 if (!test_bit(idx, cpuc->active_mask))
1320 continue;
1321
1322 counter = cpuc->counters[idx];
1323 hwc = &counter->hw;
1324
1325 val = x86_perf_counter_update(counter, hwc, idx);
1326 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1327 continue;
1328
1329 /*
1330 * counter overflow
1331 */
1332 handled = 1;
1333 data.period = counter->hw.last_period;
1334
1335 if (!x86_perf_counter_set_period(counter, hwc, idx))
1336 continue;
1337
1338 if (perf_counter_overflow(counter, 1, &data))
1339 p6_pmu_disable_counter(hwc, idx);
1340 }
1341
1342 if (handled)
1343 inc_irq_stat(apic_perf_irqs);
1344
1345 return handled;
1346}
1179 1347
1180/* 1348/*
1181 * This handler is triggered by the local APIC, so the APIC IRQ handling 1349 * This handler is triggered by the local APIC, so the APIC IRQ handling
@@ -1185,14 +1353,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1185{ 1353{
1186 struct perf_sample_data data; 1354 struct perf_sample_data data;
1187 struct cpu_hw_counters *cpuc; 1355 struct cpu_hw_counters *cpuc;
1188 int bit, cpu, loops; 1356 int bit, loops;
1189 u64 ack, status; 1357 u64 ack, status;
1190 1358
1191 data.regs = regs; 1359 data.regs = regs;
1192 data.addr = 0; 1360 data.addr = 0;
1193 1361
1194 cpu = smp_processor_id(); 1362 cpuc = &__get_cpu_var(cpu_hw_counters);
1195 cpuc = &per_cpu(cpu_hw_counters, cpu);
1196 1363
1197 perf_disable(); 1364 perf_disable();
1198 status = intel_pmu_get_status(); 1365 status = intel_pmu_get_status();
@@ -1249,14 +1416,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1249 struct cpu_hw_counters *cpuc; 1416 struct cpu_hw_counters *cpuc;
1250 struct perf_counter *counter; 1417 struct perf_counter *counter;
1251 struct hw_perf_counter *hwc; 1418 struct hw_perf_counter *hwc;
1252 int cpu, idx, handled = 0; 1419 int idx, handled = 0;
1253 u64 val; 1420 u64 val;
1254 1421
1255 data.regs = regs; 1422 data.regs = regs;
1256 data.addr = 0; 1423 data.addr = 0;
1257 1424
1258 cpu = smp_processor_id(); 1425 cpuc = &__get_cpu_var(cpu_hw_counters);
1259 cpuc = &per_cpu(cpu_hw_counters, cpu);
1260 1426
1261 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1427 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1262 if (!test_bit(idx, cpuc->active_mask)) 1428 if (!test_bit(idx, cpuc->active_mask))
@@ -1299,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1299 1465
1300void set_perf_counter_pending(void) 1466void set_perf_counter_pending(void)
1301{ 1467{
1468#ifdef CONFIG_X86_LOCAL_APIC
1302 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1469 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1470#endif
1303} 1471}
1304 1472
1305void perf_counters_lapic_init(void) 1473void perf_counters_lapic_init(void)
1306{ 1474{
1307 if (!x86_pmu_initialized()) 1475#ifdef CONFIG_X86_LOCAL_APIC
1476 if (!x86_pmu.apic || !x86_pmu_initialized())
1308 return; 1477 return;
1309 1478
1310 /* 1479 /*
1311 * Always use NMI for PMU 1480 * Always use NMI for PMU
1312 */ 1481 */
1313 apic_write(APIC_LVTPC, APIC_DM_NMI); 1482 apic_write(APIC_LVTPC, APIC_DM_NMI);
1483#endif
1314} 1484}
1315 1485
1316static int __kprobes 1486static int __kprobes
@@ -1334,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
1334 1504
1335 regs = args->regs; 1505 regs = args->regs;
1336 1506
1507#ifdef CONFIG_X86_LOCAL_APIC
1337 apic_write(APIC_LVTPC, APIC_DM_NMI); 1508 apic_write(APIC_LVTPC, APIC_DM_NMI);
1509#endif
1338 /* 1510 /*
1339 * Can't rely on the handled return value to say it was our NMI, two 1511 * Can't rely on the handled return value to say it was our NMI, two
1340 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1512 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1353,6 +1525,33 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
1353 .priority = 1 1525 .priority = 1
1354}; 1526};
1355 1527
1528static struct x86_pmu p6_pmu = {
1529 .name = "p6",
1530 .handle_irq = p6_pmu_handle_irq,
1531 .disable_all = p6_pmu_disable_all,
1532 .enable_all = p6_pmu_enable_all,
1533 .enable = p6_pmu_enable_counter,
1534 .disable = p6_pmu_disable_counter,
1535 .eventsel = MSR_P6_EVNTSEL0,
1536 .perfctr = MSR_P6_PERFCTR0,
1537 .event_map = p6_pmu_event_map,
1538 .raw_event = p6_pmu_raw_event,
1539 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1540 .apic = 1,
1541 .max_period = (1ULL << 31) - 1,
1542 .version = 0,
1543 .num_counters = 2,
1544 /*
1545 * Counters have 40 bits implemented. However they are designed such
1546 * that bits [32-39] are sign extensions of bit 31. As such the
1547 * effective width of a counter for P6-like PMU is 32 bits only.
1548 *
1549 * See IA-32 Intel Architecture Software developer manual Vol 3B
1550 */
1551 .counter_bits = 32,
1552 .counter_mask = (1ULL << 32) - 1,
1553};
1554
1356static struct x86_pmu intel_pmu = { 1555static struct x86_pmu intel_pmu = {
1357 .name = "Intel", 1556 .name = "Intel",
1358 .handle_irq = intel_pmu_handle_irq, 1557 .handle_irq = intel_pmu_handle_irq,
@@ -1365,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
1365 .event_map = intel_pmu_event_map, 1564 .event_map = intel_pmu_event_map,
1366 .raw_event = intel_pmu_raw_event, 1565 .raw_event = intel_pmu_raw_event,
1367 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 1566 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1567 .apic = 1,
1368 /* 1568 /*
1369 * Intel PMCs cannot be accessed sanely above 32 bit width, 1569 * Intel PMCs cannot be accessed sanely above 32 bit width,
1370 * so we install an artificial 1<<31 period regardless of 1570 * so we install an artificial 1<<31 period regardless of
@@ -1388,10 +1588,43 @@ static struct x86_pmu amd_pmu = {
1388 .num_counters = 4, 1588 .num_counters = 4,
1389 .counter_bits = 48, 1589 .counter_bits = 48,
1390 .counter_mask = (1ULL << 48) - 1, 1590 .counter_mask = (1ULL << 48) - 1,
1591 .apic = 1,
1391 /* use highest bit to detect overflow */ 1592 /* use highest bit to detect overflow */
1392 .max_period = (1ULL << 47) - 1, 1593 .max_period = (1ULL << 47) - 1,
1393}; 1594};
1394 1595
1596static int p6_pmu_init(void)
1597{
1598 switch (boot_cpu_data.x86_model) {
1599 case 1:
1600 case 3: /* Pentium Pro */
1601 case 5:
1602 case 6: /* Pentium II */
1603 case 7:
1604 case 8:
1605 case 11: /* Pentium III */
1606 break;
1607 case 9:
1608 case 13:
1609 /* Pentium M */
1610 break;
1611 default:
1612 pr_cont("unsupported p6 CPU model %d ",
1613 boot_cpu_data.x86_model);
1614 return -ENODEV;
1615 }
1616
1617 x86_pmu = p6_pmu;
1618
1619 if (!cpu_has_apic) {
1620 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1621 pr_info("no hardware sampling interrupt available.\n");
1622 x86_pmu.apic = 0;
1623 }
1624
1625 return 0;
1626}
1627
1395static int intel_pmu_init(void) 1628static int intel_pmu_init(void)
1396{ 1629{
1397 union cpuid10_edx edx; 1630 union cpuid10_edx edx;
@@ -1400,8 +1633,14 @@ static int intel_pmu_init(void)
1400 unsigned int ebx; 1633 unsigned int ebx;
1401 int version; 1634 int version;
1402 1635
1403 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 1636 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1637 /* check for P6 processor family */
1638 if (boot_cpu_data.x86 == 6) {
1639 return p6_pmu_init();
1640 } else {
1404 return -ENODEV; 1641 return -ENODEV;
1642 }
1643 }
1405 1644
1406 /* 1645 /*
1407 * Check whether the Architectural PerfMon supports 1646 * Check whether the Architectural PerfMon supports
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 96f7ac0bbf01..fe26ba3e3451 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -354,7 +354,7 @@ void __init efi_init(void)
354 */ 354 */
355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); 355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
356 if (c16) { 356 if (c16) {
357 for (i = 0; i < sizeof(vendor) && *c16; ++i) 357 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
358 vendor[i] = *c16++; 358 vendor[i] = *c16++;
359 vendor[i] = '\0'; 359 vendor[i] = '\0';
360 } else 360 } else
@@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void)
512 && end_pfn <= max_pfn_mapped)) 512 && end_pfn <= max_pfn_mapped))
513 va = __va(md->phys_addr); 513 va = __va(md->phys_addr);
514 else 514 else
515 va = efi_ioremap(md->phys_addr, size); 515 va = efi_ioremap(md->phys_addr, size, md->type);
516 516
517 md->virt_addr = (u64) (unsigned long) va; 517 md->virt_addr = (u64) (unsigned long) va;
518 518
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 22c3b7828c50..ac0621a7ac3d 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void)
98 early_runtime_code_mapping_set_exec(0); 98 early_runtime_code_mapping_set_exec(0);
99} 99}
100 100
101void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) 101void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
102 u32 type)
102{ 103{
103 unsigned long last_map_pfn; 104 unsigned long last_map_pfn;
104 105
106 if (type == EFI_MEMORY_MAPPED_IO)
107 return ioremap(phys_addr, size);
108
105 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); 109 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
106 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) 110 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
107 return NULL; 111 return NULL;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 8663afb56535..cc827ac9e8d3 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -261,9 +261,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
261 * which will be freed later 261 * which will be freed later
262 */ 262 */
263 263
264#ifndef CONFIG_HOTPLUG_CPU 264__CPUINIT
265.section .init.text,"ax",@progbits
266#endif
267 265
268#ifdef CONFIG_SMP 266#ifdef CONFIG_SMP
269ENTRY(startup_32_smp) 267ENTRY(startup_32_smp)
@@ -602,7 +600,7 @@ ignore_int:
602#endif 600#endif
603 iret 601 iret
604 602
605.section .cpuinit.data,"wa" 603 __REFDATA
606.align 4 604.align 4
607ENTRY(initial_code) 605ENTRY(initial_code)
608 .long i386_start_kernel 606 .long i386_start_kernel
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 696f0e475c2d..92b7703d3d58 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -187,7 +187,7 @@ static void __init apic_intr_init(void)
187#ifdef CONFIG_X86_THERMAL_VECTOR 187#ifdef CONFIG_X86_THERMAL_VECTOR
188 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 188 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
189#endif 189#endif
190#ifdef CONFIG_X86_THRESHOLD 190#ifdef CONFIG_X86_MCE_THRESHOLD
191 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 191 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
192#endif 192#endif
193#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) 193#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 846510b78a09..2a62d843f015 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -347,7 +347,7 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id)
347 347
348static struct irqaction mfgptirq = { 348static struct irqaction mfgptirq = {
349 .handler = mfgpt_tick, 349 .handler = mfgpt_tick,
350 .flags = IRQF_DISABLED | IRQF_NOBALANCING, 350 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
351 .name = "mfgpt-timer" 351 .name = "mfgpt-timer"
352}; 352};
353 353
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 994dd6a4a2a0..071166a4ba83 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -519,16 +519,12 @@ static void c1e_idle(void)
519 if (!cpumask_test_cpu(cpu, c1e_mask)) { 519 if (!cpumask_test_cpu(cpu, c1e_mask)) {
520 cpumask_set_cpu(cpu, c1e_mask); 520 cpumask_set_cpu(cpu, c1e_mask);
521 /* 521 /*
522 * Force broadcast so ACPI can not interfere. Needs 522 * Force broadcast so ACPI can not interfere.
523 * to run with interrupts enabled as it uses
524 * smp_function_call.
525 */ 523 */
526 local_irq_enable();
527 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, 524 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
528 &cpu); 525 &cpu);
529 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", 526 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
530 cpu); 527 cpu);
531 local_irq_disable();
532 } 528 }
533 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); 529 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
534 530
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 18ce5c04242a..27349f92a6d7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -3,6 +3,7 @@
3#include <linux/init.h> 3#include <linux/init.h>
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/efi.h> 5#include <linux/efi.h>
6#include <linux/dmi.h>
6#include <linux/tboot.h> 7#include <linux/tboot.h>
7#include <acpi/reboot.h> 8#include <acpi/reboot.h>
8#include <asm/io.h> 9#include <asm/io.h>
@@ -18,7 +19,6 @@
18#include <asm/cpu.h> 19#include <asm/cpu.h>
19 20
20#ifdef CONFIG_X86_32 21#ifdef CONFIG_X86_32
21# include <linux/dmi.h>
22# include <linux/ctype.h> 22# include <linux/ctype.h>
23# include <linux/mc146818rtc.h> 23# include <linux/mc146818rtc.h>
24#else 24#else
@@ -250,6 +250,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
250 DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), 250 DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
251 }, 251 },
252 }, 252 },
253 { /* Handle problems with rebooting on CompuLab SBC-FITPC2 */
254 .callback = set_bios_reboot,
255 .ident = "CompuLab SBC-FITPC2",
256 .matches = {
257 DMI_MATCH(DMI_SYS_VENDOR, "CompuLab"),
258 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
259 },
260 },
253 { } 261 { }
254}; 262};
255 263
@@ -397,6 +405,46 @@ EXPORT_SYMBOL(machine_real_restart);
397 405
398#endif /* CONFIG_X86_32 */ 406#endif /* CONFIG_X86_32 */
399 407
408/*
409 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
410 */
411static int __init set_pci_reboot(const struct dmi_system_id *d)
412{
413 if (reboot_type != BOOT_CF9) {
414 reboot_type = BOOT_CF9;
415 printk(KERN_INFO "%s series board detected. "
416 "Selecting PCI-method for reboots.\n", d->ident);
417 }
418 return 0;
419}
420
421static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
422 { /* Handle problems with rebooting on Apple MacBook5 */
423 .callback = set_pci_reboot,
424 .ident = "Apple MacBook5",
425 .matches = {
426 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
427 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
428 },
429 },
430 { /* Handle problems with rebooting on Apple MacBookPro5 */
431 .callback = set_pci_reboot,
432 .ident = "Apple MacBookPro5",
433 .matches = {
434 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
435 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
436 },
437 },
438 { }
439};
440
441static int __init pci_reboot_init(void)
442{
443 dmi_check_system(pci_reboot_dmi_table);
444 return 0;
445}
446core_initcall(pci_reboot_init);
447
400static inline void kb_wait(void) 448static inline void kb_wait(void)
401{ 449{
402 int i; 450 int i;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6ce0d6f38f7f..61f86f241420 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -673,6 +673,19 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
673 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), 673 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
674 }, 674 },
675 }, 675 },
676 {
677 /*
678 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
679 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
680 * match only DMI_BOARD_NAME and see if there is more bad products
681 * with this vendor.
682 */
683 .callback = dmi_low_memory_corruption,
684 .ident = "AMI BIOS",
685 .matches = {
686 DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
687 },
688 },
676#endif 689#endif
677 {} 690 {}
678}; 691};
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 29a3eef7cf4a..07d81916f212 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -165,7 +165,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
165 165
166 if (!chosen) { 166 if (!chosen) {
167 size_t vm_size = VMALLOC_END - VMALLOC_START; 167 size_t vm_size = VMALLOC_END - VMALLOC_START;
168 size_t tot_size = num_possible_cpus() * PMD_SIZE; 168 size_t tot_size = nr_cpu_ids * PMD_SIZE;
169 169
170 /* on non-NUMA, embedding is better */ 170 /* on non-NUMA, embedding is better */
171 if (!pcpu_need_numa()) 171 if (!pcpu_need_numa())
@@ -199,7 +199,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
199 dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; 199 dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
200 200
201 /* allocate pointer array and alloc large pages */ 201 /* allocate pointer array and alloc large pages */
202 map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); 202 map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
203 pcpul_map = alloc_bootmem(map_size); 203 pcpul_map = alloc_bootmem(map_size);
204 204
205 for_each_possible_cpu(cpu) { 205 for_each_possible_cpu(cpu) {
@@ -228,7 +228,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
228 228
229 /* allocate address and map */ 229 /* allocate address and map */
230 pcpul_vm.flags = VM_ALLOC; 230 pcpul_vm.flags = VM_ALLOC;
231 pcpul_vm.size = num_possible_cpus() * PMD_SIZE; 231 pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
232 vm_area_register_early(&pcpul_vm, PMD_SIZE); 232 vm_area_register_early(&pcpul_vm, PMD_SIZE);
233 233
234 for_each_possible_cpu(cpu) { 234 for_each_possible_cpu(cpu) {
@@ -250,8 +250,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
250 PMD_SIZE, pcpul_vm.addr, NULL); 250 PMD_SIZE, pcpul_vm.addr, NULL);
251 251
252 /* sort pcpul_map array for pcpu_lpage_remapped() */ 252 /* sort pcpul_map array for pcpu_lpage_remapped() */
253 for (i = 0; i < num_possible_cpus() - 1; i++) 253 for (i = 0; i < nr_cpu_ids - 1; i++)
254 for (j = i + 1; j < num_possible_cpus(); j++) 254 for (j = i + 1; j < nr_cpu_ids; j++)
255 if (pcpul_map[i].ptr > pcpul_map[j].ptr) { 255 if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
256 struct pcpul_ent tmp = pcpul_map[i]; 256 struct pcpul_ent tmp = pcpul_map[i];
257 pcpul_map[i] = pcpul_map[j]; 257 pcpul_map[i] = pcpul_map[j];
@@ -288,7 +288,7 @@ void *pcpu_lpage_remapped(void *kaddr)
288{ 288{
289 void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); 289 void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
290 unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; 290 unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
291 int left = 0, right = num_possible_cpus() - 1; 291 int left = 0, right = nr_cpu_ids - 1;
292 int pos; 292 int pos;
293 293
294 /* pcpul in use at all? */ 294 /* pcpul in use at all? */
@@ -377,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
377 pcpu4k_nr_static_pages = PFN_UP(static_size); 377 pcpu4k_nr_static_pages = PFN_UP(static_size);
378 378
379 /* unaligned allocations can't be freed, round up to page size */ 379 /* unaligned allocations can't be freed, round up to page size */
380 pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() 380 pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
381 * sizeof(pcpu4k_pages[0])); 381 * sizeof(pcpu4k_pages[0]));
382 pcpu4k_pages = alloc_bootmem(pages_size); 382 pcpu4k_pages = alloc_bootmem(pages_size);
383 383
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 8ccabb8a2f6a..77b9689f8edb 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -744,6 +744,7 @@ uv_activation_descriptor_init(int node, int pnode)
744 * note that base_dest_nodeid is actually a nasid. 744 * note that base_dest_nodeid is actually a nasid.
745 */ 745 */
746 ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; 746 ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
747 ad2->header.dest_subnodeid = 0x10; /* the LB */
747 ad2->header.command = UV_NET_ENDPOINT_INTD; 748 ad2->header.command = UV_NET_ENDPOINT_INTD;
748 ad2->header.int_both = 1; 749 ad2->header.int_both = 1;
749 /* 750 /*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6e1a368d21d4..71f4368b357e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
275 * use the TSC value at the transitions to calculate a pretty 275 * use the TSC value at the transitions to calculate a pretty
276 * good value for the TSC frequencty. 276 * good value for the TSC frequencty.
277 */ 277 */
278static inline int pit_verify_msb(unsigned char val)
279{
280 /* Ignore LSB */
281 inb(0x42);
282 return inb(0x42) == val;
283}
284
278static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 285static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
279{ 286{
280 int count; 287 int count;
281 u64 tsc = 0; 288 u64 tsc = 0;
282 289
283 for (count = 0; count < 50000; count++) { 290 for (count = 0; count < 50000; count++) {
284 /* Ignore LSB */ 291 if (!pit_verify_msb(val))
285 inb(0x42);
286 if (inb(0x42) != val)
287 break; 292 break;
288 tsc = get_cycles(); 293 tsc = get_cycles();
289 } 294 }
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void)
336 * to do that is to just read back the 16-bit counter 341 * to do that is to just read back the 16-bit counter
337 * once from the PIT. 342 * once from the PIT.
338 */ 343 */
339 inb(0x42); 344 pit_verify_msb(0);
340 inb(0x42);
341 345
342 if (pit_expect_msb(0xff, &tsc, &d1)) { 346 if (pit_expect_msb(0xff, &tsc, &d1)) {
343 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { 347 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void)
348 * Iterate until the error is less than 500 ppm 352 * Iterate until the error is less than 500 ppm
349 */ 353 */
350 delta -= tsc; 354 delta -= tsc;
351 if (d1+d2 < delta >> 11) 355 if (d1+d2 >= delta >> 11)
352 goto success; 356 continue;
357
358 /*
359 * Check the PIT one more time to verify that
360 * all TSC reads were stable wrt the PIT.
361 *
362 * This also guarantees serialization of the
363 * last cycle read ('d2') in pit_expect_msb.
364 */
365 if (!pit_verify_msb(0xfe - i))
366 break;
367 goto success;
353 } 368 }
354 } 369 }
355 printk("Fast TSC calibration failed\n"); 370 printk("Fast TSC calibration failed\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b263423fbe2a..95a7289e4b0c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
441 ap.ds = __USER_DS; 441 ap.ds = __USER_DS;
442 ap.es = __USER_DS; 442 ap.es = __USER_DS;
443 ap.fs = __KERNEL_PERCPU; 443 ap.fs = __KERNEL_PERCPU;
444 ap.gs = 0; 444 ap.gs = __KERNEL_STACK_CANARY;
445 445
446 ap.eflags = 0; 446 ap.eflags = 0;
447 447
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 367e87882041..9fc178255c04 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -46,11 +46,10 @@ PHDRS {
46 data PT_LOAD FLAGS(7); /* RWE */ 46 data PT_LOAD FLAGS(7); /* RWE */
47#ifdef CONFIG_X86_64 47#ifdef CONFIG_X86_64
48 user PT_LOAD FLAGS(7); /* RWE */ 48 user PT_LOAD FLAGS(7); /* RWE */
49 data.init PT_LOAD FLAGS(7); /* RWE */
50#ifdef CONFIG_SMP 49#ifdef CONFIG_SMP
51 percpu PT_LOAD FLAGS(7); /* RWE */ 50 percpu PT_LOAD FLAGS(7); /* RWE */
52#endif 51#endif
53 data.init2 PT_LOAD FLAGS(7); /* RWE */ 52 init PT_LOAD FLAGS(7); /* RWE */
54#endif 53#endif
55 note PT_NOTE FLAGS(0); /* ___ */ 54 note PT_NOTE FLAGS(0); /* ___ */
56} 55}
@@ -103,72 +102,43 @@ SECTIONS
103 __stop___ex_table = .; 102 __stop___ex_table = .;
104 } :text = 0x9090 103 } :text = 0x9090
105 104
106 RODATA 105 RO_DATA(PAGE_SIZE)
107 106
108 /* Data */ 107 /* Data */
109 . = ALIGN(PAGE_SIZE);
110 .data : AT(ADDR(.data) - LOAD_OFFSET) { 108 .data : AT(ADDR(.data) - LOAD_OFFSET) {
111 /* Start of data section */ 109 /* Start of data section */
112 _sdata = .; 110 _sdata = .;
113 DATA_DATA
114 CONSTRUCTORS
115 111
116#ifdef CONFIG_X86_64 112 /* init_task */
117 /* End of data section */ 113 INIT_TASK_DATA(THREAD_SIZE)
118 _edata = .;
119#endif
120 } :data
121 114
122#ifdef CONFIG_X86_32 115#ifdef CONFIG_X86_32
123 /* 32 bit has nosave before _edata */ 116 /* 32 bit has nosave before _edata */
124 . = ALIGN(PAGE_SIZE); 117 NOSAVE_DATA
125 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
126 __nosave_begin = .;
127 *(.data.nosave)
128 . = ALIGN(PAGE_SIZE);
129 __nosave_end = .;
130 }
131#endif 118#endif
132 119
133 . = ALIGN(PAGE_SIZE); 120 PAGE_ALIGNED_DATA(PAGE_SIZE)
134 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
135 *(.data.page_aligned)
136 *(.data.idt) 121 *(.data.idt)
137 }
138 122
139#ifdef CONFIG_X86_32 123 CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
140 . = ALIGN(32);
141#else
142 . = ALIGN(PAGE_SIZE);
143 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
144#endif
145 .data.cacheline_aligned :
146 AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
147 *(.data.cacheline_aligned)
148 }
149 124
150 /* rarely changed data like cpu maps */ 125 DATA_DATA
151#ifdef CONFIG_X86_32 126 CONSTRUCTORS
152 . = ALIGN(32); 127
153#else 128 /* rarely changed data like cpu maps */
154 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); 129 READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)
155#endif
156 .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
157 *(.data.read_mostly)
158 130
159#ifdef CONFIG_X86_32
160 /* End of data section */ 131 /* End of data section */
161 _edata = .; 132 _edata = .;
162#endif 133 } :data
163 }
164 134
165#ifdef CONFIG_X86_64 135#ifdef CONFIG_X86_64
166 136
167#define VSYSCALL_ADDR (-10*1024*1024) 137#define VSYSCALL_ADDR (-10*1024*1024)
168#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ 138#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
169 SIZEOF(.data.read_mostly) + 4095) & ~(4095)) 139 PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
170#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ 140#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
171 SIZEOF(.data.read_mostly) + 4095) & ~(4095)) 141 PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
172 142
173#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) 143#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
174#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) 144#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
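
Editor's note on the reworked VSYSCALL_PHYS_ADDR / VSYSCALL_VIRT_ADDR macros above: they round the end of .data up to the next page boundary with the usual (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1) idiom. A standalone sketch with assumed example addresses:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Same round-up expression as the linker-script macros. */
static unsigned long page_round_up(unsigned long x)
{
	return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

int main(void)
{
	printf("0x1234 -> 0x%lx\n", page_round_up(0x1234));	/* 0x2000 */
	printf("0x2000 -> 0x%lx\n", page_round_up(0x2000));	/* 0x2000 (already aligned) */
	return 0;
}
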
@@ -234,35 +204,29 @@ SECTIONS
234 204
235#endif /* CONFIG_X86_64 */ 205#endif /* CONFIG_X86_64 */
236 206
237 /* init_task */ 207 /* Init code and data - will be freed after init */
238 . = ALIGN(THREAD_SIZE); 208 . = ALIGN(PAGE_SIZE);
239 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 209 .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
240 *(.data.init_task) 210 __init_begin = .; /* paired with __init_end */
241 } 211 }
242#ifdef CONFIG_X86_64
243 :data.init
244#endif
245 212
213#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
246 /* 214 /*
247 * smp_locks might be freed after init 215 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
248 * start/end must be page aligned 216 * output PHDR, so the next output section - .init.text - should
217 * start another segment - init.
249 */ 218 */
250 . = ALIGN(PAGE_SIZE); 219 PERCPU_VADDR(0, :percpu)
251 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { 220#endif
252 __smp_locks = .;
253 *(.smp_locks)
254 __smp_locks_end = .;
255 . = ALIGN(PAGE_SIZE);
256 }
257 221
258 /* Init code and data - will be freed after init */
259 . = ALIGN(PAGE_SIZE);
260 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 222 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
261 __init_begin = .; /* paired with __init_end */
262 _sinittext = .; 223 _sinittext = .;
263 INIT_TEXT 224 INIT_TEXT
264 _einittext = .; 225 _einittext = .;
265 } 226 }
227#ifdef CONFIG_X86_64
228 :init
229#endif
266 230
267 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { 231 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
268 INIT_DATA 232 INIT_DATA
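
Editor's note on the "percpu offsets are zero-based on SMP" comment above: zero-based means a per-cpu variable's link-time address is simply its offset from the start of the per-cpu area, and the instance belonging to a given CPU is reached by adding that CPU's area base at run time. A hedged sketch with hypothetical names (percpu_area, percpu_read, percpu_write are illustrations, not kernel APIs):

#include <stdio.h>
#include <string.h>

#define NR_CPUS		2
#define PERCPU_AREA_SIZE	64

/* Hypothetical per-CPU areas; in the kernel the base of the current
 * CPU's area lives in a segment register/MSR, here it is just an array. */
static unsigned char percpu_area[NR_CPUS][PERCPU_AREA_SIZE];

/* "Zero-based": the variable's address is its offset into the area,
 * and the run-time address is base-of-this-cpu + offset. */
static void percpu_write(int cpu, unsigned long offset, int value)
{
	memcpy(&percpu_area[cpu][offset], &value, sizeof(value));
}

static int percpu_read(int cpu, unsigned long offset)
{
	int value;

	memcpy(&value, &percpu_area[cpu][offset], sizeof(value));
	return value;
}

int main(void)
{
	unsigned long off = 16;		/* assumed offset of some per-cpu variable */

	percpu_write(0, off, 100);
	percpu_write(1, off, 200);
	printf("cpu0=%d cpu1=%d\n", percpu_read(0, off), percpu_read(1, off));
	return 0;
}
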
@@ -333,17 +297,7 @@ SECTIONS
333 } 297 }
334#endif 298#endif
335 299
336#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) 300#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
337 /*
338 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
339 * output PHDR, so the next output section - __data_nosave - should
340 * start another section data.init2. Also, pda should be at the head of
341 * percpu area. Preallocate it and define the percpu offset symbol
342 * so that it can be accessed as a percpu variable.
343 */
344 . = ALIGN(PAGE_SIZE);
345 PERCPU_VADDR(0, :percpu)
346#else
347 PERCPU(PAGE_SIZE) 301 PERCPU(PAGE_SIZE)
348#endif 302#endif
349 303
@@ -354,15 +308,22 @@ SECTIONS
354 __init_end = .; 308 __init_end = .;
355 } 309 }
356 310
311 /*
312 * smp_locks might be freed after init
313 * start/end must be page aligned
314 */
315 . = ALIGN(PAGE_SIZE);
316 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
317 __smp_locks = .;
318 *(.smp_locks)
319 __smp_locks_end = .;
320 . = ALIGN(PAGE_SIZE);
321 }
322
357#ifdef CONFIG_X86_64 323#ifdef CONFIG_X86_64
358 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { 324 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
359 . = ALIGN(PAGE_SIZE); 325 NOSAVE_DATA
360 __nosave_begin = .; 326 }
361 *(.data.nosave)
362 . = ALIGN(PAGE_SIZE);
363 __nosave_end = .;
364 } :data.init2
365 /* use another section data.init2, see PERCPU_VADDR() above */
366#endif 327#endif
367 328
368 /* BSS */ 329 /* BSS */
@@ -400,8 +361,8 @@ SECTIONS
400 361
401 362
402#ifdef CONFIG_X86_32 363#ifdef CONFIG_X86_32
403ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), 364. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
404 "kernel image bigger than KERNEL_IMAGE_SIZE") 365 "kernel image bigger than KERNEL_IMAGE_SIZE");
405#else 366#else
406/* 367/*
407 * Per-cpu symbols which need to be offset from __per_cpu_load 368 * Per-cpu symbols which need to be offset from __per_cpu_load
@@ -414,12 +375,12 @@ INIT_PER_CPU(irq_stack_union);
414/* 375/*
415 * Build-time check on the image size: 376 * Build-time check on the image size:
416 */ 377 */
417ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 378. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
418 "kernel image bigger than KERNEL_IMAGE_SIZE") 379 "kernel image bigger than KERNEL_IMAGE_SIZE");
419 380
420#ifdef CONFIG_SMP 381#ifdef CONFIG_SMP
421ASSERT((per_cpu__irq_stack_union == 0), 382. = ASSERT((per_cpu__irq_stack_union == 0),
422 "irq_stack_union is not at start of per-cpu area"); 383 "irq_stack_union is not at start of per-cpu area");
423#endif 384#endif
424 385
425#endif /* CONFIG_X86_32 */ 386#endif /* CONFIG_X86_32 */
@@ -427,7 +388,7 @@ ASSERT((per_cpu__irq_stack_union == 0),
427#ifdef CONFIG_KEXEC 388#ifdef CONFIG_KEXEC
428#include <asm/kexec.h> 389#include <asm/kexec.h>
429 390
430ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, 391. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
431 "kexec control code size is too big") 392 "kexec control code size is too big");
432#endif 393#endif
433 394