Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--  arch/x86_64/kernel/apic.c             104
-rw-r--r--  arch/x86_64/kernel/cpufreq/Kconfig      7
-rw-r--r--  arch/x86_64/kernel/cpufreq/Makefile     2
-rw-r--r--  arch/x86_64/kernel/crash.c             69
-rw-r--r--  arch/x86_64/kernel/early-quirks.c      13
-rw-r--r--  arch/x86_64/kernel/entry.S             69
-rw-r--r--  arch/x86_64/kernel/genapic.c            9
-rw-r--r--  arch/x86_64/kernel/head64.c             6
-rw-r--r--  arch/x86_64/kernel/i387.c               7
-rw-r--r--  arch/x86_64/kernel/i8259.c              3
-rw-r--r--  arch/x86_64/kernel/io_apic.c          134
-rw-r--r--  arch/x86_64/kernel/irq.c                2
-rw-r--r--  arch/x86_64/kernel/kprobes.c            2
-rw-r--r--  arch/x86_64/kernel/mce.c                9
-rw-r--r--  arch/x86_64/kernel/mce_amd.c            4
-rw-r--r--  arch/x86_64/kernel/module.c             5
-rw-r--r--  arch/x86_64/kernel/mpparse.c            2
-rw-r--r--  arch/x86_64/kernel/nmi.c               38
-rw-r--r--  arch/x86_64/kernel/pci-calgary.c      228
-rw-r--r--  arch/x86_64/kernel/pci-dma.c            5
-rw-r--r--  arch/x86_64/kernel/pci-gart.c           3
-rw-r--r--  arch/x86_64/kernel/process.c           51
-rw-r--r--  arch/x86_64/kernel/setup.c             24
-rw-r--r--  arch/x86_64/kernel/smp.c                5
-rw-r--r--  arch/x86_64/kernel/smpboot.c           20
-rw-r--r--  arch/x86_64/kernel/time.c              29
-rw-r--r--  arch/x86_64/kernel/traps.c            139
-rw-r--r--  arch/x86_64/kernel/vmlinux.lds.S       12
-rw-r--r--  arch/x86_64/kernel/vsyscall.c           7
29 files changed, 569 insertions, 439 deletions
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 4d9d5ed942b2..124b2d27b4ac 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -25,6 +25,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/module.h>
+#include <linux/ioport.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata;
 
 int disable_apic_timer __initdata;
 
+static struct resource *ioapic_resources;
+static struct resource lapic_resource = {
+	.name = "Local APIC",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
 /*
  * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
  * IPIs in place of local APIC timers
@@ -133,7 +140,6 @@ void clear_local_APIC(void)
 	apic_write(APIC_LVTERR, APIC_LVT_MASKED);
 	if (maxlvt >= 4)
 		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-	v = GET_APIC_VERSION(apic_read(APIC_LVR));
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 }
@@ -452,23 +458,30 @@ static struct {
 static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 {
 	unsigned long flags;
+	int maxlvt;
 
 	if (!apic_pm_state.active)
 		return 0;
 
+	maxlvt = get_maxlvt();
+
 	apic_pm_state.apic_id = apic_read(APIC_ID);
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
 	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
 	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-	apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+	if (maxlvt >= 4)
+		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
 	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
 	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-	apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#ifdef CONFIG_X86_MCE_INTEL
+	if (maxlvt >= 5)
+		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
 	local_irq_save(flags);
 	disable_local_APIC();
 	local_irq_restore(flags);
@@ -479,10 +492,13 @@ static int lapic_resume(struct sys_device *dev)
 {
 	unsigned int l, h;
 	unsigned long flags;
+	int maxlvt;
 
 	if (!apic_pm_state.active)
 		return 0;
 
+	maxlvt = get_maxlvt();
+
 	local_irq_save(flags);
 	rdmsr(MSR_IA32_APICBASE, l, h);
 	l &= ~MSR_IA32_APICBASE_BASE;
@@ -496,8 +512,12 @@ static int lapic_resume(struct sys_device *dev)
 	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
 	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
 	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-	apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-	apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+#ifdef CONFIG_X86_MCE_INTEL
+	if (maxlvt >= 5)
+		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+	if (maxlvt >= 4)
+		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
 	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
 	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
 	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
@@ -585,6 +605,64 @@ static int __init detect_init_APIC (void)
 	return 0;
 }
 
+#ifdef CONFIG_X86_IO_APIC
+static struct resource * __init ioapic_setup_resources(void)
+{
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+	unsigned long n;
+	struct resource *res;
+	char *mem;
+	int i;
+
+	if (nr_ioapics <= 0)
+		return NULL;
+
+	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+	n *= nr_ioapics;
+
+	mem = alloc_bootmem(n);
+	res = (void *)mem;
+
+	if (mem != NULL) {
+		memset(mem, 0, n);
+		mem += sizeof(struct resource) * nr_ioapics;
+
+		for (i = 0; i < nr_ioapics; i++) {
+			res[i].name = mem;
+			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			sprintf(mem, "IOAPIC %u", i);
+			mem += IOAPIC_RESOURCE_NAME_SIZE;
+		}
+	}
+
+	ioapic_resources = res;
+
+	return res;
+}
+
+static int __init ioapic_insert_resources(void)
+{
+	int i;
+	struct resource *r = ioapic_resources;
+
+	if (!r) {
+		printk("IO APIC resources could be not be allocated.\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_ioapics; i++) {
+		insert_resource(&iomem_resource, r);
+		r++;
+	}
+
+	return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occured to handle
+ * IO APICS that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+#endif
+
 void __init init_apic_mappings(void)
 {
 	unsigned long apic_phys;
@@ -604,6 +682,11 @@ void __init init_apic_mappings(void)
 	apic_mapped = 1;
 	apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
 
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
@@ -613,7 +696,9 @@ void __init init_apic_mappings(void)
 	{
 		unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 		int i;
+		struct resource *ioapic_res;
 
+		ioapic_res = ioapic_setup_resources();
 		for (i = 0; i < nr_ioapics; i++) {
 			if (smp_found_config) {
 				ioapic_phys = mp_ioapics[i].mpc_apicaddr;
@@ -625,6 +710,12 @@ void __init init_apic_mappings(void)
 			apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
 					__fix_to_virt(idx), ioapic_phys);
 			idx++;
+
+			if (ioapic_res != NULL) {
+				ioapic_res->start = ioapic_phys;
+				ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+				ioapic_res++;
+			}
 		}
 	}
 }
@@ -644,10 +735,9 @@ void __init init_apic_mappings(void)
 
 static void __setup_APIC_LVTT(unsigned int clocks)
 {
-	unsigned int lvtt_value, tmp_value, ver;
+	unsigned int lvtt_value, tmp_value;
 	int cpu = smp_processor_id();
 
-	ver = GET_APIC_VERSION(apic_read(APIC_LVR));
 	lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
 
 	if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
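
Note on the resource-map additions in this file: insert_resource() is the standard way to make an MMIO window visible in the iomem tree (/proc/iomem), exactly as done above for the local APIC page and each IO-APIC. A minimal sketch of the same pattern, with illustrative names only (example_res/example_claim are not part of this patch):

	#include <linux/ioport.h>

	/* Illustrative: claim a 4 KiB MMIO window in the resource tree. */
	static struct resource example_res = {
		.name  = "Example MMIO",
		.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
	};

	static int example_claim(unsigned long phys)
	{
		example_res.start = phys;
		example_res.end   = phys + 4096 - 1;	/* inclusive end */
		return insert_resource(&iomem_resource, &example_res);
	}
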
diff --git a/arch/x86_64/kernel/cpufreq/Kconfig b/arch/x86_64/kernel/cpufreq/Kconfig
index 81f1562e5393..45a6a1fd14ac 100644
--- a/arch/x86_64/kernel/cpufreq/Kconfig
+++ b/arch/x86_64/kernel/cpufreq/Kconfig
@@ -27,10 +27,13 @@ config X86_POWERNOW_K8_ACPI
 	default y
 
 config X86_SPEEDSTEP_CENTRINO
-	tristate "Intel Enhanced SpeedStep"
+	tristate "Intel Enhanced SpeedStep (deprecated)"
 	select CPU_FREQ_TABLE
 	depends on ACPI_PROCESSOR
 	help
+	  This is deprecated and this functionality is now merged into
+	  acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
+	  speedstep_centrino.
 	  This adds the CPUFreq driver for Enhanced SpeedStep enabled
 	  mobile CPUs. This means Intel Pentium M (Centrino) CPUs
 	  or 64bit enabled Intel Xeons.
@@ -46,10 +49,12 @@ config X86_SPEEDSTEP_CENTRINO_ACPI
 
 config X86_ACPI_CPUFREQ
 	tristate "ACPI Processor P-States driver"
+	select CPU_FREQ_TABLE
 	depends on ACPI_PROCESSOR
 	help
 	  This driver adds a CPUFreq driver which utilizes the ACPI
 	  Processor Performance States.
+	  This driver also supports Intel Enhanced Speedstep.
 
 	  For details, take a look at <file:Documentation/cpu-freq/>.
 
diff --git a/arch/x86_64/kernel/cpufreq/Makefile b/arch/x86_64/kernel/cpufreq/Makefile
index d8b593879224..753ce1dd418e 100644
--- a/arch/x86_64/kernel/cpufreq/Makefile
+++ b/arch/x86_64/kernel/cpufreq/Makefile
@@ -5,8 +5,8 @@
 SRCDIR := ../../../i386/kernel/cpu/cpufreq
 
 obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
-obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
 obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
+obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
 obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
 obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
 
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 3525f884af82..95a7a2c13131 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -28,71 +28,6 @@
 /* This keeps a track of which one is crashing cpu. */
 static int crashing_cpu;
 
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type,
-						void *data, size_t data_len)
-{
-	struct elf_note note;
-
-	note.n_namesz = strlen(name) + 1;
-	note.n_descsz = data_len;
-	note.n_type = type;
-	memcpy(buf, &note, sizeof(note));
-	buf += (sizeof(note) +3)/4;
-	memcpy(buf, name, note.n_namesz);
-	buf += (note.n_namesz + 3)/4;
-	memcpy(buf, data, note.n_descsz);
-	buf += (note.n_descsz + 3)/4;
-
-	return buf;
-}
-
-static void final_note(u32 *buf)
-{
-	struct elf_note note;
-
-	note.n_namesz = 0;
-	note.n_descsz = 0;
-	note.n_type = 0;
-	memcpy(buf, &note, sizeof(note));
-}
-
-static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
-{
-	struct elf_prstatus prstatus;
-	u32 *buf;
-
-	if ((cpu < 0) || (cpu >= NR_CPUS))
-		return;
-
-	/* Using ELF notes here is opportunistic.
-	 * I need a well defined structure format
-	 * for the data I pass, and I need tags
-	 * on the data to indicate what information I have
-	 * squirrelled away. ELF notes happen to provide
-	 * all of that, no need to invent something new.
-	 */
-
-	buf = (u32*)per_cpu_ptr(crash_notes, cpu);
-
-	if (!buf)
-		return;
-
-	memset(&prstatus, 0, sizeof(prstatus));
-	prstatus.pr_pid = current->pid;
-	elf_core_copy_regs(&prstatus.pr_reg, regs);
-	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
-				sizeof(prstatus));
-	final_note(buf);
-}
-
-static void crash_save_self(struct pt_regs *regs)
-{
-	int cpu;
-
-	cpu = smp_processor_id();
-	crash_save_this_cpu(regs, cpu);
-}
-
 #ifdef CONFIG_SMP
 static atomic_t waiting_for_crash_ipi;
 
@@ -117,7 +52,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 		return NOTIFY_STOP;
 	local_irq_disable();
 
-	crash_save_this_cpu(regs, cpu);
+	crash_save_cpu(regs, cpu);
 	disable_local_APIC();
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
@@ -196,5 +131,5 @@ void machine_crash_shutdown(struct pt_regs *regs)
 
 	disable_IO_APIC();
 
-	crash_save_self(regs);
+	crash_save_cpu(regs, smp_processor_id());
 }
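
The helpers removed above are replaced by the generic crash_save_cpu(), which does the same ELF-note writing from common kexec code. The note layout is plain ELF: a three-word header followed by the name and descriptor, each padded to a 4-byte boundary. A sketch of the size arithmetic the removed append_elf_note() relied on (illustrative only; elf_note_words is not a kernel function):

	/* Sketch: u32 words occupied by one ELF note, matching the removed
	 * helper's arithmetic (header, then name and desc padded to 4). */
	static inline unsigned long elf_note_words(unsigned long namesz,
						   unsigned long descsz)
	{
		return (3 * 4			/* n_namesz, n_descsz, n_type */
			+ ((namesz + 3) & ~3UL)	/* name, 4-byte padded */
			+ ((descsz + 3) & ~3UL))	/* desc, 4-byte padded */
			/ 4;
	}
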
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 68273bff58cc..49802f1bee94 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -76,6 +76,18 @@ static void ati_bugs(void)
 	}
 }
 
+static void intel_bugs(void)
+{
+	u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
+
+#ifdef CONFIG_SMP
+	if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
+	    device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
+	    device == PCI_DEVICE_ID_INTEL_E7525_MCH)
+		quirk_intel_irqbalance();
+#endif
+}
+
 struct chipset {
 	u16 vendor;
 	void (*f)(void);
@@ -85,6 +97,7 @@ static struct chipset early_qrk[] = {
 	{ PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
 	{ PCI_VENDOR_ID_VIA, via_bugs },
 	{ PCI_VENDOR_ID_ATI, ati_bugs },
+	{ PCI_VENDOR_ID_INTEL, intel_bugs},
 	{}
 };
 
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7d401b00d822..9f5dac64aa8f 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -230,7 +230,6 @@ ENTRY(system_call)
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	GET_THREAD_INFO(%rcx)
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
-	CFI_REMEMBER_STATE
 	jnz tracesys
 	cmpq $__NR_syscall_max,%rax
 	ja badsys
@@ -241,7 +240,6 @@ ENTRY(system_call)
  * Syscall return path ending with SYSRET (fast path)
  * Has incomplete stack frame and undefined top of stack.
  */
-	.globl ret_from_sys_call
 ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi: flagmask */
@@ -251,8 +249,8 @@ sysret_check:
 	TRACE_IRQS_OFF
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
-	CFI_REMEMBER_STATE
 	jnz sysret_careful
+	CFI_REMEMBER_STATE
 	/*
 	 * sysretq will re-enable interrupts:
 	 */
@@ -265,10 +263,10 @@ sysret_check:
 	swapgs
 	sysretq
 
+	CFI_RESTORE_STATE
 	/* Handle reschedules */
 	/* edx: work, edi: workmask */
 sysret_careful:
-	CFI_RESTORE_STATE
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
 	TRACE_IRQS_ON
@@ -306,7 +304,6 @@ badsys:
 
 	/* Do syscall tracing */
 tracesys:
-	CFI_RESTORE_STATE
 	SAVE_REST
 	movq $-ENOSYS,RAX(%rsp)
 	FIXUP_TOP_OF_STACK %rdi
@@ -322,32 +319,13 @@ tracesys:
 	call *sys_call_table(,%rax,8)
 1:	movq %rax,RAX-ARGOFFSET(%rsp)
 	/* Use IRET because user could have changed frame */
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(system_call)
 
 /*
  * Syscall return path ending with IRET.
  * Has correct top of stack, but partial stack frame.
  */
-ENTRY(int_ret_from_sys_call)
-	CFI_STARTPROC	simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
-	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
-	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
-	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
-	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
-	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
-	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
-	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
-	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
-	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
-	CFI_REL_OFFSET	r8,R8-ARGOFFSET
-	CFI_REL_OFFSET	r9,R9-ARGOFFSET
-	CFI_REL_OFFSET	r10,R10-ARGOFFSET
-	CFI_REL_OFFSET	r11,R11-ARGOFFSET
+	.globl int_ret_from_sys_call
+int_ret_from_sys_call:
 	cli
 	TRACE_IRQS_OFF
 	testl $3,CS-ARGOFFSET(%rsp)
@@ -394,8 +372,6 @@ int_very_careful:
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
 	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
-	cli
-	TRACE_IRQS_OFF
 	jmp int_restore_rest
 
 int_signal:
@@ -411,7 +387,7 @@ int_restore_rest:
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	CFI_ENDPROC
-END(int_ret_from_sys_call)
+END(system_call)
 
 /*
  * Certain special system calls that need to save a complete full stack frame.
@@ -1179,36 +1155,3 @@ ENTRY(call_softirq)
 	ret
 	CFI_ENDPROC
 ENDPROC(call_softirq)
-
-#ifdef CONFIG_STACK_UNWIND
-ENTRY(arch_unwind_init_running)
-	CFI_STARTPROC
-	movq	%r15, R15(%rdi)
-	movq	%r14, R14(%rdi)
-	xchgq	%rsi, %rdx
-	movq	%r13, R13(%rdi)
-	movq	%r12, R12(%rdi)
-	xorl	%eax, %eax
-	movq	%rbp, RBP(%rdi)
-	movq	%rbx, RBX(%rdi)
-	movq	(%rsp), %rcx
-	movq	%rax, R11(%rdi)
-	movq	%rax, R10(%rdi)
-	movq	%rax, R9(%rdi)
-	movq	%rax, R8(%rdi)
-	movq	%rax, RAX(%rdi)
-	movq	%rax, RCX(%rdi)
-	movq	%rax, RDX(%rdi)
-	movq	%rax, RSI(%rdi)
-	movq	%rax, RDI(%rdi)
-	movq	%rax, ORIG_RAX(%rdi)
-	movq	%rcx, RIP(%rdi)
-	leaq	8(%rsp), %rcx
-	movq	$__KERNEL_CS, CS(%rdi)
-	movq	%rax, EFLAGS(%rdi)
-	movq	%rcx, RSP(%rdi)
-	movq	$__KERNEL_DS, SS(%rdi)
-	jmpq	*%rdx
-	CFI_ENDPROC
-ENDPROC(arch_unwind_init_running)
-#endif
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 8e78a75d1866..b007433f96bb 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -33,7 +33,7 @@ extern struct genapic apic_flat;
 extern struct genapic apic_physflat;
 
 struct genapic *genapic = &apic_flat;
-
+struct genapic *genapic_force;
 
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
@@ -46,6 +46,13 @@ void __init clustered_apic_check(void)
 	u8 cluster_cnt[NUM_APIC_CLUSTERS];
 	int max_apic = 0;
 
+	/* genapic selection can be forced because of certain quirks.
+	 */
+	if (genapic_force) {
+		genapic = genapic_force;
+		goto print;
+	}
+
 #if defined(CONFIG_ACPI)
 	/*
 	 * Some x86_64 machines use physical APIC mode regardless of how many
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 9561eb3c5b5c..cc230b93cd1c 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
 {
 	int i;
 
-	for (i = 0; i < 256; i++)
+	/* clear bss before set_intr_gate with early_idt_handler */
+	clear_bss();
+
+	for (i = 0; i < IDT_ENTRIES; i++)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
-	clear_bss();
 
 	early_printk("Kernel alive\n");
 
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
index 3aa1e9bb781d..1d58c13bc6bc 100644
--- a/arch/x86_64/kernel/i387.c
+++ b/arch/x86_64/kernel/i387.c
@@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf)
 	struct task_struct *tsk = current;
 	int err = 0;
 
-	{
-		extern void bad_user_i387_struct(void);
-		if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave))
-			bad_user_i387_struct();
-	}
+	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+			sizeof(tsk->thread.i387.fxsave));
 
 	if ((unsigned long)buf % 16)
 		printk("save_i387: bad fpstate %p\n",buf);
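
BUILD_BUG_ON() gives the same compile-time size check without the old trick of calling an undefined function, which only failed at link time. At the time its kernel definition boiled down to the negative-array-size idiom, roughly:

	/* Roughly the kernel's definition: a true condition gives the
	 * array a negative size, which fails to compile. */
	#define BUILD_BUG_ON(condition) \
		((void)sizeof(char[1 - 2 * !!(condition)]))
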
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index c4ef801b765b..d73c79e821f1 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
 	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
 	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
 
-void (*interrupt[NR_IRQS])(void) = {
+/* for the irq vectors */
+static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
 	IRQLIST_16(0x2), IRQLIST_16(0x3),
 	IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
 	IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c80081a6ba41..d7bad90a5ad8 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -178,14 +178,20 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
  * the interrupt, and we need to make sure the entry is fully populated
  * before that happens.
  */
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-	unsigned long flags;
 	union entry_union eu;
 	eu.entry = e;
-	spin_lock_irqsave(&ioapic_lock, flags);
 	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__ioapic_write_entry(apic, pin, e);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -750,6 +756,22 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 	return vector;
 }
 
+static void __clear_irq_vector(int irq)
+{
+	cpumask_t mask;
+	int cpu, vector;
+
+	BUG_ON(!irq_vector[irq]);
+
+	vector = irq_vector[irq];
+	cpus_and(mask, irq_domain[irq], cpu_online_map);
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+
+	irq_vector[irq] = 0;
+	irq_domain[irq] = CPU_MASK_NONE;
+}
+
 void __setup_vector_irq(int cpu)
 {
 	/* Initialize vector_irq on a new cpu */
@@ -794,27 +816,65 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 			      handle_edge_irq, "edge");
 	}
 }
-
-static void __init setup_IO_APIC_irqs(void)
+static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
 {
 	struct IO_APIC_route_entry entry;
-	int apic, pin, idx, irq, first_notcon = 1, vector;
+	int vector;
 	unsigned long flags;
 
-	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic = 0; apic < nr_ioapics; apic++) {
-	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(&entry,0,sizeof(entry));
 
-		/*
-		 * add it to the IO-APIC irq-routing table:
-		 */
-		memset(&entry,0,sizeof(entry));
+	entry.delivery_mode = INT_DELIVERY_MODE;
+	entry.dest_mode = INT_DEST_MODE;
+	entry.mask = 0;				/* enable IRQ */
+	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 
-		entry.delivery_mode = INT_DELIVERY_MODE;
-		entry.dest_mode = INT_DEST_MODE;
-		entry.mask = 0; /* enable IRQ */
-		entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	entry.trigger = irq_trigger(idx);
+	entry.polarity = irq_polarity(idx);
+
+	if (irq_trigger(idx)) {
+		entry.trigger = 1;
+		entry.mask = 1;
+		entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	}
+
+	if (!apic && !IO_APIC_IRQ(irq))
+		return;
+
+	if (IO_APIC_IRQ(irq)) {
+		cpumask_t mask;
+		vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
+		if (vector < 0)
+			return;
+
+		entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+		entry.vector = vector;
+
+		ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+		if (!apic && (irq < 16))
+			disable_8259A_irq(irq);
+	}
+
+	ioapic_write_entry(apic, pin, entry);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	set_native_irq_info(irq, TARGET_CPUS);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+	int apic, pin, idx, irq, first_notcon = 1;
+
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
 		idx = find_irq_entry(apic,pin,mp_INT);
 		if (idx == -1) {
@@ -826,39 +886,11 @@ static void __init setup_IO_APIC_irqs(void)
 			continue;
 		}
 
-		entry.trigger = irq_trigger(idx);
-		entry.polarity = irq_polarity(idx);
-
-		if (irq_trigger(idx)) {
-			entry.trigger = 1;
-			entry.mask = 1;
-			entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-		}
-
 		irq = pin_2_irq(idx, apic, pin);
 		add_pin_to_irq(irq, apic, pin);
 
-		if (!apic && !IO_APIC_IRQ(irq))
-			continue;
-
-		if (IO_APIC_IRQ(irq)) {
-			cpumask_t mask;
-			vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
-			if (vector < 0)
-				continue;
-
-			entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
-			entry.vector = vector;
+		setup_IO_APIC_irq(apic, pin, idx, irq);
 
-			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
-			if (!apic && (irq < 16))
-				disable_8259A_irq(irq);
-		}
-		ioapic_write_entry(apic, pin, entry);
-
-		spin_lock_irqsave(&ioapic_lock, flags);
-		set_native_irq_info(irq, TARGET_CPUS);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 	}
 
@@ -1837,7 +1869,7 @@ void destroy_irq(unsigned int irq)
 	dynamic_irq_cleanup(irq);
 
 	spin_lock_irqsave(&vector_lock, flags);
-	irq_vector[irq] = 0;
+	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
@@ -2139,7 +2171,15 @@ void __init setup_ioapic_dest(void)
 		if (irq_entry == -1)
 			continue;
 		irq = pin_2_irq(irq_entry, ioapic, pin);
-		set_ioapic_affinity_irq(irq, TARGET_CPUS);
+
+		/* setup_IO_APIC_irqs could fail to get vector for some device
+		 * when you have too many devices, because at that time only boot
+		 * cpu is online.
+		 */
+		if(!irq_vector[irq])
+			setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
+		else
+			set_ioapic_affinity_irq(irq, TARGET_CPUS);
 	}
 
 	}
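
The new __clear_irq_vector() undoes everything assign_irq_vector() set up, including the per-CPU reverse mappings that the old bare `irq_vector[irq] = 0` in destroy_irq() left stale. The two tables being kept in sync are, roughly as declared elsewhere in this file (a sketch, not an exact copy of the declarations):

	/* Sketch of the bookkeeping: a global irq -> vector map plus a
	 * per-CPU vector -> irq map consulted at dispatch; -1 = unused. */
	unsigned int irq_vector[NR_IRQS];		/* irq -> vector */
	DEFINE_PER_CPU(int, vector_irq[NR_VECTORS]);	/* vector -> irq */
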
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index e46c55856d40..0c06af6c13bc 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 
 	if (likely(irq < NR_IRQS))
 		generic_handle_irq(irq);
-	else
+	else if (printk_ratelimit())
 		printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
 			__func__, smp_processor_id(), vector);
 
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index ac241567e682..209c8c0bec71 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -224,7 +224,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }
 
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index bbea88801d88..ac085038af29 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -306,8 +306,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
  */
 
 static int check_interval = 5 * 60; /* 5 minutes */
-static void mcheck_timer(void *data);
-static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
+static void mcheck_timer(struct work_struct *work);
+static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
 
 static void mcheck_check_cpu(void *info)
 {
@@ -315,7 +315,7 @@ static void mcheck_check_cpu(void *info)
 	do_machine_check(NULL, 0);
 }
 
-static void mcheck_timer(void *data)
+static void mcheck_timer(struct work_struct *work)
 {
 	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
 	schedule_delayed_work(&mcheck_work, check_interval * HZ);
@@ -641,7 +641,6 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	return err;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static void mce_remove_device(unsigned int cpu)
 {
 	int i;
@@ -652,6 +651,7 @@ static void mce_remove_device(unsigned int cpu)
 	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
 	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
 	sysdev_unregister(&per_cpu(device_mce,cpu));
+	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -674,7 +674,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 static struct notifier_block mce_cpu_notifier = {
 	.notifier_call = mce_cpu_callback,
 };
-#endif
 
 static __init int mce_init_device(void)
 {
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index 883fe747f64c..fa09debad4b7 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -551,7 +551,6 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 /*
  * let's be hotplug friendly.
  * in case of multiple core processors, the first core always takes ownership
@@ -594,12 +593,14 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 
 	sprintf(name, "threshold_bank%i", bank);
 
+#ifdef CONFIG_SMP
 	/* sibling symlink */
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
 		sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
 		return;
 	}
+#endif
 
 	/* remove all sibling symlinks before unregistering */
 	for_each_cpu_mask(i, b->cpus) {
@@ -656,7 +657,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
 static struct notifier_block threshold_cpu_notifier = {
 	.notifier_call = threshold_cpu_callback,
 };
-#endif /* CONFIG_HOTPLUG_CPU */
 
 static __init int threshold_init_device(void)
 {
diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c
index 9d0958ff547f..a888e67f5874 100644
--- a/arch/x86_64/kernel/module.c
+++ b/arch/x86_64/kernel/module.c
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/bug.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
@@ -173,10 +174,12 @@ int module_finalize(const Elf_Ehdr *hdr,
 			    lseg, lseg + locks->sh_size,
 			    tseg, tseg + text->sh_size);
 	}
-	return 0;
+
+	return module_bug_finalize(hdr, sechdrs, me);
 }
 
 void module_arch_cleanup(struct module *mod)
 {
 	alternatives_smp_module_del(mod);
+	module_bug_cleanup(mod);
 }
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index b147ab19fbd4..08072568847d 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -35,8 +35,6 @@
 int smp_found_config;
 unsigned int __initdata maxcpus = NR_CPUS;
 
-int acpi_found_madt;
-
 /*
  * Various Linux-internal data structures created from the
  * MP-table.
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 7af9cb3e2d99..186aebbae32d 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -12,14 +12,15 @@
  * Mikael Pettersson	: PM converted to driver model. Disable/enable API.
  */
 
+#include <linux/nmi.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
-#include <linux/nmi.h>
 #include <linux/sysctl.h>
 #include <linux/kprobes.h>
+#include <linux/cpumask.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi;
 static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
 static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
 
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
  */
@@ -190,6 +193,8 @@ void nmi_watchdog_default(void)
 		nmi_watchdog = NMI_IO_APIC;
 }
 
+static int endflag __initdata = 0;
+
 #ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
  * the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -197,7 +202,6 @@ void nmi_watchdog_default(void)
  */
 static __init void nmi_cpu_busy(void *data)
 {
-	volatile int *endflag = data;
 	local_irq_enable_in_hardirq();
 	/* Intentionally don't use cpu_relax here. This is
 	   to make sure that the performance counter really ticks,
@@ -205,14 +209,13 @@ static __init void nmi_cpu_busy(void *data)
 	   pause instruction. On a real HT machine this is fine because
 	   all other CPUs are busy with "useless" delay loops and don't
 	   care if they get somewhat less cycles. */
-	while (*endflag == 0)
-		barrier();
+	while (endflag == 0)
+		mb();
 }
 #endif
 
 int __init check_nmi_watchdog (void)
 {
-	volatile int endflag = 0;
 	int *counts;
 	int cpu;
 
@@ -253,6 +256,7 @@ int __init check_nmi_watchdog (void)
 	if (!atomic_read(&nmi_active)) {
 		kfree(counts);
 		atomic_set(&nmi_active, -1);
+		endflag = 1;
 		return -1;
 	}
 	endflag = 1;
@@ -782,6 +786,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 {
 	int sum;
 	int touched = 0;
+	int cpu = smp_processor_id();
 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 	u64 dummy;
 	int rc=0;
@@ -799,6 +804,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 		touched = 1;
 	}
 
+	if (cpu_isset(cpu, backtrace_mask)) {
+		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */
+
+		spin_lock(&lock);
+		printk("NMI backtrace for cpu %d\n", cpu);
+		dump_stack();
+		spin_unlock(&lock);
+		cpu_clear(cpu, backtrace_mask);
+	}
+
 #ifdef CONFIG_X86_MCE
 	/* Could check oops_in_progress here too, but it's safer
 	   not too */
@@ -931,6 +946,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 
 #endif
 
+void __trigger_all_cpu_backtrace(void)
+{
+	int i;
+
+	backtrace_mask = cpu_online_map;
+	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
+	for (i = 0; i < 10 * 1000; i++) {
+		if (cpus_empty(backtrace_mask))
+			break;
+		mdelay(1);
+	}
+}
+
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
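
__trigger_all_cpu_backtrace() publishes a request in backtrace_mask and busy-waits while each CPU's nmi_watchdog_tick() prints its stack and clears its own bit, so even CPUs spinning with interrupts off get dumped via NMI. A hypothetical call site (the exported function is real; report_stuck_system() is made up for illustration):

	extern void __trigger_all_cpu_backtrace(void);

	/* Hypothetical debugging hook: dump every CPU's stack via NMI. */
	static void report_stuck_system(void)
	{
		printk(KERN_ERR "system appears stuck, backtracing all CPUs\n");
		__trigger_all_cpu_backtrace();	/* returns after ~10 s at worst */
	}
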
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 37a770859e71..3d65b1d4c2b3 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -41,6 +41,13 @@
 #include <asm/pci-direct.h>
 #include <asm/system.h>
 #include <asm/dma.h>
+#include <asm/rio.h>
+
+#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
+int use_calgary __read_mostly = 1;
+#else
+int use_calgary __read_mostly = 0;
+#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
 
 #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
 #define PCI_VENDOR_DEVICE_ID_CALGARY \
@@ -115,14 +122,35 @@ static const unsigned long phb_offsets[] = {
 	0xB000 /* PHB3 */
 };
 
+/* PHB debug registers */
+
+static const unsigned long phb_debug_offsets[] = {
+	0x4000	/* PHB 0 DEBUG */,
+	0x5000	/* PHB 1 DEBUG */,
+	0x6000	/* PHB 2 DEBUG */,
+	0x7000	/* PHB 3 DEBUG */
+};
+
+/*
+ * STUFF register for each debug PHB,
+ * byte 1 = start bus number, byte 2 = end bus number
+ */
+
+#define PHB_DEBUG_STUFF_OFFSET	0x0020
+
 unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
 static int translate_empty_slots __read_mostly = 0;
 static int calgary_detected __read_mostly = 0;
 
+static struct rio_table_hdr	*rio_table_hdr __initdata;
+static struct scal_detail	*scal_devs[MAX_NUMNODES] __initdata;
+static struct rio_detail	*rio_devs[MAX_NUMNODES * 4] __initdata;
+
 struct calgary_bus_info {
 	void *tce_space;
 	unsigned char translation_disabled;
 	signed char phbid;
+	void __iomem *bbar;
 };
 
 static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
@@ -475,6 +503,11 @@ static struct dma_mapping_ops calgary_dma_ops = {
 	.unmap_sg = calgary_unmap_sg,
 };
 
+static inline void __iomem * busno_to_bbar(unsigned char num)
+{
+	return bus_info[num].bbar;
+}
+
 static inline int busno_to_phbid(unsigned char num)
 {
 	return bus_info[num].phbid;
@@ -620,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
 static void __init calgary_reserve_regions(struct pci_dev *dev)
 {
 	unsigned int npages;
-	void __iomem *bbar;
-	unsigned char busnum;
 	u64 start;
 	struct iommu_table *tbl = dev->sysdata;
 
-	bbar = tbl->bbar;
-	busnum = dev->bus->number;
-
 	/* reserve bad_dma_address in case it's a legal address */
 	iommu_range_reserve(tbl, bad_dma_address, 1);
 
@@ -740,7 +768,7 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar,
 {
 	u64 val64;
 	void __iomem *target;
-	unsigned long phb_shift = -1;
+	unsigned int phb_shift = ~0; /* silence gcc */
 	u64 mask;
 
 	switch (busno_to_phbid(busnum)) {
@@ -828,33 +856,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev)
 	del_timer_sync(&tbl->watchdog_timer);
 }
 
-static inline unsigned int __init locate_register_space(struct pci_dev *dev)
-{
-	int rionodeid;
-	u32 address;
-
-	/*
-	 * Each Calgary has four busses. The first four busses (first Calgary)
-	 * have RIO node ID 2, then the next four (second Calgary) have RIO
-	 * node ID 3, the next four (third Calgary) have node ID 2 again, etc.
-	 * We use a gross hack - relying on the dev->bus->number ordering,
-	 * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1,
-	 * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the
-	 * second (id 3), and then it repeats modulo 14.
-	 */
-	rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2;
-	/*
-	 * register space address calculation as follows:
-	 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
-	 * ChassisBase is always zero for x366/x260/x460
-	 * RioNodeId is 2 for first Calgary, 3 for second Calgary
-	 */
-	address = START_ADDRESS -
-		(0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) +
-		(0x100000) * (rionodeid - CHASSIS_BASE);
-	return address;
-}
-
 static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
 {
 	pci_dev_get(dev);
@@ -864,23 +865,15 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
 
 static int __init calgary_init_one(struct pci_dev *dev)
 {
-	u32 address;
 	void __iomem *bbar;
 	int ret;
 
 	BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
 
-	address = locate_register_space(dev);
-	/* map entire 1MB of Calgary config space */
-	bbar = ioremap_nocache(address, 1024 * 1024);
-	if (!bbar) {
-		ret = -ENODATA;
-		goto done;
-	}
-
+	bbar = busno_to_bbar(dev->bus->number);
 	ret = calgary_setup_tar(dev, bbar);
 	if (ret)
-		goto iounmap;
+		goto done;
 
 	pci_dev_get(dev);
 	dev->bus->self = dev;
@@ -888,17 +881,66 @@ static int __init calgary_init_one(struct pci_dev *dev)
 
 	return 0;
 
-iounmap:
-	iounmap(bbar);
 done:
 	return ret;
 }
 
+static int __init calgary_locate_bbars(void)
+{
+	int ret;
+	int rioidx, phb, bus;
+	void __iomem *bbar;
+	void __iomem *target;
+	unsigned long offset;
+	u8 start_bus, end_bus;
+	u32 val;
+
+	ret = -ENODATA;
+	for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
+		struct rio_detail *rio = rio_devs[rioidx];
+
+		if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
+			continue;
+
+		/* map entire 1MB of Calgary config space */
+		bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
+		if (!bbar)
+			goto error;
+
+		for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
+			offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
+			target = calgary_reg(bbar, offset);
+
+			val = be32_to_cpu(readl(target));
+			start_bus = (u8)((val & 0x00FF0000) >> 16);
+			end_bus = (u8)((val & 0x0000FF00) >> 8);
+			for (bus = start_bus; bus <= end_bus; bus++) {
+				bus_info[bus].bbar = bbar;
+				bus_info[bus].phbid = phb;
+			}
+		}
+	}
+
+	return 0;
+
+error:
+	/* scan bus_info and iounmap any bbars we previously ioremap'd */
+	for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
+		if (bus_info[bus].bbar)
+			iounmap(bus_info[bus].bbar);
+
+	return ret;
+}
+
 static int __init calgary_init(void)
 {
-	int ret = -ENODEV;
+	int ret;
 	struct pci_dev *dev = NULL;
 
+	ret = calgary_locate_bbars();
+	if (ret)
+		return ret;
+
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM,
 				     PCI_DEVICE_ID_IBM_CALGARY,
@@ -921,7 +963,7 @@ static int __init calgary_init(void)
 
 error:
 	do {
-		dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
+		dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM,
 					      PCI_DEVICE_ID_IBM_CALGARY,
 					      dev);
 		if (!dev)
@@ -962,13 +1004,56 @@ static inline int __init determine_tce_table_size(u64 ram)
 	return ret;
 }
 
+static int __init build_detail_arrays(void)
+{
+	unsigned long ptr;
+	int i, scal_detail_size, rio_detail_size;
+
+	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
+		printk(KERN_WARNING
+			"Calgary: MAX_NUMNODES too low! Defined as %d, "
+			"but system has %d nodes.\n",
+			MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+		return -ENODEV;
+	}
+
+	switch (rio_table_hdr->version){
+	case 2:
+		scal_detail_size = 11;
+		rio_detail_size = 13;
+		break;
+	case 3:
+		scal_detail_size = 12;
+		rio_detail_size = 15;
+		break;
+	default:
+		printk(KERN_WARNING
+		       "Calgary: Invalid Rio Grande Table Version: %d\n",
+		       rio_table_hdr->version);
+		return -EPROTO;
+	}
+
+	ptr = ((unsigned long)rio_table_hdr) + 3;
+	for (i = 0; i < rio_table_hdr->num_scal_dev;
+		    i++, ptr += scal_detail_size)
+		scal_devs[i] = (struct scal_detail *)ptr;
+
+	for (i = 0; i < rio_table_hdr->num_rio_dev;
+		    i++, ptr += rio_detail_size)
+		rio_devs[i] = (struct rio_detail *)ptr;
+
+	return 0;
+}
+
 void __init detect_calgary(void)
 {
 	u32 val;
 	int bus;
 	void *tbl;
 	int calgary_found = 0;
-	int phb = -1;
+	unsigned long ptr;
+	unsigned int offset, prev_offset;
+	int ret;
 
 	/*
 	 * if the user specified iommu=off or iommu=soft or we found
@@ -977,25 +1062,54 @@ void __init detect_calgary(void)
 	if (swiotlb || no_iommu || iommu_detected)
 		return;
 
+	if (!use_calgary)
+		return;
+
 	if (!early_pci_allowed())
 		return;
 
+	printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n");
+
+	ptr = (unsigned long)phys_to_virt(get_bios_ebda());
+
+	rio_table_hdr = NULL;
+	prev_offset = 0;
+	offset = 0x180;
+	/*
+	 * The next offset is stored in the 1st word.
+	 * Only parse up until the offset increases:
+	 */
+	while (offset > prev_offset) {
+		/* The block id is stored in the 2nd word */
+		if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
+			/* set the pointer past the offset & block id */
+			rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
+			break;
+		}
+		prev_offset = offset;
+		offset = *((unsigned short *)(ptr + offset));
+	}
+	if (!rio_table_hdr) {
+		printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table "
+		       "in EBDA - bailing!\n");
+		return;
+	}
+
+	ret = build_detail_arrays();
+	if (ret) {
+		printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret);
+		return;
+	}
+
 	specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		int dev;
 		struct calgary_bus_info *info = &bus_info[bus];
-		info->phbid = -1;
 
 		if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
 			continue;
 
-		/*
-		 * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3)
-		 * it is connected to releative to the clagary chip.
-		 */
-		phb = (phb + 1) % PHBS_PER_CALGARY;
-
 		if (info->translation_disabled)
 			continue;
 
@@ -1010,13 +1124,15 @@ void __init detect_calgary(void)
1010 if (!tbl) 1124 if (!tbl)
1011 goto cleanup; 1125 goto cleanup;
1012 info->tce_space = tbl; 1126 info->tce_space = tbl;
1013 info->phbid = phb;
1014 calgary_found = 1; 1127 calgary_found = 1;
1015 break; 1128 break;
1016 } 1129 }
1017 } 1130 }
1018 } 1131 }
1019 1132
1133 printk(KERN_DEBUG "Calgary: finished detection, Calgary %s\n",
1134 calgary_found ? "found" : "not found");
1135
1020 if (calgary_found) { 1136 if (calgary_found) {
1021 iommu_detected = 1; 1137 iommu_detected = 1;
1022 calgary_detected = 1; 1138 calgary_detected = 1;
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index f8d857453f8a..683b7a5c1ab3 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -296,6 +296,11 @@ __init int iommu_setup(char *p)
296 gart_parse_options(p); 296 gart_parse_options(p);
297#endif 297#endif
298 298
299#ifdef CONFIG_CALGARY_IOMMU
300 if (!strncmp(p, "calgary", 7))
301 use_calgary = 1;
302#endif /* CONFIG_CALGARY_IOMMU */
303
299 p += strcspn(p, ","); 304 p += strcspn(p, ",");
300 if (*p == ',') 305 if (*p == ',')
301 ++p; 306 ++p;
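The new "calgary" keyword rides on iommu_setup()'s existing scan: each token is matched with strncmp(), then the cursor jumps past the next comma, so iommu=soft,calgary sets both flags. A hedged userspace sketch of that scan, with only a few illustrative options rather than the kernel's full set:

#include <stdio.h>
#include <string.h>

static int no_iommu, force_swiotlb, use_calgary;

static void iommu_setup_sketch(const char *arg)
{
	const char *p = arg;

	while (*p) {
		if (!strncmp(p, "off", 3))
			no_iommu = 1;
		if (!strncmp(p, "soft", 4))
			force_swiotlb = 1;
		if (!strncmp(p, "calgary", 7))
			use_calgary = 1;

		p += strcspn(p, ",");	/* skip to the next separator... */
		if (*p == ',')
			++p;		/* ...and past it */
	}
}

int main(void)
{
	iommu_setup_sketch("soft,calgary");
	printf("no_iommu=%d swiotlb=%d calgary=%d\n",
	       no_iommu, force_swiotlb, use_calgary);
	return 0;
}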
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 16261a8a3303..fc1960f1f243 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -601,10 +601,9 @@ void __init gart_iommu_init(void)
601 (!force_iommu && end_pfn <= MAX_DMA32_PFN) || 601 (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
602 !iommu_aperture || 602 !iommu_aperture ||
603 (no_agp && init_k8_gatt(&info) < 0)) { 603 (no_agp && init_k8_gatt(&info) < 0)) {
604 printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
605 if (end_pfn > MAX_DMA32_PFN) { 604 if (end_pfn > MAX_DMA32_PFN) {
606 printk(KERN_ERR "WARNING more than 4GB of memory " 605 printk(KERN_ERR "WARNING more than 4GB of memory "
607 "but IOMMU not available.\n" 606 "but GART IOMMU not available.\n"
608 KERN_ERR "WARNING 32bit PCI may malfunction.\n"); 607 KERN_ERR "WARNING 32bit PCI may malfunction.\n");
609 } 608 }
610 return; 609 return;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 7451a4c43c16..cbbc6adc1a92 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -108,17 +108,19 @@ void exit_idle(void)
108 */ 108 */
109static void default_idle(void) 109static void default_idle(void)
110{ 110{
111 local_irq_enable();
112
113 current_thread_info()->status &= ~TS_POLLING; 111 current_thread_info()->status &= ~TS_POLLING;
114 smp_mb__after_clear_bit(); 112 /*
115 while (!need_resched()) { 113 * TS_POLLING-cleared state must be visible before we
116 local_irq_disable(); 114 * test NEED_RESCHED:
117 if (!need_resched()) 115 */
118 safe_halt(); 116 smp_mb();
119 else 117 local_irq_disable();
120 local_irq_enable(); 118 if (!need_resched()) {
121 } 119 /* Enables interrupts one instruction before HLT.
120 x86 special cases this so there is no race. */
121 safe_halt();
122 } else
123 local_irq_enable();
122 current_thread_info()->status |= TS_POLLING; 124 current_thread_info()->status |= TS_POLLING;
123} 125}
124 126
@@ -130,15 +132,7 @@ static void default_idle(void)
130static void poll_idle (void) 132static void poll_idle (void)
131{ 133{
132 local_irq_enable(); 134 local_irq_enable();
133 135 cpu_relax();
134 asm volatile(
135 "2:"
136 "testl %0,%1;"
137 "rep; nop;"
138 "je 2b;"
139 : :
140 "i" (_TIF_NEED_RESCHED),
141 "m" (current_thread_info()->flags));
142} 136}
143 137
144void cpu_idle_wait(void) 138void cpu_idle_wait(void)
@@ -219,6 +213,12 @@ void cpu_idle (void)
219 idle = default_idle; 213 idle = default_idle;
220 if (cpu_is_offline(smp_processor_id())) 214 if (cpu_is_offline(smp_processor_id()))
221 play_dead(); 215 play_dead();
216 /*
217 * Idle routines should keep interrupts disabled
218 * from here on, until they go to idle.
219 * Otherwise, idle callbacks can misfire.
220 */
221 local_irq_disable();
222 enter_idle(); 222 enter_idle();
223 idle(); 223 idle();
224 /* In many cases the interrupt that ended idle 224 /* In many cases the interrupt that ended idle
@@ -256,9 +256,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
256/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 256/* Default MONITOR/MWAIT with no hints, used for default C1 state */
257static void mwait_idle(void) 257static void mwait_idle(void)
258{ 258{
259 local_irq_enable(); 259 if (!need_resched()) {
260 while (!need_resched()) 260 __monitor((void *)&current_thread_info()->flags, 0, 0);
261 mwait_idle_with_hints(0,0); 261 smp_mb();
262 if (!need_resched())
263 __sti_mwait(0, 0);
264 else
265 local_irq_enable();
266 } else {
267 local_irq_enable();
268 }
262} 269}
263 270
264void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 271void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
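Both rewritten idle routines above follow one discipline: clear TS_POLLING so other CPUs know this one must be interrupted to wake, issue a full barrier, then re-check need_resched() with a race-free sleep primitive (sti;hlt, or monitor/mwait re-armed after the check). A userspace analogue of that wake-up protocol, with pthreads standing in for the IPI machinery and all names invented for the demo:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
static int polling = 1;		/* analogue of TS_POLLING */
static int need_resched_flag;	/* analogue of TIF_NEED_RESCHED */

static void *idle_thread(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&lock);
	polling = 0;
	/* the mutex plays the role of smp_mb(): the flag store is
	 * visible before the wake condition is re-tested */
	while (!need_resched_flag)
		pthread_cond_wait(&wake, &lock);	/* analogue of safe_halt() */
	polling = 1;
	pthread_mutex_unlock(&lock);
	printf("idle: woken\n");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, idle_thread, NULL);
	usleep(10000);

	pthread_mutex_lock(&lock);
	need_resched_flag = 1;
	if (!polling)			/* only kick CPUs that really sleep */
		pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	return 0;
}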
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index fc944b5e8f4a..af425a8049fb 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -471,8 +471,7 @@ void __init setup_arch(char **cmdline_p)
471 if (LOADER_TYPE && INITRD_START) { 471 if (LOADER_TYPE && INITRD_START) {
472 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { 472 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
473 reserve_bootmem_generic(INITRD_START, INITRD_SIZE); 473 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
474 initrd_start = 474 initrd_start = INITRD_START + PAGE_OFFSET;
475 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
476 initrd_end = initrd_start+INITRD_SIZE; 475 initrd_end = initrd_start+INITRD_SIZE;
477 } 476 }
478 else { 477 else {
@@ -732,11 +731,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
732 /* Fix cpuid4 emulation for more */ 731 /* Fix cpuid4 emulation for more */
733 num_cache_leaves = 3; 732 num_cache_leaves = 3;
734 733
735 /* When there is only one core no need to synchronize RDTSC */ 734 /* RDTSC can be speculated around */
736 if (num_possible_cpus() == 1) 735 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
737 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
738 else
739 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
740} 736}
741 737
742static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 738static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -835,6 +831,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
835 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); 831 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
836 } 832 }
837 833
834 if (cpu_has_ds) {
835 unsigned int l1, l2;
836 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
837 if (!(l1 & (1<<11)))
838 set_bit(X86_FEATURE_BTS, c->x86_capability);
839 if (!(l1 & (1<<12)))
840 set_bit(X86_FEATURE_PEBS, c->x86_capability);
841 }
842
838 n = c->extended_cpuid_level; 843 n = c->extended_cpuid_level;
839 if (n >= 0x80000008) { 844 if (n >= 0x80000008) {
840 unsigned eax = cpuid_eax(0x80000008); 845 unsigned eax = cpuid_eax(0x80000008);
@@ -854,7 +859,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
854 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 859 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
855 if (c->x86 == 6) 860 if (c->x86 == 6)
856 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); 861 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
857 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); 862 if (c->x86 == 15)
863 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
864 else
865 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
858 c->x86_max_cores = intel_num_cpu_cores(c); 866 c->x86_max_cores = intel_num_cpu_cores(c);
859 867
860 srat_detect_node(); 868 srat_detect_node();
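The cpu_has_ds block added to init_intel() above turns two IA32_MISC_ENABLE bits into feature flags: bit 11 set means BTS is unavailable and bit 12 set means PEBS is unavailable, so the features are advertised only when those bits are clear. A small sketch of the same decode on a fabricated MSR value:

#include <stdio.h>
#include <stdint.h>

#define MISC_ENABLE_BTS_UNAVAIL		(1u << 11)
#define MISC_ENABLE_PEBS_UNAVAIL	(1u << 12)

int main(void)
{
	uint32_t l1 = 1u << 11;	/* fabricated low word of IA32_MISC_ENABLE */

	printf("BTS:  %s\n",
	       (l1 & MISC_ENABLE_BTS_UNAVAIL) ? "unavailable" : "available");
	printf("PEBS: %s\n",
	       (l1 & MISC_ENABLE_PEBS_UNAVAIL) ? "unavailable" : "available");
	return 0;
}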
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 9f74c883568c..af1ec4d23cf8 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -379,12 +379,17 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
379 put_cpu(); 379 put_cpu();
380 return 0; 380 return 0;
381 } 381 }
382
383 /* Can deadlock when called with interrupts disabled */
384 WARN_ON(irqs_disabled());
385
382 spin_lock_bh(&call_lock); 386 spin_lock_bh(&call_lock);
383 __smp_call_function_single(cpu, func, info, nonatomic, wait); 387 __smp_call_function_single(cpu, func, info, nonatomic, wait);
384 spin_unlock_bh(&call_lock); 388 spin_unlock_bh(&call_lock);
385 put_cpu(); 389 put_cpu();
386 return 0; 390 return 0;
387} 391}
392EXPORT_SYMBOL(smp_call_function_single);
388 393
389/* 394/*
390 * this function sends a 'generic call function' IPI to all other CPUs 395 * this function sends a 'generic call function' IPI to all other CPUs
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 62c2e747af58..daf19332f0dd 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,6 +60,7 @@
60#include <asm/irq.h> 60#include <asm/irq.h>
61#include <asm/hw_irq.h> 61#include <asm/hw_irq.h>
62#include <asm/numa.h> 62#include <asm/numa.h>
63#include <asm/genapic.h>
63 64
64/* Number of siblings per CPU package */ 65/* Number of siblings per CPU package */
65int smp_num_siblings = 1; 66int smp_num_siblings = 1;
@@ -753,14 +754,16 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
753} 754}
754 755
755struct create_idle { 756struct create_idle {
757 struct work_struct work;
756 struct task_struct *idle; 758 struct task_struct *idle;
757 struct completion done; 759 struct completion done;
758 int cpu; 760 int cpu;
759}; 761};
760 762
761void do_fork_idle(void *_c_idle) 763void do_fork_idle(struct work_struct *work)
762{ 764{
763 struct create_idle *c_idle = _c_idle; 765 struct create_idle *c_idle =
766 container_of(work, struct create_idle, work);
764 767
765 c_idle->idle = fork_idle(c_idle->cpu); 768 c_idle->idle = fork_idle(c_idle->cpu);
766 complete(&c_idle->done); 769 complete(&c_idle->done);
@@ -775,10 +778,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
775 int timeout; 778 int timeout;
776 unsigned long start_rip; 779 unsigned long start_rip;
777 struct create_idle c_idle = { 780 struct create_idle c_idle = {
781 .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
778 .cpu = cpu, 782 .cpu = cpu,
779 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 783 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
780 }; 784 };
781 DECLARE_WORK(work, do_fork_idle, &c_idle);
782 785
783 /* allocate memory for gdts of secondary cpus. Hotplug is considered */ 786 /* allocate memory for gdts of secondary cpus. Hotplug is considered */
784 if (!cpu_gdt_descr[cpu].address && 787 if (!cpu_gdt_descr[cpu].address &&
@@ -825,9 +828,9 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
825 * thread. 828 * thread.
826 */ 829 */
827 if (!keventd_up() || current_is_keventd()) 830 if (!keventd_up() || current_is_keventd())
828 work.func(work.data); 831 c_idle.work.func(&c_idle.work);
829 else { 832 else {
830 schedule_work(&work); 833 schedule_work(&c_idle.work);
831 wait_for_completion(&c_idle.done); 834 wait_for_completion(&c_idle.done);
832 } 835 }
833 836
@@ -1167,6 +1170,13 @@ int __cpuinit __cpu_up(unsigned int cpu)
1167 1170
1168 while (!cpu_isset(cpu, cpu_online_map)) 1171 while (!cpu_isset(cpu, cpu_online_map))
1169 cpu_relax(); 1172 cpu_relax();
1173
1174 if (num_online_cpus() > 8 && genapic == &apic_flat) {
1175 printk(KERN_WARNING
1176 "flat APIC routing can't be used with > 8 cpus\n");
1177 BUG();
1178 }
1179
1170 err = 0; 1180 err = 0;
1171 1181
1172 return err; 1182 return err;
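The create_idle changes track this kernel generation's workqueue API rework: a work callback now receives the work_struct itself and recovers its containing context with container_of(), rather than taking an opaque void * as before. A self-contained userspace mock of the pattern; the work_struct and container_of below are simplified stand-ins, not the kernel definitions.

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct {
	void (*func)(struct work_struct *work);
};

struct create_idle {
	struct work_struct work;
	int cpu;
};

static void do_fork_idle(struct work_struct *work)
{
	struct create_idle *c_idle =
		container_of(work, struct create_idle, work);

	printf("forking idle task for cpu %d\n", c_idle->cpu);
}

int main(void)
{
	struct create_idle c_idle = {
		.work = { .func = do_fork_idle },
		.cpu = 1,
	};

	/* what schedule_work() would eventually invoke */
	c_idle.work.func(&c_idle.work);
	return 0;
}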
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index e3ef544d2cfb..5cc76d0d331f 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -563,7 +563,7 @@ static unsigned int cpufreq_delayed_issched = 0;
563static unsigned int cpufreq_init = 0; 563static unsigned int cpufreq_init = 0;
564static struct work_struct cpufreq_delayed_get_work; 564static struct work_struct cpufreq_delayed_get_work;
565 565
566static void handle_cpufreq_delayed_get(void *v) 566static void handle_cpufreq_delayed_get(struct work_struct *v)
567{ 567{
568 unsigned int cpu; 568 unsigned int cpu;
569 for_each_online_cpu(cpu) { 569 for_each_online_cpu(cpu) {
@@ -639,7 +639,7 @@ static struct notifier_block time_cpufreq_notifier_block = {
639 639
640static int __init cpufreq_tsc(void) 640static int __init cpufreq_tsc(void)
641{ 641{
642 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); 642 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
643 if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, 643 if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
644 CPUFREQ_TRANSITION_NOTIFIER)) 644 CPUFREQ_TRANSITION_NOTIFIER))
645 cpufreq_init = 1; 645 cpufreq_init = 1;
@@ -656,6 +656,25 @@ core_initcall(cpufreq_tsc);
656 */ 656 */
657 657
658#define TICK_COUNT 100000000 658#define TICK_COUNT 100000000
659#define TICK_MIN 5000
660
661/*
662 * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none
663 * occurs between the reads of the hpet & TSC.
664 */
665static void __init read_hpet_tsc(int *hpet, int *tsc)
666{
667 int tsc1, tsc2, hpet1;
668
669 do {
670 tsc1 = get_cycles_sync();
671 hpet1 = hpet_readl(HPET_COUNTER);
672 tsc2 = get_cycles_sync();
673 } while (tsc2 - tsc1 > TICK_MIN);
674 *hpet = hpet1;
675 *tsc = tsc2;
676}
677
659 678
660static unsigned int __init hpet_calibrate_tsc(void) 679static unsigned int __init hpet_calibrate_tsc(void)
661{ 680{
@@ -666,13 +685,11 @@ static unsigned int __init hpet_calibrate_tsc(void)
666 local_irq_save(flags); 685 local_irq_save(flags);
667 local_irq_disable(); 686 local_irq_disable();
668 687
669 hpet_start = hpet_readl(HPET_COUNTER); 688 read_hpet_tsc(&hpet_start, &tsc_start);
670 rdtscl(tsc_start);
671 689
672 do { 690 do {
673 local_irq_disable(); 691 local_irq_disable();
674 hpet_now = hpet_readl(HPET_COUNTER); 692 read_hpet_tsc(&hpet_now, &tsc_now);
675 tsc_now = get_cycles_sync();
676 local_irq_restore(flags); 693 local_irq_restore(flags);
677 } while ((tsc_now - tsc_start) < TICK_COUNT && 694 } while ((tsc_now - tsc_start) < TICK_COUNT &&
678 (hpet_now - hpet_start) < TICK_COUNT); 695 (hpet_now - hpet_start) < TICK_COUNT);
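read_hpet_tsc() defends against SMIs by bracketing the HPET read with two TSC reads and retrying whenever the bracket exceeds TICK_MIN cycles: any SMI long enough to skew the calibration also widens the bracket and forces another attempt. A generic userspace rendering of the same trick, with CLOCK_MONOTONIC standing in for both counters:

#include <stdio.h>
#include <time.h>

static long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* sample 'slow' between two 'fast' reads; retry if the pair took too long */
static void read_pair(long long *slow, long long *fast, long long max_ns)
{
	long long t1, t2;

	do {
		t1 = now_ns();
		*slow = now_ns();	/* stands in for hpet_readl(HPET_COUNTER) */
		t2 = now_ns();
	} while (t2 - t1 > max_ns);
	*fast = t2;
}

int main(void)
{
	long long hpet, tsc;

	read_pair(&hpet, &tsc, 5000);	/* retry if the pair took > 5us */
	printf("paired sample: slow=%lld fast=%lld\n", hpet, tsc);
	return 0;
}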
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 0d65b22f229c..09d2e8a10a49 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,9 +30,10 @@
30#include <linux/kprobes.h> 30#include <linux/kprobes.h>
31#include <linux/kexec.h> 31#include <linux/kexec.h>
32#include <linux/unwind.h> 32#include <linux/unwind.h>
33#include <linux/uaccess.h>
34#include <linux/bug.h>
33 35
34#include <asm/system.h> 36#include <asm/system.h>
35#include <asm/uaccess.h>
36#include <asm/io.h> 37#include <asm/io.h>
37#include <asm/atomic.h> 38#include <asm/atomic.h>
38#include <asm/debugreg.h> 39#include <asm/debugreg.h>
@@ -108,12 +109,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
108 preempt_enable_no_resched(); 109 preempt_enable_no_resched();
109} 110}
110 111
111static int kstack_depth_to_print = 12; 112int kstack_depth_to_print = 12;
112#ifdef CONFIG_STACK_UNWIND
113static int call_trace = 1;
114#else
115#define call_trace (-1)
116#endif
117 113
118#ifdef CONFIG_KALLSYMS 114#ifdef CONFIG_KALLSYMS
119void printk_address(unsigned long address) 115void printk_address(unsigned long address)
@@ -216,24 +212,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
216 return NULL; 212 return NULL;
217} 213}
218 214
219struct ops_and_data { 215#define MSG(txt) ops->warning(data, txt)
220 struct stacktrace_ops *ops;
221 void *data;
222};
223
224static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
225{
226 struct ops_and_data *oad = (struct ops_and_data *)context;
227 int n = 0;
228
229 while (unwind(info) == 0 && UNW_PC(info)) {
230 n++;
231 oad->ops->address(oad->data, UNW_PC(info));
232 if (arch_unw_user_mode(info))
233 break;
234 }
235 return n;
236}
237 216
238/* 217/*
239 * x86-64 can have up to three kernel stacks: 218
@@ -248,61 +227,24 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
248 return p > t && p < t + THREAD_SIZE - 3; 227 return p > t && p < t + THREAD_SIZE - 3;
249} 228}
250 229
251void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, 230void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
231 unsigned long *stack,
252 struct stacktrace_ops *ops, void *data) 232 struct stacktrace_ops *ops, void *data)
253{ 233{
254 const unsigned cpu = smp_processor_id(); 234 const unsigned cpu = get_cpu();
255 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 235 unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
256 unsigned used = 0; 236 unsigned used = 0;
257 struct thread_info *tinfo; 237 struct thread_info *tinfo;
258 238
259 if (!tsk) 239 if (!tsk)
260 tsk = current; 240 tsk = current;
261 241
262 if (call_trace >= 0) {
263 int unw_ret = 0;
264 struct unwind_frame_info info;
265 struct ops_and_data oad = { .ops = ops, .data = data };
266
267 if (regs) {
268 if (unwind_init_frame_info(&info, tsk, regs) == 0)
269 unw_ret = dump_trace_unwind(&info, &oad);
270 } else if (tsk == current)
271 unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
272 else {
273 if (unwind_init_blocked(&info, tsk) == 0)
274 unw_ret = dump_trace_unwind(&info, &oad);
275 }
276 if (unw_ret > 0) {
277 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
278 ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
279 UNW_PC(&info));
280 if ((long)UNW_SP(&info) < 0) {
281 ops->warning(data, "Leftover inexact backtrace:\n");
282 stack = (unsigned long *)UNW_SP(&info);
283 if (!stack)
284 return;
285 } else
286 ops->warning(data, "Full inexact backtrace again:\n");
287 } else if (call_trace >= 1)
288 return;
289 else
290 ops->warning(data, "Full inexact backtrace again:\n");
291 } else
292 ops->warning(data, "Inexact backtrace:\n");
293 }
294 if (!stack) { 242 if (!stack) {
295 unsigned long dummy; 243 unsigned long dummy;
296 stack = &dummy; 244 stack = &dummy;
297 if (tsk && tsk != current) 245 if (tsk && tsk != current)
298 stack = (unsigned long *)tsk->thread.rsp; 246 stack = (unsigned long *)tsk->thread.rsp;
299 } 247 }
300 /*
301 * Align the stack pointer on word boundary, later loops
302 * rely on that (and corruption / debug info bugs can cause
303 * unaligned values here):
304 */
305 stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
306 248
307 /* 249 /*
308 * Print function call entries within a stack. 'cond' is the 250 * Print function call entries within a stack. 'cond' is the
@@ -312,9 +254,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
312#define HANDLE_STACK(cond) \ 254#define HANDLE_STACK(cond) \
313 do while (cond) { \ 255 do while (cond) { \
314 unsigned long addr = *stack++; \ 256 unsigned long addr = *stack++; \
315 if (oops_in_progress ? \ 257 /* Use unlocked access here because except for NMIs \
316 __kernel_text_address(addr) : \ 258 we should already be protected against module unloads */ \
317 kernel_text_address(addr)) { \ 259 if (__kernel_text_address(addr)) { \
318 /* \ 260 /* \
319 * If the address is either in the text segment of the \ 261 * If the address is either in the text segment of the \
320 * kernel, or in the region which contains vmalloc'ed \ 262 * kernel, or in the region which contains vmalloc'ed \
@@ -377,9 +319,10 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
377 /* 319 /*
378 * This handles the process stack: 320 * This handles the process stack:
379 */ 321 */
380 tinfo = current_thread_info(); 322 tinfo = task_thread_info(tsk);
381 HANDLE_STACK (valid_stack_ptr(tinfo, stack)); 323 HANDLE_STACK (valid_stack_ptr(tinfo, stack));
382#undef HANDLE_STACK 324#undef HANDLE_STACK
325 put_cpu();
383} 326}
384EXPORT_SYMBOL(dump_trace); 327EXPORT_SYMBOL(dump_trace);
385 328
@@ -516,30 +459,15 @@ bad:
516 printk("\n"); 459 printk("\n");
517} 460}
518 461
519void handle_BUG(struct pt_regs *regs) 462int is_valid_bugaddr(unsigned long rip)
520{ 463{
521 struct bug_frame f; 464 unsigned short ud2;
522 long len;
523 const char *prefix = "";
524 465
525 if (user_mode(regs)) 466 if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
526 return; 467 return 0;
527 if (__copy_from_user(&f, (const void __user *) regs->rip, 468
528 sizeof(struct bug_frame))) 469 return ud2 == 0x0b0f;
529 return; 470}
530 if (f.filename >= 0 ||
531 f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
532 return;
533 len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1;
534 if (len < 0 || len >= PATH_MAX)
535 f.filename = (int)(long)"unmapped filename";
536 else if (len > 50) {
537 f.filename += len - 50;
538 prefix = "...";
539 }
540 printk("----------- [cut here ] --------- [please bite here ] ---------\n");
541 printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line);
542}
543 471
544#ifdef CONFIG_BUG 472#ifdef CONFIG_BUG
545void out_of_line_bug(void) 473void out_of_line_bug(void)
@@ -619,7 +547,9 @@ void die(const char * str, struct pt_regs * regs, long err)
619{ 547{
620 unsigned long flags = oops_begin(); 548 unsigned long flags = oops_begin();
621 549
622 handle_BUG(regs); 550 if (!user_mode(regs))
551 report_bug(regs->rip);
552
623 __die(str, regs, err); 553 __die(str, regs, err);
624 oops_end(flags); 554 oops_end(flags);
625 do_exit(SIGSEGV); 555 do_exit(SIGSEGV);
@@ -786,8 +716,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
786{ 716{
787 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", 717 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
788 reason); 718 reason);
789 printk(KERN_EMERG "You probably have a hardware problem with your " 719 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
790 "RAM chips\n");
791 720
792 if (panic_on_unrecovered_nmi) 721 if (panic_on_unrecovered_nmi)
793 panic("NMI: Not continuing"); 722 panic("NMI: Not continuing");
@@ -1193,21 +1122,3 @@ static int __init kstack_setup(char *s)
1193 return 0; 1122 return 0;
1194} 1123}
1195early_param("kstack", kstack_setup); 1124early_param("kstack", kstack_setup);
1196
1197#ifdef CONFIG_STACK_UNWIND
1198static int __init call_trace_setup(char *s)
1199{
1200 if (!s)
1201 return -EINVAL;
1202 if (strcmp(s, "old") == 0)
1203 call_trace = -1;
1204 else if (strcmp(s, "both") == 0)
1205 call_trace = 0;
1206 else if (strcmp(s, "newfallback") == 0)
1207 call_trace = 1;
1208 else if (strcmp(s, "new") == 0)
1209 call_trace = 2;
1210 return 0;
1211}
1212early_param("call_trace", call_trace_setup);
1213#endif
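The traps.c rework retires the open-coded bug_frame parsing in favor of the generic report_bug() path: BUG() plants a ud2 instruction (opcode bytes 0x0f 0x0b) whose file and line metadata live in the __bug_table section, and die() only consults that table when is_valid_bugaddr() confirms those bytes sit at the faulting rip. A userspace illustration of that opcode check; the byte buffers are fabricated, and the kernel version reads the bytes with __copy_from_user():

#include <stdio.h>
#include <string.h>
#include <stdint.h>

static int is_valid_bugaddr(const void *rip)
{
	uint16_t ud2;

	memcpy(&ud2, rip, sizeof(ud2));
	return ud2 == 0x0b0f;	/* bytes 0x0f, 0x0b on a little-endian host */
}

int main(void)
{
	const uint8_t planted_ud2[] = { 0x0f, 0x0b };
	const uint8_t plain_nops[]  = { 0x90, 0x90 };

	printf("ud2 bytes: %s\n", is_valid_bugaddr(planted_ud2) ? "yes" : "no");
	printf("nop bytes: %s\n", is_valid_bugaddr(plain_nops) ? "yes" : "no");
	return 0;
}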
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index d9534e750d4f..1e54ddf2338d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -13,6 +13,7 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 13OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 14ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 15jiffies_64 = jiffies;
16_proxy_pda = 0;
16PHDRS { 17PHDRS {
17 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
18 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
@@ -51,14 +52,7 @@ SECTIONS
51 52
52 RODATA 53 RODATA
53 54
54#ifdef CONFIG_STACK_UNWIND 55 BUG_TABLE
55 . = ALIGN(8);
56 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
57 __start_unwind = .;
58 *(.eh_frame)
59 __end_unwind = .;
60 }
61#endif
62 56
63 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ 57 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
64 /* Data */ 58 /* Data */
@@ -227,9 +221,7 @@ SECTIONS
227 /* Sections to be discarded */ 221 /* Sections to be discarded */
228 /DISCARD/ : { 222 /DISCARD/ : {
229 *(.exitcall.exit) 223 *(.exitcall.exit)
230#ifndef CONFIG_UNWIND_INFO
231 *(.eh_frame) 224 *(.eh_frame)
232#endif
233 } 225 }
234 226
235 STABS_DEBUG 227 STABS_DEBUG
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 92546c1526f1..2433d6fc68b1 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -42,6 +42,7 @@
42#include <asm/topology.h> 42#include <asm/topology.h>
43 43
44#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 44#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
45#define __syscall_clobber "r11","rcx","memory"
45 46
46int __sysctl_vsyscall __section_sysctl_vsyscall = 1; 47int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
47seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; 48seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
@@ -224,8 +225,7 @@ out:
224 225
225static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, 226static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
226 void __user *oldval, size_t __user *oldlenp, 227 void __user *oldval, size_t __user *oldlenp,
227 void __user *newval, size_t newlen, 228 void __user *newval, size_t newlen)
228 void **context)
229{ 229{
230 return -ENOSYS; 230 return -ENOSYS;
231} 231}
@@ -274,7 +274,6 @@ static void __cpuinit cpu_vsyscall_init(void *arg)
274 vsyscall_set_cpu(raw_smp_processor_id()); 274 vsyscall_set_cpu(raw_smp_processor_id());
275} 275}
276 276
277#ifdef CONFIG_HOTPLUG_CPU
278static int __cpuinit 277static int __cpuinit
279cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) 278cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
280{ 279{
@@ -283,13 +282,13 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
283 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); 282 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
284 return NOTIFY_DONE; 283 return NOTIFY_DONE;
285} 284}
286#endif
287 285
288static void __init map_vsyscall(void) 286static void __init map_vsyscall(void)
289{ 287{
290 extern char __vsyscall_0; 288 extern char __vsyscall_0;
291 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); 289 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
292 290
291 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
293 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 292 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
294} 293}
295 294