Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 57
-rw-r--r--  arch/x86/Kconfig.cpu | 1
-rw-r--r--  arch/x86/Kconfig.debug | 4
-rw-r--r--  arch/x86/boot/tty.c | 2
-rw-r--r--  arch/x86/include/asm/acpi.h | 1
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h | 24
-rw-r--r--  arch/x86/include/asm/apic.h | 1
-rw-r--r--  arch/x86/include/asm/bigsmp/apic.h | 32
-rw-r--r--  arch/x86/include/asm/bigsmp/ipi.h | 13
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 6
-rw-r--r--  arch/x86/include/asm/ds.h | 140
-rw-r--r--  arch/x86/include/asm/emergency-restart.h | 4
-rw-r--r--  arch/x86/include/asm/es7000/apic.h | 141
-rw-r--r--  arch/x86/include/asm/es7000/ipi.h | 12
-rw-r--r--  arch/x86/include/asm/es7000/wakecpu.h | 41
-rw-r--r--  arch/x86/include/asm/ftrace.h | 34
-rw-r--r--  arch/x86/include/asm/genapic_32.h | 32
-rw-r--r--  arch/x86/include/asm/genapic_64.h | 16
-rw-r--r--  arch/x86/include/asm/io_apic.h | 9
-rw-r--r--  arch/x86/include/asm/iomap.h | 30
-rw-r--r--  arch/x86/include/asm/iommu.h | 1
-rw-r--r--  arch/x86/include/asm/ipi.h | 23
-rw-r--r--  arch/x86/include/asm/irq.h | 2
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 11
-rw-r--r--  arch/x86/include/asm/mach-default/mach_apic.h | 29
-rw-r--r--  arch/x86/include/asm/mach-default/mach_ipi.h | 18
-rw-r--r--  arch/x86/include/asm/mach-default/mach_wakecpu.h | 24
-rw-r--r--  arch/x86/include/asm/mach-default/smpboot_hooks.h | 8
-rw-r--r--  arch/x86/include/asm/mach-generic/mach_apic.h | 2
-rw-r--r--  arch/x86/include/asm/mach-generic/mach_wakecpu.h | 12
-rw-r--r--  arch/x86/include/asm/mmzone_32.h | 4
-rw-r--r--  arch/x86/include/asm/numaq/apic.h | 12
-rw-r--r--  arch/x86/include/asm/numaq/ipi.h | 13
-rw-r--r--  arch/x86/include/asm/numaq/wakecpu.h | 24
-rw-r--r--  arch/x86/include/asm/pci_64.h | 14
-rw-r--r--  arch/x86/include/asm/ptrace.h | 2
-rw-r--r--  arch/x86/include/asm/setup.h | 3
-rw-r--r--  arch/x86/include/asm/smp.h | 6
-rw-r--r--  arch/x86/include/asm/summit/apic.h | 51
-rw-r--r--  arch/x86/include/asm/summit/ipi.h | 9
-rw-r--r--  arch/x86/include/asm/system.h | 2
-rw-r--r--  arch/x86/include/asm/thread_info.h | 2
-rw-r--r--  arch/x86/include/asm/topology.h | 4
-rw-r--r--  arch/x86/include/asm/uaccess.h | 2
-rw-r--r--  arch/x86/include/asm/uaccess_32.h | 8
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 8
-rw-r--r--  arch/x86/include/asm/unistd_64.h | 4
-rw-r--r--  arch/x86/include/asm/vmi.h | 8
-rw-r--r--  arch/x86/kernel/Makefile | 5
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 12
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 51
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 6
-rw-r--r--  arch/x86/kernel/apic.c | 14
-rw-r--r--  arch/x86/kernel/apm_32.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 18
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 17
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 3
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 45
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 108
-rw-r--r--  arch/x86/kernel/ds.c | 741
-rw-r--r--  arch/x86/kernel/dumpstack.c | 351
-rw-r--r--  arch/x86/kernel/dumpstack.h | 39
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 307
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 289
-rw-r--r--  arch/x86/kernel/early-quirks.c | 18
-rw-r--r--  arch/x86/kernel/entry_32.S | 51
-rw-r--r--  arch/x86/kernel/entry_64.S | 83
-rw-r--r--  arch/x86/kernel/es7000_32.c | 71
-rw-r--r--  arch/x86/kernel/ftrace.c | 390
-rw-r--r--  arch/x86/kernel/genapic_64.c | 4
-rw-r--r--  arch/x86/kernel/genapic_flat_64.c | 105
-rw-r--r--  arch/x86/kernel/genx2apic_cluster.c | 77
-rw-r--r--  arch/x86/kernel/genx2apic_phys.c | 72
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 59
-rw-r--r--  arch/x86/kernel/hpet.c | 8
-rw-r--r--  arch/x86/kernel/i387.c | 2
-rw-r--r--  arch/x86/kernel/i8253.c | 2
-rw-r--r--  arch/x86/kernel/io_apic.c | 1049
-rw-r--r--  arch/x86/kernel/ipi.c | 28
-rw-r--r--  arch/x86/kernel/irq.c | 3
-rw-r--r--  arch/x86/kernel/irq_32.c | 15
-rw-r--r--  arch/x86/kernel/irq_64.c | 17
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 3
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 3
-rw-r--r--  arch/x86/kernel/kvmclock.c | 2
-rw-r--r--  arch/x86/kernel/mfgpt_32.c | 2
-rw-r--r--  arch/x86/kernel/mpparse.c | 24
-rw-r--r--  arch/x86/kernel/numaq_32.c | 10
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c | 3
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 2
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 32
-rw-r--r--  arch/x86/kernel/process_32.c | 4
-rw-r--r--  arch/x86/kernel/process_64.c | 4
-rw-r--r--  arch/x86/kernel/ptrace.c | 98
-rw-r--r--  arch/x86/kernel/reboot.c | 45
-rw-r--r--  arch/x86/kernel/setup.c | 31
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 19
-rw-r--r--  arch/x86/kernel/smp.c | 21
-rw-r--r--  arch/x86/kernel/smpboot.c | 27
-rw-r--r--  arch/x86/kernel/stacktrace.c | 64
-rw-r--r--  arch/x86/kernel/tlb_32.c | 2
-rw-r--r--  arch/x86/kernel/tlb_64.c | 2
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 4
-rw-r--r--  arch/x86/kernel/vmi_32.c | 16
-rw-r--r--  arch/x86/kernel/vmiclock_32.c | 2
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 3
-rw-r--r--  arch/x86/kernel/xsave.c | 2
-rw-r--r--  arch/x86/kvm/Kconfig | 2
-rw-r--r--  arch/x86/kvm/i8254.c | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 4
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 1
-rw-r--r--  arch/x86/kvm/vmx.c | 7
-rw-r--r--  arch/x86/kvm/vmx.h | 1
-rw-r--r--  arch/x86/lguest/boot.c | 2
-rw-r--r--  arch/x86/lib/usercopy_32.c | 8
-rw-r--r--  arch/x86/lib/usercopy_64.c | 4
-rw-r--r--  arch/x86/mach-generic/bigsmp.c | 6
-rw-r--r--  arch/x86/mach-generic/default.c | 1
-rw-r--r--  arch/x86/mach-generic/es7000.c | 19
-rw-r--r--  arch/x86/mach-generic/numaq.c | 5
-rw-r--r--  arch/x86/mach-generic/probe.c | 16
-rw-r--r--  arch/x86/mach-generic/summit.c | 6
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c | 25
-rw-r--r--  arch/x86/mm/Makefile | 3
-rw-r--r--  arch/x86/mm/fault.c | 13
-rw-r--r--  arch/x86/mm/numa_32.c | 35
-rw-r--r--  arch/x86/mm/numa_64.c | 4
-rw-r--r--  arch/x86/mm/srat_64.c | 2
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 5
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c | 6
-rw-r--r--  arch/x86/pci/direct.c | 4
-rw-r--r--  arch/x86/pci/fixup.c | 25
-rw-r--r--  arch/x86/pci/pci.h | 1
-rw-r--r--  arch/x86/power/hibernate_32.c | 4
-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 3
-rw-r--r--  arch/x86/xen/mmu.c | 41
-rw-r--r--  arch/x86/xen/smp.c | 29
-rw-r--r--  arch/x86/xen/suspend.c | 3
-rw-r--r--  arch/x86/xen/time.c | 2
-rw-r--r--  arch/x86/xen/xen-ops.h | 4
142 files changed, 3501 insertions(+), 2195 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93224b569187..0ca2eb7573cd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,11 +29,14 @@ config X86
29 select HAVE_FTRACE_MCOUNT_RECORD 29 select HAVE_FTRACE_MCOUNT_RECORD
30 select HAVE_DYNAMIC_FTRACE 30 select HAVE_DYNAMIC_FTRACE
31 select HAVE_FUNCTION_TRACER 31 select HAVE_FUNCTION_TRACER
32 select HAVE_FUNCTION_GRAPH_TRACER
33 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
32 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 34 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
33 select HAVE_ARCH_KGDB if !X86_VOYAGER 35 select HAVE_ARCH_KGDB if !X86_VOYAGER
34 select HAVE_ARCH_TRACEHOOK 36 select HAVE_ARCH_TRACEHOOK
35 select HAVE_GENERIC_DMA_COHERENT if X86_32 37 select HAVE_GENERIC_DMA_COHERENT if X86_32
36 select HAVE_EFFICIENT_UNALIGNED_ACCESS 38 select HAVE_EFFICIENT_UNALIGNED_ACCESS
39 select USER_STACKTRACE_SUPPORT
37 40
38config ARCH_DEFCONFIG 41config ARCH_DEFCONFIG
39 string 42 string
@@ -167,9 +170,12 @@ config GENERIC_PENDING_IRQ
167config X86_SMP 170config X86_SMP
168 bool 171 bool
169 depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) 172 depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
170 select USE_GENERIC_SMP_HELPERS
171 default y 173 default y
172 174
175config USE_GENERIC_SMP_HELPERS
176 def_bool y
177 depends on SMP
178
173config X86_32_SMP 179config X86_32_SMP
174 def_bool y 180 def_bool y
175 depends on X86_32 && SMP 181 depends on X86_32 && SMP
@@ -235,6 +241,25 @@ config X86_HAS_BOOT_CPU_ID
235 def_bool y 241 def_bool y
236 depends on X86_VOYAGER 242 depends on X86_VOYAGER
237 243
244config SPARSE_IRQ
245 bool "Support sparse irq numbering"
246 depends on PCI_MSI || HT_IRQ
247 default y
248 help
249 This enables support for sparse irqs, especially for MSI/MSI-X. You
250 may need it if you have lots of MSI-X capable cards installed.
251
252 If you don't know what to do here, say Y.
253
254config NUMA_MIGRATE_IRQ_DESC
255 bool "Move irq desc when changing irq smp_affinity"
256 depends on SPARSE_IRQ && SMP
257 default n
258 help
259 This enables moving an irq_desc to the CPU/node on which the irq will be handled.
260
261 If you don't know what to do here, say N.
262
238config X86_FIND_SMP_CONFIG 263config X86_FIND_SMP_CONFIG
239 def_bool y 264 def_bool y
240 depends on X86_MPPARSE || X86_VOYAGER 265 depends on X86_MPPARSE || X86_VOYAGER
@@ -364,10 +389,10 @@ config X86_RDC321X
364 as R-8610-(G). 389 as R-8610-(G).
365 If you don't have one of these chips, you should say N here. 390 If you don't have one of these chips, you should say N here.
366 391
367config SCHED_NO_NO_OMIT_FRAME_POINTER 392config SCHED_OMIT_FRAME_POINTER
368 def_bool y 393 def_bool y
369 prompt "Single-depth WCHAN output" 394 prompt "Single-depth WCHAN output"
370 depends on X86_32 395 depends on X86
371 help 396 help
372 Calculate simpler /proc/<PID>/wchan values. If this option 397 Calculate simpler /proc/<PID>/wchan values. If this option
373 is disabled then wchan values will recurse back to the 398 is disabled then wchan values will recurse back to the
@@ -462,10 +487,6 @@ config X86_CYCLONE_TIMER
462 def_bool y 487 def_bool y
463 depends on X86_GENERICARCH 488 depends on X86_GENERICARCH
464 489
465config ES7000_CLUSTERED_APIC
466 def_bool y
467 depends on SMP && X86_ES7000 && MPENTIUMIII
468
469source "arch/x86/Kconfig.cpu" 490source "arch/x86/Kconfig.cpu"
470 491
471config HPET_TIMER 492config HPET_TIMER
@@ -579,19 +600,20 @@ config IOMMU_HELPER
579 600
580config MAXSMP 601config MAXSMP
581 bool "Configure Maximum number of SMP Processors and NUMA Nodes" 602 bool "Configure Maximum number of SMP Processors and NUMA Nodes"
582 depends on X86_64 && SMP && BROKEN 603 depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
604 select CPUMASK_OFFSTACK
583 default n 605 default n
584 help 606 help
585 Configure maximum number of CPUs and NUMA Nodes for this architecture. 607 Configure maximum number of CPUs and NUMA Nodes for this architecture.
586 If unsure, say N. 608 If unsure, say N.
587 609
588config NR_CPUS 610config NR_CPUS
589 int "Maximum number of CPUs (2-512)" if !MAXSMP 611 int "Maximum number of CPUs" if SMP && !MAXSMP
590 range 2 512 612 range 2 512 if SMP && !MAXSMP
591 depends on SMP 613 default "1" if !SMP
592 default "4096" if MAXSMP 614 default "4096" if MAXSMP
593 default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 615 default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
594 default "8" 616 default "8" if SMP
595 help 617 help
596 This allows you to specify the maximum number of CPUs which this 618 This allows you to specify the maximum number of CPUs which this
597 kernel will support. The maximum supported value is 512 and the 619 kernel will support. The maximum supported value is 512 and the
@@ -957,7 +979,7 @@ config ARCH_PHYS_ADDR_T_64BIT
957config NUMA 979config NUMA
958 bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" 980 bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
959 depends on SMP 981 depends on SMP
960 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN) 982 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
961 default n if X86_PC 983 default n if X86_PC
962 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) 984 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
963 help 985 help
@@ -1629,13 +1651,6 @@ config APM_ALLOW_INTS
1629 many of the newer IBM Thinkpads. If you experience hangs when you 1651 many of the newer IBM Thinkpads. If you experience hangs when you
1630 suspend, try setting this to Y. Otherwise, say N. 1652 suspend, try setting this to Y. Otherwise, say N.
1631 1653
1632config APM_REAL_MODE_POWER_OFF
1633 bool "Use real mode APM BIOS call to power off"
1634 help
1635 Use real mode APM BIOS calls to switch off the computer. This is
1636 a work-around for a number of buggy BIOSes. Switch this option on if
1637 your computer crashes instead of powering off properly.
1638
1639endif # APM 1654endif # APM
1640 1655
1641source "arch/x86/kernel/cpu/cpufreq/Kconfig" 1656source "arch/x86/kernel/cpu/cpufreq/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe370..85a78575956c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
515config X86_DS 515config X86_DS
516 def_bool X86_PTRACE_BTS 516 def_bool X86_PTRACE_BTS
517 depends on X86_DEBUGCTLMSR 517 depends on X86_DEBUGCTLMSR
518 select HAVE_HW_BRANCH_TRACER
518 519
519config X86_PTRACE_BTS 520config X86_PTRACE_BTS
520 bool "Branch Trace Store" 521 bool "Branch Trace Store"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..fa013f529b74 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,14 +186,10 @@ config IOMMU_LEAK
186 Add a simple leak tracer to the IOMMU code. This is useful when you 186 Add a simple leak tracer to the IOMMU code. This is useful when you
187 are debugging a buggy device driver that leaks IOMMU mappings. 187 are debugging a buggy device driver that leaks IOMMU mappings.
188 188
189config MMIOTRACE_HOOKS
190 bool
191
192config MMIOTRACE 189config MMIOTRACE
193 bool "Memory mapped IO tracing" 190 bool "Memory mapped IO tracing"
194 depends on DEBUG_KERNEL && PCI 191 depends on DEBUG_KERNEL && PCI
195 select TRACING 192 select TRACING
196 select MMIOTRACE_HOOKS
197 help 193 help
198 Mmiotrace traces Memory Mapped I/O access and is meant for 194 Mmiotrace traces Memory Mapped I/O access and is meant for
199 debugging and reverse engineering. It is called from the ioremap 195 debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b39328a..7e8e8b25f5f6 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
74{ 74{
75 u8 pending; 75 u8 pending;
76 asm volatile("int $0x16; setnz %0" 76 asm volatile("int $0x16; setnz %0"
77 : "=rm" (pending) 77 : "=qm" (pending)
78 : "a" (0x0100)); 78 : "a" (0x0100));
79 return pending; 79 return pending;
80} 80}
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8d676d8ecde9..9830681446ad 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void)
113 acpi_pci_disabled = 1; 113 acpi_pci_disabled = 1;
114 acpi_noirq_set(); 114 acpi_noirq_set();
115} 115}
116extern int acpi_irq_balance_set(char *str);
117 116
118/* routines for saving/restoring kernel state */ 117/* routines for saving/restoring kernel state */
119extern int acpi_save_state_mem(void); 118extern int acpi_save_state_mem(void);
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1a30c0440c6b..ac302a2fa339 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -251,13 +251,6 @@ struct amd_iommu {
251 /* Pointer to PCI device of this IOMMU */ 251 /* Pointer to PCI device of this IOMMU */
252 struct pci_dev *dev; 252 struct pci_dev *dev;
253 253
254 /*
255 * Capability pointer. There could be more than one IOMMU per PCI
256 * device function if there are more than one AMD IOMMU capability
257 * pointers.
258 */
259 u16 cap_ptr;
260
261 /* physical address of MMIO space */ 254 /* physical address of MMIO space */
262 u64 mmio_phys; 255 u64 mmio_phys;
263 /* virtual address of MMIO space */ 256 /* virtual address of MMIO space */
@@ -266,6 +259,13 @@ struct amd_iommu {
266 /* capabilities of that IOMMU read from ACPI */ 259 /* capabilities of that IOMMU read from ACPI */
267 u32 cap; 260 u32 cap;
268 261
262 /*
263 * Capability pointer. There could be more than one IOMMU per PCI
264 * device function if there are more than one AMD IOMMU capability
265 * pointers.
266 */
267 u16 cap_ptr;
268
269 /* pci domain of this IOMMU */ 269 /* pci domain of this IOMMU */
270 u16 pci_seg; 270 u16 pci_seg;
271 271
@@ -284,19 +284,19 @@ struct amd_iommu {
284 /* size of command buffer */ 284 /* size of command buffer */
285 u32 cmd_buf_size; 285 u32 cmd_buf_size;
286 286
287 /* event buffer virtual address */
288 u8 *evt_buf;
289 /* size of event buffer */ 287 /* size of event buffer */
290 u32 evt_buf_size; 288 u32 evt_buf_size;
289 /* event buffer virtual address */
290 u8 *evt_buf;
291 /* MSI number for event interrupt */ 291 /* MSI number for event interrupt */
292 u16 evt_msi_num; 292 u16 evt_msi_num;
293 293
294 /* if one, we need to send a completion wait command */
295 int need_sync;
296
297 /* true if interrupts for this IOMMU are already enabled */ 294 /* true if interrupts for this IOMMU are already enabled */
298 bool int_enabled; 295 bool int_enabled;
299 296
297 /* if one, we need to send a completion wait command */
298 int need_sync;
299
300 /* default dma_ops domain for that IOMMU */ 300 /* default dma_ops domain for that IOMMU */
301 struct dma_ops_domain *default_dom; 301 struct dma_ops_domain *default_dom;
302}; 302};
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3b1510b4fc57..25caa0738af5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
193static inline void lapic_shutdown(void) { } 193static inline void lapic_shutdown(void) { }
194#define local_apic_timer_c2_ok 1 194#define local_apic_timer_c2_ok 1
195static inline void init_apic_mappings(void) { } 195static inline void init_apic_mappings(void) { }
196static inline void disable_local_APIC(void) { }
196 197
197#endif /* !CONFIG_X86_LOCAL_APIC */ 198#endif /* !CONFIG_X86_LOCAL_APIC */
198 199
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 1d9543b9d358..976399debb3f 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline const cpumask_t *target_cpus(void)
13{ 13{
14#ifdef CONFIG_SMP 14#ifdef CONFIG_SMP
15 return cpu_online_map; 15 return &cpu_online_map;
16#else 16#else
17 return cpumask_of_cpu(0); 17 return &cpumask_of_cpu(0);
18#endif 18#endif
19} 19}
20 20
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void)
24#define INT_DELIVERY_MODE (dest_Fixed) 24#define INT_DELIVERY_MODE (dest_Fixed)
25#define INT_DEST_MODE (0) /* phys delivery to target proc */ 25#define INT_DEST_MODE (0) /* phys delivery to target proc */
26#define NO_BALANCE_IRQ (0) 26#define NO_BALANCE_IRQ (0)
27#define WAKE_SECONDARY_VIA_INIT
28
29 27
30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 28static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
31{ 29{
@@ -81,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid)
81 79
82static inline int cpu_present_to_apicid(int mps_cpu) 80static inline int cpu_present_to_apicid(int mps_cpu)
83{ 81{
84 if (mps_cpu < NR_CPUS) 82 if (mps_cpu < nr_cpu_ids)
85 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); 83 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
86 84
87 return BAD_APICID; 85 return BAD_APICID;
@@ -96,7 +94,7 @@ extern u8 cpu_2_logical_apicid[];
96/* Mapping from cpu number to logical apicid */ 94/* Mapping from cpu number to logical apicid */
97static inline int cpu_to_logical_apicid(int cpu) 95static inline int cpu_to_logical_apicid(int cpu)
98{ 96{
99 if (cpu >= NR_CPUS) 97 if (cpu >= nr_cpu_ids)
100 return BAD_APICID; 98 return BAD_APICID;
101 return cpu_physical_id(cpu); 99 return cpu_physical_id(cpu);
102} 100}
@@ -121,16 +119,32 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
121} 119}
122 120
123/* As we are using single CPU as destination, pick only one CPU here */ 121/* As we are using single CPU as destination, pick only one CPU here */
124static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 122static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
125{ 123{
126 int cpu; 124 int cpu;
127 int apicid; 125 int apicid;
128 126
129 cpu = first_cpu(cpumask); 127 cpu = first_cpu(*cpumask);
130 apicid = cpu_to_logical_apicid(cpu); 128 apicid = cpu_to_logical_apicid(cpu);
131 return apicid; 129 return apicid;
132} 130}
133 131
132static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 const struct cpumask *andmask)
134{
135 int cpu;
136
137 /*
138 * We're using fixed IRQ delivery, can only return one phys APIC ID.
139 * May as well be the first.
140 */
141 cpu = cpumask_any_and(cpumask, andmask);
142 if (cpu < nr_cpu_ids)
143 return cpu_to_logical_apicid(cpu);
144
145 return BAD_APICID;
146}
147
134static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) 148static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
135{ 149{
136 return cpuid_apic >> index_msb; 150 return cpuid_apic >> index_msb;
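
The new cpu_mask_to_apicid_and() above resolves a single IRQ destination from the intersection of two masks, returning BAD_APICID when the intersection is empty. A minimal sketch of the calling pattern, with a hypothetical wrapper name (the in-tree caller is the io_apic affinity code, which is not part of this hunk):

/* Hypothetical helper; only illustrates the intersection semantics. */
static unsigned int dest_apicid_for(const struct cpumask *affinity)
{
        /* Consider only CPUs that are both requested and online. */
        unsigned int apicid = cpu_mask_to_apicid_and(affinity, cpu_online_mask);

        if (apicid == BAD_APICID)               /* empty intersection */
                return cpu_to_logical_apicid(0);
        return apicid;
}
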
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 9404c535b7ec..27fcd01b3ae6 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,25 +1,22 @@
1#ifndef __ASM_MACH_IPI_H 1#ifndef __ASM_MACH_IPI_H
2#define __ASM_MACH_IPI_H 2#define __ASM_MACH_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
5void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const struct cpumask *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
10 11
11static inline void send_IPI_allbutself(int vector) 12static inline void send_IPI_allbutself(int vector)
12{ 13{
13 cpumask_t mask = cpu_online_map; 14 send_IPI_mask_allbutself(cpu_online_mask, vector);
14 cpu_clear(smp_processor_id(), mask);
15
16 if (!cpus_empty(mask))
17 send_IPI_mask(mask, vector);
18} 15}
19 16
20static inline void send_IPI_all(int vector) 17static inline void send_IPI_all(int vector)
21{ 18{
22 send_IPI_mask(cpu_online_map, vector); 19 send_IPI_mask(cpu_online_mask, vector);
23} 20}
24 21
25#endif /* __ASM_MACH_IPI_H */ 22#endif /* __ASM_MACH_IPI_H */
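
The send_IPI_allbutself() rewrite above is not just cosmetic: with CPUMASK_OFFSTACK in sight (see the MAXSMP hunk earlier), an on-stack cpumask_t copy costs NR_CPUS bits. Both patterns, side by side, as they appear in this hunk:

/* old: copies a full NR_CPUS-bit mask onto the stack */
cpumask_t mask = cpu_online_map;
cpu_clear(smp_processor_id(), mask);
if (!cpus_empty(mask))
        send_IPI_mask(mask, vector);

/* new: no copy; the helper walks the mask and skips the local CPU */
send_IPI_mask_allbutself(cpu_online_mask, vector);
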
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 7f225a4b2a26..097794ff6b79 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
71/* Make sure we keep the same behaviour */ 71/* Make sure we keep the same behaviour */
72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
73{ 73{
74#ifdef CONFIG_X86_32 74#ifdef CONFIG_X86_64
75 return 0;
76#else
77 struct dma_mapping_ops *ops = get_dma_ops(dev); 75 struct dma_mapping_ops *ops = get_dma_ops(dev);
78 if (ops->mapping_error) 76 if (ops->mapping_error)
79 return ops->mapping_error(dev, dma_addr); 77 return ops->mapping_error(dev, dma_addr);
80 78
81 return (dma_addr == bad_dma_address);
82#endif 79#endif
80 return (dma_addr == bad_dma_address);
83} 81}
84 82
85#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) 83#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
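
The restructured dma_mapping_error() folds the 32-bit and 64-bit paths together: 64-bit may consult the per-device mapping_error hook, and both now fall through to the bad_dma_address comparison. A minimal sketch of the calling convention it serves (device and buffer names are made up):

/* Hypothetical driver snippet. */
static int map_one_buffer(struct device *dev, void *buf, size_t len)
{
        dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

        if (dma_mapping_error(dev, handle))
                return -ENOMEM; /* never hand a failed handle to hardware */

        /* ... program the device with 'handle'; dma_unmap_single() later ... */
        return 0;
}
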
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a190bf48..99b6c39774a4 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -7,13 +7,12 @@
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It assumes:
15 * - get_task_struct on all parameter tasks 14 * - get_task_struct on all traced tasks
16 * - current is allowed to trace parameter tasks 15 * - current is allowed to trace tasks
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -23,13 +22,21 @@
23#ifndef _ASM_X86_DS_H 22#ifndef _ASM_X86_DS_H
24#define _ASM_X86_DS_H 23#define _ASM_X86_DS_H
25 24
26#ifdef CONFIG_X86_DS
27 25
28#include <linux/types.h> 26#include <linux/types.h>
29#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/err.h>
29
30 30
31#ifdef CONFIG_X86_DS
31 32
32struct task_struct; 33struct task_struct;
34struct ds_tracer;
35struct bts_tracer;
36struct pebs_tracer;
37
38typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
39typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
33 40
34/* 41/*
35 * Request BTS or PEBS 42 * Request BTS or PEBS
@@ -37,60 +44,62 @@ struct task_struct;
37 * Due to alignment constraints, the actual buffer may be slightly 44 * Due to alignment constraints, the actual buffer may be slightly
38 * smaller than the requested or provided buffer. 45 * smaller than the requested or provided buffer.
39 * 46 *
40 * Returns 0 on success; -Eerrno otherwise 47 * Returns a pointer to a tracer structure on success, or
48 * ERR_PTR(errcode) on failure.
49 *
50 * The interrupt threshold is independent from the overflow callback
51 * to allow users to use their own overflow interrupt handling mechanism.
41 * 52 *
42 * task: the task to request recording for; 53 * task: the task to request recording for;
43 * NULL for per-cpu recording on the current cpu 54 * NULL for per-cpu recording on the current cpu
44 * base: the base pointer for the (non-pageable) buffer; 55 * base: the base pointer for the (non-pageable) buffer;
45 * NULL if buffer allocation requested 56 * size: the size of the provided buffer in bytes
46 * size: the size of the requested or provided buffer
47 * ovfl: pointer to a function to be called on buffer overflow; 57 * ovfl: pointer to a function to be called on buffer overflow;
48 * NULL if cyclic buffer requested 58 * NULL if cyclic buffer requested
59 * th: the interrupt threshold in records from the end of the buffer;
60 * -1 if no interrupt threshold is requested.
49 */ 61 */
50typedef void (*ds_ovfl_callback_t)(struct task_struct *); 62extern struct bts_tracer *ds_request_bts(struct task_struct *task,
51extern int ds_request_bts(struct task_struct *task, void *base, size_t size, 63 void *base, size_t size,
52 ds_ovfl_callback_t ovfl); 64 bts_ovfl_callback_t ovfl, size_t th);
53extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, 65extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
54 ds_ovfl_callback_t ovfl); 66 void *base, size_t size,
67 pebs_ovfl_callback_t ovfl,
68 size_t th);
55 69
56/* 70/*
57 * Release BTS or PEBS resources 71 * Release BTS or PEBS resources
58 * 72 *
59 * Frees buffers allocated on ds_request.
60 *
61 * Returns 0 on success; -Eerrno otherwise 73 * Returns 0 on success; -Eerrno otherwise
62 * 74 *
63 * task: the task to release resources for; 75 * tracer: the tracer handle returned from ds_request_~()
64 * NULL to release resources for the current cpu
65 */ 76 */
66extern int ds_release_bts(struct task_struct *task); 77extern int ds_release_bts(struct bts_tracer *tracer);
67extern int ds_release_pebs(struct task_struct *task); 78extern int ds_release_pebs(struct pebs_tracer *tracer);
68 79
69/* 80/*
70 * Return the (array) index of the write pointer. 81 * Get the (array) index of the write pointer.
71 * (assuming an array of BTS/PEBS records) 82 * (assuming an array of BTS/PEBS records)
72 * 83 *
73 * Returns -Eerrno on error 84 * Returns 0 on success; -Eerrno on error
74 * 85 *
75 * task: the task to access; 86 * tracer: the tracer handle returned from ds_request_~()
76 * NULL to access the current cpu 87 * pos (out): will hold the result
77 * pos (out): if not NULL, will hold the result
78 */ 88 */
79extern int ds_get_bts_index(struct task_struct *task, size_t *pos); 89extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
80extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); 90extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
81 91
82/* 92/*
83 * Return the (array) index one record beyond the end of the array. 93 * Get the (array) index one record beyond the end of the array.
84 * (assuming an array of BTS/PEBS records) 94 * (assuming an array of BTS/PEBS records)
85 * 95 *
86 * Returns -Eerrno on error 96 * Returns 0 on success; -Eerrno on error
87 * 97 *
88 * task: the task to access; 98 * tracer: the tracer handle returned from ds_request_~()
89 * NULL to access the current cpu 99 * pos (out): will hold the result
90 * pos (out): if not NULL, will hold the result
91 */ 100 */
92extern int ds_get_bts_end(struct task_struct *task, size_t *pos); 101extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
93extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); 102extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
94 103
95/* 104/*
96 * Provide a pointer to the BTS/PEBS record at parameter index. 105 * Provide a pointer to the BTS/PEBS record at parameter index.
@@ -101,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
101 * 110 *
102 * Returns the size of a single record on success; -Eerrno on error 111 * Returns the size of a single record on success; -Eerrno on error
103 * 112 *
104 * task: the task to access; 113 * tracer: the tracer handle returned from ds_request_~()
105 * NULL to access the current cpu
106 * index: the index of the requested record 114 * index: the index of the requested record
107 * record (out): pointer to the requested record 115 * record (out): pointer to the requested record
108 */ 116 */
109extern int ds_access_bts(struct task_struct *task, 117extern int ds_access_bts(struct bts_tracer *tracer,
110 size_t index, const void **record); 118 size_t index, const void **record);
111extern int ds_access_pebs(struct task_struct *task, 119extern int ds_access_pebs(struct pebs_tracer *tracer,
112 size_t index, const void **record); 120 size_t index, const void **record);
113 121
114/* 122/*
@@ -128,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task,
128 * 136 *
129 * Returns the number of bytes written or -Eerrno. 137 * Returns the number of bytes written or -Eerrno.
130 * 138 *
131 * task: the task to access; 139 * tracer: the tracer handle returned from ds_request_~()
132 * NULL to access the current cpu
133 * buffer: the buffer to write 140 * buffer: the buffer to write
134 * size: the size of the buffer 141 * size: the size of the buffer
135 */ 142 */
136extern int ds_write_bts(struct task_struct *task, 143extern int ds_write_bts(struct bts_tracer *tracer,
137 const void *buffer, size_t size); 144 const void *buffer, size_t size);
138extern int ds_write_pebs(struct task_struct *task, 145extern int ds_write_pebs(struct pebs_tracer *tracer,
139 const void *buffer, size_t size); 146 const void *buffer, size_t size);
140 147
141/* 148/*
142 * Same as ds_write_bts/pebs, but omit ownership checks.
143 *
144 * This is needed to have some other task than the owner of the
145 * BTS/PEBS buffer or the parameter task itself write into the
146 * respective buffer.
147 */
148extern int ds_unchecked_write_bts(struct task_struct *task,
149 const void *buffer, size_t size);
150extern int ds_unchecked_write_pebs(struct task_struct *task,
151 const void *buffer, size_t size);
152
153/*
154 * Reset the write pointer of the BTS/PEBS buffer. 149 * Reset the write pointer of the BTS/PEBS buffer.
155 * 150 *
156 * Returns 0 on success; -Eerrno on error 151 * Returns 0 on success; -Eerrno on error
157 * 152 *
158 * task: the task to access; 153 * tracer: the tracer handle returned from ds_request_~()
159 * NULL to access the current cpu
160 */ 154 */
161extern int ds_reset_bts(struct task_struct *task); 155extern int ds_reset_bts(struct bts_tracer *tracer);
162extern int ds_reset_pebs(struct task_struct *task); 156extern int ds_reset_pebs(struct pebs_tracer *tracer);
163 157
164/* 158/*
165 * Clear the BTS/PEBS buffer and reset the write pointer. 159 * Clear the BTS/PEBS buffer and reset the write pointer.
@@ -167,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task);
167 * 161 *
168 * Returns 0 on success; -Eerrno on error 162 * Returns 0 on success; -Eerrno on error
169 * 163 *
170 * task: the task to access; 164 * tracer: the tracer handle returned from ds_request_~()
171 * NULL to access the current cpu
172 */ 165 */
173extern int ds_clear_bts(struct task_struct *task); 166extern int ds_clear_bts(struct bts_tracer *tracer);
174extern int ds_clear_pebs(struct task_struct *task); 167extern int ds_clear_pebs(struct pebs_tracer *tracer);
175 168
176/* 169/*
177 * Provide the PEBS counter reset value. 170 * Provide the PEBS counter reset value.
178 * 171 *
179 * Returns 0 on success; -Eerrno on error 172 * Returns 0 on success; -Eerrno on error
180 * 173 *
181 * task: the task to access; 174 * tracer: the tracer handle returned from ds_request_pebs()
182 * NULL to access the current cpu
183 * value (out): the counter reset value 175 * value (out): the counter reset value
184 */ 176 */
185extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); 177extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
186 178
187/* 179/*
188 * Set the PEBS counter reset value. 180 * Set the PEBS counter reset value.
189 * 181 *
190 * Returns 0 on success; -Eerrno on error 182 * Returns 0 on success; -Eerrno on error
191 * 183 *
192 * task: the task to access; 184 * tracer: the tracer handle returned from ds_request_pebs()
193 * NULL to access the current cpu
194 * value: the new counter reset value 185 * value: the new counter reset value
195 */ 186 */
196extern int ds_set_pebs_reset(struct task_struct *task, u64 value); 187extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
197 188
198/* 189/*
199 * Initialization 190 * Initialization
@@ -206,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
206/* 197/*
207 * The DS context - part of struct thread_struct. 198 * The DS context - part of struct thread_struct.
208 */ 199 */
200#define MAX_SIZEOF_DS (12 * 8)
201
209struct ds_context { 202struct ds_context {
210 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ 203 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
211 unsigned char *ds; 204 unsigned char ds[MAX_SIZEOF_DS];
212 /* the owner of the BTS and PEBS configuration, respectively */ 205 /* the owner of the BTS and PEBS configuration, respectively */
213 struct task_struct *owner[2]; 206 struct ds_tracer *owner[2];
214 /* buffer overflow notification function for BTS and PEBS */
215 ds_ovfl_callback_t callback[2];
216 /* the original buffer address */
217 void *buffer[2];
218 /* the number of allocated pages for on-request allocated buffers */
219 unsigned int pages[2];
220 /* use count */ 207 /* use count */
221 unsigned long count; 208 unsigned long count;
222 /* a pointer to the context location inside the thread_struct 209 /* a pointer to the context location inside the thread_struct
@@ -232,7 +219,8 @@ extern void ds_free(struct ds_context *context);
232 219
233#else /* CONFIG_X86_DS */ 220#else /* CONFIG_X86_DS */
234 221
235#define ds_init_intel(config) do {} while (0) 222struct cpuinfo_x86;
223static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
236 224
237#endif /* CONFIG_X86_DS */ 225#endif /* CONFIG_X86_DS */
238#endif /* _ASM_X86_DS_H */ 226#endif /* _ASM_X86_DS_H */
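
The net effect of the ds.h changes: callers hold an opaque tracer handle instead of naming a task on every call, and the request functions return ERR_PTR() values rather than -Eerrno. A minimal sketch of the new BTS flow, assuming the caller already owns a suitable non-pageable buffer (all names illustrative):

static void bts_snapshot(struct task_struct *task, void *buf, size_t size)
{
        struct bts_tracer *tracer;
        const void *record;
        size_t index;

        /* cyclic buffer: no overflow callback, no interrupt threshold */
        tracer = ds_request_bts(task, buf, size, NULL, (size_t)-1);
        if (IS_ERR(tracer))
                return;

        if (!ds_get_bts_index(tracer, &index) && index > 0)
                ds_access_bts(tracer, index - 1, &record); /* newest record */

        ds_release_bts(tracer);
}
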
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index 94826cf87455..cc70c1c78ca4 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -8,7 +8,9 @@ enum reboot_type {
8 BOOT_BIOS = 'b', 8 BOOT_BIOS = 'b',
9#endif 9#endif
10 BOOT_ACPI = 'a', 10 BOOT_ACPI = 'a',
11 BOOT_EFI = 'e' 11 BOOT_EFI = 'e',
12 BOOT_CF9 = 'p',
13 BOOT_CF9_COND = 'q',
12}; 14};
13 15
14extern enum reboot_type reboot_type; 16extern enum reboot_type reboot_type;
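
The two new reboot types refer to the PCI reset control register at I/O port 0xCF9 ('p' unconditionally, 'q' only when the method appears safe). A sketch of the usual 0xCF9 sequence; the authoritative implementation lives in arch/x86/kernel/reboot.c:

u8 cf9 = inb(0xcf9) & ~6U;
outb(cf9 | 2, 0xcf9);   /* request a hard reset */
udelay(50);
outb(cf9 | 6, 0xcf9);   /* actually perform the reset */
udelay(50);
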
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 380f0b4f17ed..ba8423c5363f 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,31 +9,27 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline const cpumask_t *target_cpus_cluster(void)
13{ 13{
14#if defined CONFIG_ES7000_CLUSTERED_APIC 14 return &CPU_MASK_ALL;
15 return CPU_MASK_ALL;
16#else
17 return cpumask_of_cpu(smp_processor_id());
18#endif
19} 15}
20 16
21#if defined CONFIG_ES7000_CLUSTERED_APIC 17static inline const cpumask_t *target_cpus(void)
22#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 18{
23#define INT_DELIVERY_MODE (dest_LowestPrio) 19 return &cpumask_of_cpu(smp_processor_id());
24#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ 20}
25#define NO_BALANCE_IRQ (1) 21
26#undef WAKE_SECONDARY_VIA_INIT 22#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
27#define WAKE_SECONDARY_VIA_MIP 23#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
28#else 24#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
25#define NO_BALANCE_IRQ_CLUSTER (1)
26
29#define APIC_DFR_VALUE (APIC_DFR_FLAT) 27#define APIC_DFR_VALUE (APIC_DFR_FLAT)
30#define INT_DELIVERY_MODE (dest_Fixed) 28#define INT_DELIVERY_MODE (dest_Fixed)
31#define INT_DEST_MODE (0) /* phys delivery to target procs */ 29#define INT_DEST_MODE (0) /* phys delivery to target procs */
32#define NO_BALANCE_IRQ (0) 30#define NO_BALANCE_IRQ (0)
33#undef APIC_DEST_LOGICAL 31#undef APIC_DEST_LOGICAL
34#define APIC_DEST_LOGICAL 0x0 32#define APIC_DEST_LOGICAL 0x0
35#define WAKE_SECONDARY_VIA_INIT
36#endif
37 33
38static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 34static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
39{ 35{
@@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu)
60 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 56 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
61 * document number 292116). So here it goes... 57 * document number 292116). So here it goes...
62 */ 58 */
59static inline void init_apic_ldr_cluster(void)
60{
61 unsigned long val;
62 int cpu = smp_processor_id();
63
64 apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
65 val = calculate_ldr(cpu);
66 apic_write(APIC_LDR, val);
67}
68
63static inline void init_apic_ldr(void) 69static inline void init_apic_ldr(void)
64{ 70{
65 unsigned long val; 71 unsigned long val;
@@ -70,17 +76,14 @@ static inline void init_apic_ldr(void)
70 apic_write(APIC_LDR, val); 76 apic_write(APIC_LDR, val);
71} 77}
72 78
73#ifndef CONFIG_X86_GENERICARCH
74extern void enable_apic_mode(void);
75#endif
76
77extern int apic_version [MAX_APICS]; 79extern int apic_version [MAX_APICS];
78static inline void setup_apic_routing(void) 80static inline void setup_apic_routing(void)
79{ 81{
80 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); 82 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
81 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", 83 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
82 (apic_version[apic] == 0x14) ? 84 (apic_version[apic] == 0x14) ?
83 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); 85 "Physical Cluster" : "Logical Cluster",
86 nr_ioapics, cpus_addr(*target_cpus())[0]);
84} 87}
85 88
86static inline int multi_timer_check(int apic, int irq) 89static inline int multi_timer_check(int apic, int irq)
@@ -98,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu)
98{ 101{
99 if (!mps_cpu) 102 if (!mps_cpu)
100 return boot_cpu_physical_apicid; 103 return boot_cpu_physical_apicid;
101 else if (mps_cpu < NR_CPUS) 104 else if (mps_cpu < nr_cpu_ids)
102 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); 105 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
103 else 106 else
104 return BAD_APICID; 107 return BAD_APICID;
@@ -118,9 +121,9 @@ extern u8 cpu_2_logical_apicid[];
118static inline int cpu_to_logical_apicid(int cpu) 121static inline int cpu_to_logical_apicid(int cpu)
119{ 122{
120#ifdef CONFIG_SMP 123#ifdef CONFIG_SMP
121 if (cpu >= NR_CPUS) 124 if (cpu >= nr_cpu_ids)
122 return BAD_APICID; 125 return BAD_APICID;
123 return (int)cpu_2_logical_apicid[cpu]; 126 return (int)cpu_2_logical_apicid[cpu];
124#else 127#else
125 return logical_smp_processor_id(); 128 return logical_smp_processor_id();
126#endif 129#endif
@@ -144,16 +147,87 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
144 return (1); 147 return (1);
145} 148}
146 149
147static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 150static inline unsigned int
151cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
152{
153 int num_bits_set;
154 int cpus_found = 0;
155 int cpu;
156 int apicid;
157
158 num_bits_set = cpumask_weight(cpumask);
159 /* Return id to all */
160 if (num_bits_set == NR_CPUS)
161 return 0xFF;
162 /*
163 * The cpus in the mask must all be on the same apicid cluster. If
164 * they are not, return the default value of TARGET_CPUS.
165 */
166 cpu = cpumask_first(cpumask);
167 apicid = cpu_to_logical_apicid(cpu);
168 while (cpus_found < num_bits_set) {
169 if (cpumask_test_cpu(cpu, cpumask)) {
170 int new_apicid = cpu_to_logical_apicid(cpu);
171 if (apicid_cluster(apicid) !=
172 apicid_cluster(new_apicid)){
173 printk ("%s: Not a valid mask!\n", __func__);
174 return 0xFF;
175 }
176 apicid = new_apicid;
177 cpus_found++;
178 }
179 cpu++;
180 }
181 return apicid;
182}
183
184static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
148{ 185{
149 int num_bits_set; 186 int num_bits_set;
150 int cpus_found = 0; 187 int cpus_found = 0;
151 int cpu; 188 int cpu;
152 int apicid; 189 int apicid;
153 190
154 num_bits_set = cpus_weight(cpumask); 191 num_bits_set = cpus_weight(*cpumask);
155 /* Return id to all */ 192 /* Return id to all */
156 if (num_bits_set == NR_CPUS) 193 if (num_bits_set == NR_CPUS)
194 return cpu_to_logical_apicid(0);
195 /*
196 * The cpus in the mask must all be on the same apicid cluster. If
197 * they are not, return the default value of TARGET_CPUS.
198 */
199 cpu = first_cpu(*cpumask);
200 apicid = cpu_to_logical_apicid(cpu);
201 while (cpus_found < num_bits_set) {
202 if (cpu_isset(cpu, *cpumask)) {
203 int new_apicid = cpu_to_logical_apicid(cpu);
204 if (apicid_cluster(apicid) !=
205 apicid_cluster(new_apicid)){
206 printk ("%s: Not a valid mask!\n", __func__);
207 return cpu_to_logical_apicid(0);
208 }
209 apicid = new_apicid;
210 cpus_found++;
211 }
212 cpu++;
213 }
214 return apicid;
215}
216
217static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
218 const struct cpumask *andmask)
219{
220 int num_bits_set;
221 int num_bits_set2;
222 int cpus_found = 0;
223 int cpu;
224 int apicid = 0;
225
226 num_bits_set = cpumask_weight(cpumask);
227 num_bits_set2 = cpumask_weight(andmask);
228 num_bits_set = min(num_bits_set, num_bits_set2);
229 /* Return id to all */
230 if (num_bits_set >= nr_cpu_ids)
157#if defined CONFIG_ES7000_CLUSTERED_APIC 231#if defined CONFIG_ES7000_CLUSTERED_APIC
158 return 0xFF; 232 return 0xFF;
159#else 233#else
@@ -163,14 +237,17 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
163 * The cpus in the mask must all be on the same apicid cluster. If 237 * The cpus in the mask must all be on the same apicid cluster. If
164 * they are not, return the default value of TARGET_CPUS. 238 * they are not, return the default value of TARGET_CPUS.
165 */ 239 */
166 cpu = first_cpu(cpumask); 240 cpu = cpumask_first_and(cpumask, andmask);
167 apicid = cpu_to_logical_apicid(cpu); 241 apicid = cpu_to_logical_apicid(cpu);
242
168 while (cpus_found < num_bits_set) { 243 while (cpus_found < num_bits_set) {
169 if (cpu_isset(cpu, cpumask)) { 244 if (cpumask_test_cpu(cpu, cpumask) &&
245 cpumask_test_cpu(cpu, andmask)) {
170 int new_apicid = cpu_to_logical_apicid(cpu); 246 int new_apicid = cpu_to_logical_apicid(cpu);
171 if (apicid_cluster(apicid) != 247 if (apicid_cluster(apicid) !=
172 apicid_cluster(new_apicid)){ 248 apicid_cluster(new_apicid)) {
173 printk ("%s: Not a valid mask!\n", __func__); 249 printk(KERN_WARNING
250 "%s: Not a valid mask!\n", __func__);
174#if defined CONFIG_ES7000_CLUSTERED_APIC 251#if defined CONFIG_ES7000_CLUSTERED_APIC
175 return 0xFF; 252 return 0xFF;
176#else 253#else
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 632a955fcc0a..7e8ed24d4b8a 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,24 +1,22 @@
1#ifndef __ASM_ES7000_IPI_H 1#ifndef __ASM_ES7000_IPI_H
2#define __ASM_ES7000_IPI_H 2#define __ASM_ES7000_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
5void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
5 6
6static inline void send_IPI_mask(cpumask_t mask, int vector) 7static inline void send_IPI_mask(const struct cpumask *mask, int vector)
7{ 8{
8 send_IPI_mask_sequence(mask, vector); 9 send_IPI_mask_sequence(mask, vector);
9} 10}
10 11
11static inline void send_IPI_allbutself(int vector) 12static inline void send_IPI_allbutself(int vector)
12{ 13{
13 cpumask_t mask = cpu_online_map; 14 send_IPI_mask_allbutself(cpu_online_mask, vector);
14 cpu_clear(smp_processor_id(), mask);
15 if (!cpus_empty(mask))
16 send_IPI_mask(mask, vector);
17} 15}
18 16
19static inline void send_IPI_all(int vector) 17static inline void send_IPI_all(int vector)
20{ 18{
21 send_IPI_mask(cpu_online_map, vector); 19 send_IPI_mask(cpu_online_mask, vector);
22} 20}
23 21
24#endif /* __ASM_ES7000_IPI_H */ 22#endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 398493461913..78f0daaee436 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -1,36 +1,12 @@
1#ifndef __ASM_ES7000_WAKECPU_H 1#ifndef __ASM_ES7000_WAKECPU_H
2#define __ASM_ES7000_WAKECPU_H 2#define __ASM_ES7000_WAKECPU_H
3 3
4/* 4#define TRAMPOLINE_PHYS_LOW 0x467
5 * This file copes with machines that wakeup secondary CPUs by the 5#define TRAMPOLINE_PHYS_HIGH 0x469
6 * INIT, INIT, STARTUP sequence.
7 */
8
9#ifdef CONFIG_ES7000_CLUSTERED_APIC
10#define WAKE_SECONDARY_VIA_MIP
11#else
12#define WAKE_SECONDARY_VIA_INIT
13#endif
14
15#ifdef WAKE_SECONDARY_VIA_MIP
16extern int es7000_start_cpu(int cpu, unsigned long eip);
17static inline int
18wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
19{
20 int boot_error = 0;
21 boot_error = es7000_start_cpu(phys_apicid, start_eip);
22 return boot_error;
23}
24#endif
25
26#define TRAMPOLINE_LOW phys_to_virt(0x467)
27#define TRAMPOLINE_HIGH phys_to_virt(0x469)
28
29#define boot_cpu_apicid boot_cpu_physical_apicid
30 6
31static inline void wait_for_init_deassert(atomic_t *deassert) 7static inline void wait_for_init_deassert(atomic_t *deassert)
32{ 8{
33#ifdef WAKE_SECONDARY_VIA_INIT 9#ifndef CONFIG_ES7000_CLUSTERED_APIC
34 while (!atomic_read(deassert)) 10 while (!atomic_read(deassert))
35 cpu_relax(); 11 cpu_relax();
36#endif 12#endif
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
50{ 26{
51} 27}
52 28
53#define inquire_remote_apic(apicid) do { \ 29extern void __inquire_remote_apic(int apicid);
54 if (apic_verbosity >= APIC_DEBUG) \ 30
55 __inquire_remote_apic(apicid); \ 31static inline void inquire_remote_apic(int apicid)
56 } while (0) 32{
33 if (apic_verbosity >= APIC_DEBUG)
34 __inquire_remote_apic(apicid);
35}
57 36
58#endif /* __ASM_MACH_WAKECPU_H */ 37#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..7e61b4ceb9a4 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,8 +17,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
17 */ 17 */
18 return addr - 1; 18 return addr - 1;
19} 19}
20#endif
21 20
21#ifdef CONFIG_DYNAMIC_FTRACE
22
23struct dyn_arch_ftrace {
24 /* No extra data needed for x86 */
25};
26
27#endif /* CONFIG_DYNAMIC_FTRACE */
28#endif /* __ASSEMBLY__ */
22#endif /* CONFIG_FUNCTION_TRACER */ 29#endif /* CONFIG_FUNCTION_TRACER */
23 30
31#ifdef CONFIG_FUNCTION_GRAPH_TRACER
32
33#ifndef __ASSEMBLY__
34
35/*
36 * Stack of return addresses for functions
37 * of a thread.
38 * Used in struct thread_info
39 */
40struct ftrace_ret_stack {
41 unsigned long ret;
42 unsigned long func;
43 unsigned long long calltime;
44};
45
46/*
47 * Primary handler of a function return.
48 * It relies on ftrace_return_to_handler.
49 * Defined in entry_32.S.
50 */
51extern void return_to_handler(void);
52
53#endif /* __ASSEMBLY__ */
54#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
55
24#endif /* _ASM_X86_FTRACE_H */ 56#endif /* _ASM_X86_FTRACE_H */
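
For orientation: the function-graph tracer uses the ftrace_ret_stack by rewriting return addresses at function entry, so each traced function "returns" into return_to_handler first. A conceptual sketch only; the helper name is invented and the real logic lives in arch/x86/kernel/ftrace.c and entry_32.S:

static void hook_function_return(unsigned long *parent, unsigned long func)
{
        /* alloc_ret_stack_slot() is hypothetical */
        struct ftrace_ret_stack *slot = alloc_ret_stack_slot(current);

        slot->ret = *parent;            /* remember the real caller */
        slot->func = func;
        slot->calltime = cpu_clock(raw_smp_processor_id());

        *parent = (unsigned long)return_to_handler;     /* divert the return */
}
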
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 5cbd4fcc06fd..746f37a7963a 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_GENAPIC_32_H 2#define _ASM_X86_GENAPIC_32_H
3 3
4#include <asm/mpspec.h> 4#include <asm/mpspec.h>
5#include <asm/atomic.h>
5 6
6/* 7/*
7 * Generic APIC driver interface. 8 * Generic APIC driver interface.
@@ -23,7 +24,7 @@ struct genapic {
23 int (*probe)(void); 24 int (*probe)(void);
24 25
25 int (*apic_id_registered)(void); 26 int (*apic_id_registered)(void);
26 cpumask_t (*target_cpus)(void); 27 const struct cpumask *(*target_cpus)(void);
27 int int_delivery_mode; 28 int int_delivery_mode;
28 int int_dest_mode; 29 int int_dest_mode;
29 int ESR_DISABLE; 30 int ESR_DISABLE;
@@ -56,15 +57,27 @@ struct genapic {
56 57
57 unsigned (*get_apic_id)(unsigned long x); 58 unsigned (*get_apic_id)(unsigned long x);
58 unsigned long apic_id_mask; 59 unsigned long apic_id_mask;
59 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 60 unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
60 cpumask_t (*vector_allocation_domain)(int cpu); 61 unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
62 const struct cpumask *andmask);
63 void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
61 64
62#ifdef CONFIG_SMP 65#ifdef CONFIG_SMP
63 /* ipi */ 66 /* ipi */
64 void (*send_IPI_mask)(cpumask_t mask, int vector); 67 void (*send_IPI_mask)(const struct cpumask *mask, int vector);
68 void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
69 int vector);
65 void (*send_IPI_allbutself)(int vector); 70 void (*send_IPI_allbutself)(int vector);
66 void (*send_IPI_all)(int vector); 71 void (*send_IPI_all)(int vector);
67#endif 72#endif
73 int (*wakeup_cpu)(int apicid, unsigned long start_eip);
74 int trampoline_phys_low;
75 int trampoline_phys_high;
76 void (*wait_for_init_deassert)(atomic_t *deassert);
77 void (*smp_callin_clear_local_apic)(void);
78 void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
79 void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
80 void (*inquire_remote_apic)(int apicid);
68}; 81};
69 82
70#define APICFUNC(x) .x = x, 83#define APICFUNC(x) .x = x,
@@ -105,16 +118,25 @@ struct genapic {
105 APICFUNC(get_apic_id) \ 118 APICFUNC(get_apic_id) \
106 .apic_id_mask = APIC_ID_MASK, \ 119 .apic_id_mask = APIC_ID_MASK, \
107 APICFUNC(cpu_mask_to_apicid) \ 120 APICFUNC(cpu_mask_to_apicid) \
108 APICFUNC(vector_allocation_domain) \ 121 APICFUNC(cpu_mask_to_apicid_and) \
122 APICFUNC(vector_allocation_domain) \
109 APICFUNC(acpi_madt_oem_check) \ 123 APICFUNC(acpi_madt_oem_check) \
110 IPIFUNC(send_IPI_mask) \ 124 IPIFUNC(send_IPI_mask) \
111 IPIFUNC(send_IPI_allbutself) \ 125 IPIFUNC(send_IPI_allbutself) \
112 IPIFUNC(send_IPI_all) \ 126 IPIFUNC(send_IPI_all) \
113 APICFUNC(enable_apic_mode) \ 127 APICFUNC(enable_apic_mode) \
114 APICFUNC(phys_pkg_id) \ 128 APICFUNC(phys_pkg_id) \
129 .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \
130 .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \
131 APICFUNC(wait_for_init_deassert) \
132 APICFUNC(smp_callin_clear_local_apic) \
133 APICFUNC(store_NMI_vector) \
134 APICFUNC(restore_NMI_vector) \
135 APICFUNC(inquire_remote_apic) \
115} 136}
116 137
117extern struct genapic *genapic; 138extern struct genapic *genapic;
139extern void es7000_update_genapic_to_cluster(void);
118 140
119enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 141enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
120#define get_uv_system_type() UV_NONE 142#define get_uv_system_type() UV_NONE
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 13c4e96199ea..adf32fb56aa6 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_GENAPIC_64_H 1#ifndef _ASM_X86_GENAPIC_64_H
2#define _ASM_X86_GENAPIC_64_H 2#define _ASM_X86_GENAPIC_64_H
3 3
4#include <linux/cpumask.h>
5
4/* 6/*
5 * Copyright 2004 James Cleverdon, IBM. 7 * Copyright 2004 James Cleverdon, IBM.
6 * Subject to the GNU Public License, v.2 8 * Subject to the GNU Public License, v.2
@@ -18,20 +20,26 @@ struct genapic {
18 u32 int_delivery_mode; 20 u32 int_delivery_mode;
19 u32 int_dest_mode; 21 u32 int_dest_mode;
20 int (*apic_id_registered)(void); 22 int (*apic_id_registered)(void);
21 cpumask_t (*target_cpus)(void); 23 const struct cpumask *(*target_cpus)(void);
22 cpumask_t (*vector_allocation_domain)(int cpu); 24 void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
23 void (*init_apic_ldr)(void); 25 void (*init_apic_ldr)(void);
24 /* ipi */ 26 /* ipi */
25 void (*send_IPI_mask)(cpumask_t mask, int vector); 27 void (*send_IPI_mask)(const struct cpumask *mask, int vector);
28 void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
29 int vector);
26 void (*send_IPI_allbutself)(int vector); 30 void (*send_IPI_allbutself)(int vector);
27 void (*send_IPI_all)(int vector); 31 void (*send_IPI_all)(int vector);
28 void (*send_IPI_self)(int vector); 32 void (*send_IPI_self)(int vector);
29 /* */ 33 /* */
30 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 34 unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
35 unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
36 const struct cpumask *andmask);
31 unsigned int (*phys_pkg_id)(int index_msb); 37 unsigned int (*phys_pkg_id)(int index_msb);
32 unsigned int (*get_apic_id)(unsigned long x); 38 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id); 39 unsigned long (*set_apic_id)(unsigned int id);
34 unsigned long apic_id_mask; 40 unsigned long apic_id_mask;
41 /* wakeup_secondary_cpu */
42 int (*wakeup_cpu)(int apicid, unsigned long start_eip);
35}; 43};
36 44
37extern struct genapic *genapic; 45extern struct genapic *genapic;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6afd9933a7dd..25d527ca1362 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -188,17 +188,14 @@ extern void restore_IO_APIC_setup(void);
188extern void reinit_intr_remapped_IO_APIC(int); 188extern void reinit_intr_remapped_IO_APIC(int);
189#endif 189#endif
190 190
191extern int probe_nr_irqs(void); 191extern void probe_nr_irqs_gsi(void);
192 192
193#else /* !CONFIG_X86_IO_APIC */ 193#else /* !CONFIG_X86_IO_APIC */
194#define io_apic_assign_pci_irqs 0 194#define io_apic_assign_pci_irqs 0
195static const int timer_through_8259 = 0; 195static const int timer_through_8259 = 0;
196static inline void ioapic_init_mappings(void) { } 196static inline void ioapic_init_mappings(void) { }
197 197
198static inline int probe_nr_irqs(void) 198static inline void probe_nr_irqs_gsi(void) { }
199{
200 return NR_IRQS;
201}
202#endif 199#endif
203 200
204#endif /* _ASM_X86_IO_APIC_H */ 201#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
new file mode 100644
index 000000000000..c1f06289b14b
--- /dev/null
+++ b/arch/x86/include/asm/iomap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type);
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index e4a552d44465..0b500c5b6446 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,7 +6,6 @@ extern void no_iommu_init(void);
 extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
-extern int dmar_disabled;
 
 extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
 
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f89dffb28aa9..c745a306f7d3 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
 	native_apic_mem_write(APIC_ICR, cfg);
 }
 
-static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+static inline void send_IPI_mask_sequence(const struct cpumask *mask,
+					  int vector)
 {
 	unsigned long flags;
 	unsigned long query_cpu;
@@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
 	 * - mbligh
 	 */
 	local_irq_save(flags);
-	for_each_cpu_mask_nr(query_cpu, mask) {
+	for_each_cpu(query_cpu, mask) {
 		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
 }
 
+static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
+					    int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask)
+		if (query_cpu != this_cpu)
+			__send_IPI_dest_field(
+				per_cpu(x86_cpu_to_apicid, query_cpu),
+				vector, APIC_DEST_PHYSICAL);
+	local_irq_restore(flags);
+}
+
 #endif /* _ASM_X86_IPI_H */
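
Review note: send_IPI_mask_allbutself() replaces the old copy-and-clear idiom at its call sites. A sketch of the pattern it removes (illustrative, not a hunk from this patch):

	/* before: needed a writable copy of the whole mask */
	cpumask_t tmp = cpu_online_map;
	cpu_clear(smp_processor_id(), tmp);
	send_IPI_mask(&tmp, vector);

	/* after: the sending CPU is skipped while iterating, no copy */
	send_IPI_mask_allbutself(cpu_online_mask, vector);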
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda95486..8766d30fb746 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -37,7 +37,7 @@ extern int irqbalance_disable(char *str);
 
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
-extern void fixup_irqs(cpumask_t map);
+extern void fixup_irqs(void);
 #endif
 
 extern unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f941..f7ff65032b9d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,12 +101,23 @@
 #define LAST_VM86_IRQ 15
 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
 
+#define NR_IRQS_LEGACY 16
+
 #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
+
+#ifndef CONFIG_SPARSE_IRQ
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
+#else
+# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
+#  define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
+# else
+#  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+# endif
+#endif
 
 #elif defined(CONFIG_X86_VOYAGER)
 
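
Review note: under CONFIG_SPARSE_IRQ the upper bound scales with CPUs (8 per CPU) when that exceeds the IO-APIC pin count. Worked numbers, assuming NR_VECTORS=256 and MAX_IO_APICS=128 (values hedged; check them against the tree you build):

	NR_CPUS=4096:  8 * 4096 = 32768 > 32 * 128 = 4096
	               => NR_IRQS = 256 + 32768 = 33024
	NR_CPUS=8:     8 * 8 = 64 <= 4096
	               => NR_IRQS = 256 + 4096 = 4352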
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index ff3a6c236c00..8863d978cb96 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
 
 #define APIC_DFR_VALUE (APIC_DFR_FLAT)
 
-static inline cpumask_t target_cpus(void)
+static inline const struct cpumask *target_cpus(void)
 {
 #ifdef CONFIG_SMP
-	return cpu_online_map;
+	return cpu_online_mask;
 #else
-	return cpumask_of_cpu(0);
+	return cpumask_of(0);
 #endif
 }
 
@@ -28,15 +28,18 @@ static inline cpumask_t target_cpus(void)
 #define apic_id_registered (genapic->apic_id_registered)
 #define init_apic_ldr (genapic->init_apic_ldr)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define phys_pkg_id (genapic->phys_pkg_id)
 #define vector_allocation_domain (genapic->vector_allocation_domain)
 #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
 #define send_IPI_self (genapic->send_IPI_self)
+#define wakeup_secondary_cpu (genapic->wakeup_cpu)
 extern void setup_apic_routing(void);
 #else
 #define INT_DELIVERY_MODE dest_LowestPrio
 #define INT_DEST_MODE 1     /* logical delivery broadcast to all procs */
 #define TARGET_CPUS (target_cpus())
+#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
 /*
  * Set up the logical destination ID.
  *
@@ -59,9 +62,18 @@ static inline int apic_id_registered(void)
 	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return cpus_addr(cpumask)[0];
+	return cpumask_bits(cpumask)[0];
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
+{
+	unsigned long mask1 = cpumask_bits(cpumask)[0];
+	unsigned long mask2 = cpumask_bits(andmask)[0];
+
+	return (unsigned int)(mask1 & mask2);
 }
 
 static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -86,7 +98,7 @@ static inline int apicid_to_node(int logical_apicid)
 #endif
 }
 
-static inline cpumask_t vector_allocation_domain(int cpu)
+static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
 	/* Careful. Some cpus do not strictly honor the set of cpus
 	 * specified in the interrupt destination when using lowest
@@ -96,8 +108,7 @@ static inline cpumask_t vector_allocation_domain(int cpu)
 	 * deliver interrupts to the wrong hyperthread when only one
 	 * hyperthread was specified in the interrupt desitination.
 	 */
-	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
-	return domain;
+	*retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
 }
 #endif
 
@@ -129,7 +140,7 @@ static inline int cpu_to_logical_apicid(int cpu)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-	if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
+	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
 		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
 	else
 		return BAD_APICID;
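
Review note: in flat mode the logical APIC destination is the low word of the cpumask, so the new cpu_mask_to_apicid_and() reduces to a bitwise AND of the two masks' first longs. Illustrative values (hypothetical masks):

	/* cpumask_bits(cpumask)[0] = 0x0f;   cpus 0-3 */
	/* cpumask_bits(andmask)[0] = 0x0c;   cpus 2-3 */
	/* result: 0x0f & 0x0c = 0x0c, their intersection */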
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index fabca01ebacf..191312d155da 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,7 +4,8 @@
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
 
-void send_IPI_mask_bitmask(cpumask_t mask, int vector);
+void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 
 extern int no_broadcast;
@@ -12,28 +13,27 @@ extern int no_broadcast;
 #ifdef CONFIG_X86_64
 #include <asm/genapic.h>
 #define send_IPI_mask (genapic->send_IPI_mask)
+#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
 #else
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_bitmask(mask, vector);
 }
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 #endif
 
 static inline void __local_send_IPI_allbutself(int vector)
 {
-	if (no_broadcast || vector == NMI_VECTOR) {
-		cpumask_t mask = cpu_online_map;
-
-		cpu_clear(smp_processor_id(), mask);
-		send_IPI_mask(mask, vector);
-	} else
+	if (no_broadcast || vector == NMI_VECTOR)
+		send_IPI_mask_allbutself(cpu_online_mask, vector);
+	else
 		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }
 
 static inline void __local_send_IPI_all(int vector)
 {
 	if (no_broadcast || vector == NMI_VECTOR)
-		send_IPI_mask(cpu_online_map, vector);
+		send_IPI_mask(cpu_online_mask, vector);
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
 }
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index 9d80db91e992..ceb013660146 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -1,17 +1,8 @@
 #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
 #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
 
-/*
- * This file copes with machines that wakeup secondary CPUs by the
- * INIT, INIT, STARTUP sequence.
- */
-
-#define WAKE_SECONDARY_VIA_INIT
-
-#define TRAMPOLINE_LOW phys_to_virt(0x467)
-#define TRAMPOLINE_HIGH phys_to_virt(0x469)
-
-#define boot_cpu_apicid boot_cpu_physical_apicid
+#define TRAMPOLINE_PHYS_LOW (0x467)
+#define TRAMPOLINE_PHYS_HIGH (0x469)
 
 static inline void wait_for_init_deassert(atomic_t *deassert)
 {
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
 {
 }
 
-#define inquire_remote_apic(apicid) do {		\
-		if (apic_verbosity >= APIC_DEBUG)	\
-			__inquire_remote_apic(apicid);	\
-	} while (0)
+extern void __inquire_remote_apic(int apicid);
+
+static inline void inquire_remote_apic(int apicid)
+{
+	if (apic_verbosity >= APIC_DEBUG)
+		__inquire_remote_apic(apicid);
+}
 
 #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h
index dbab36d64d48..23bf52103b89 100644
--- a/arch/x86/include/asm/mach-default/smpboot_hooks.h
+++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 	CMOS_WRITE(0xa, 0xf);
 	local_flush_tlb();
 	pr_debug("1.\n");
-	*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+								 start_eip >> 4;
 	pr_debug("2.\n");
-	*((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+								 start_eip & 0xf;
 	pr_debug("3.\n");
 }
 
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
 	 */
 	CMOS_WRITE(0, 0xf);
 
-	*((volatile long *) phys_to_virt(0x467)) = 0;
+	*((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
 }
 
 static inline void __init smpboot_setup_io_apic(void)
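
Review note: the warm-reset vector macros now hold the physical addresses (0x467/0x469) and phys_to_virt() moves to the use site, so subarches such as NUMAQ can override just the constants. The access pattern above, unwrapped for readability (editor's sketch of the same code):

	unsigned short *high = phys_to_virt(TRAMPOLINE_PHYS_HIGH);
	unsigned short *low  = phys_to_virt(TRAMPOLINE_PHYS_LOW);

	*high = start_eip >> 4;		/* real-mode segment */
	*low  = start_eip & 0xf;	/* real-mode offset  */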
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index 5180bd7478fb..48553e958ad5 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -24,9 +24,11 @@
 #define check_phys_apicid_present (genapic->check_phys_apicid_present)
 #define check_apicid_used (genapic->check_apicid_used)
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
 #define vector_allocation_domain (genapic->vector_allocation_domain)
 #define enable_apic_mode (genapic->enable_apic_mode)
 #define phys_pkg_id (genapic->phys_pkg_id)
+#define wakeup_secondary_cpu (genapic->wakeup_cpu)
 
 extern void generic_bigsmp_probe(void);
 
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
new file mode 100644
index 000000000000..1ab16b168c8a
--- /dev/null
+++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
+#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
+
+#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
+#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
+#define wait_for_init_deassert (genapic->wait_for_init_deassert)
+#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
+#define store_NMI_vector (genapic->store_NMI_vector)
+#define restore_NMI_vector (genapic->restore_NMI_vector)
+#define inquire_remote_apic (genapic->inquire_remote_apic)
+
+#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 485bdf059ffb..07f1af494ca5 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void)
 
 extern int early_pfn_to_nid(unsigned long pfn);
 
+extern void resume_map_numa_kva(pgd_t *pgd);
+
 #else /* !CONFIG_NUMA */
 
 #define get_memcfg_numa get_memcfg_numa_flat
 
+static inline void resume_map_numa_kva(pgd_t *pgd) {}
+
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 0bf2a06b7a4e..c80f00d29965 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -7,9 +7,9 @@
 
 #define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
-	return CPU_MASK_ALL;
+	return &CPU_MASK_ALL;
 }
 
 #define NO_BALANCE_IRQ (1)
@@ -122,7 +122,13 @@ static inline void enable_apic_mode(void)
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+{
+	return (int) 0xF;
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
 {
 	return (int) 0xF;
 }
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index 935588d286cf..a8374c652778 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,25 +1,22 @@
 #ifndef __ASM_NUMAQ_IPI_H
 #define __ASM_NUMAQ_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
-	cpumask_t mask = cpu_online_map;
-	cpu_clear(smp_processor_id(), mask);
-
-	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+	send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(cpu_online_mask, vector);
 }
 
 #endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h
index c577bda5b1c5..6f499df8eddb 100644
--- a/arch/x86/include/asm/numaq/wakecpu.h
+++ b/arch/x86/include/asm/numaq/wakecpu.h
@@ -3,12 +3,8 @@
 
 /* This file copes with machines that wakeup secondary CPUs by NMIs */
 
-#define WAKE_SECONDARY_VIA_NMI
-
-#define TRAMPOLINE_LOW phys_to_virt(0x8)
-#define TRAMPOLINE_HIGH phys_to_virt(0xa)
-
-#define boot_cpu_apicid boot_cpu_logical_apicid
+#define TRAMPOLINE_PHYS_LOW (0x8)
+#define TRAMPOLINE_PHYS_HIGH (0xa)
 
 /* We don't do anything here because we use NMI's to boot instead */
 static inline void wait_for_init_deassert(atomic_t *deassert)
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void)
 static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
 {
 	printk("Storing NMI vector\n");
-	*high = *((volatile unsigned short *) TRAMPOLINE_HIGH);
-	*low = *((volatile unsigned short *) TRAMPOLINE_LOW);
+	*high =
+	  *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
+	*low =
+	  *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
 }
 
 static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
 {
 	printk("Restoring NMI vector\n");
-	*((volatile unsigned short *) TRAMPOLINE_HIGH) = *high;
-	*((volatile unsigned short *) TRAMPOLINE_LOW) = *low;
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+	 *high;
+	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+	 *low;
 }
 
-#define inquire_remote_apic(apicid) {}
+static inline void inquire_remote_apic(int apicid)
+{
+}
 
 #endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index 5b28995d664e..d02d936840a3 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void);
  */
 #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
 
-#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
-
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	\
 	dma_addr_t ADDR_NAME;
 #define DECLARE_PCI_UNMAP_LEN(LEN_NAME)		\
@@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void);
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)	\
 	(((PTR)->LEN_NAME) = (VAL))
 
-#else
-/* No IOMMU */
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME)		(0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)	do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME)		(0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
-
-#endif
-
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index d1531c8480b7..eefb0594b058 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
-#define __ARCH_WANT_COMPAT_SYS_PTRACE
-
 #endif /* __KERNEL__ */
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f12d37237465..294daeb3a006 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -16,6 +16,8 @@ static inline void visws_early_detect(void) { }
 static inline int is_visws_box(void) { return 0; }
 #endif
 
+extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
+extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
 /*
  * Any setup quirks to be performed?
  */
@@ -39,6 +41,7 @@ struct x86_quirks {
 	void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
 				unsigned short oemsize);
 	int (*setup_ioapic_ids)(void);
+	int (*update_genapic)(void);
 };
 
 extern struct x86_quirks *x86_quirks;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index d12811ce51d9..830b9fcb6427 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
 	void (*cpu_die)(unsigned int cpu);
 	void (*play_dead)(void);
 
-	void (*send_call_func_ipi)(cpumask_t mask);
+	void (*send_call_func_ipi)(const struct cpumask *mask);
 	void (*send_call_func_single_ipi)(int cpu);
 };
 
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
 
 static inline void arch_send_call_function_ipi(cpumask_t mask)
 {
-	smp_ops.send_call_func_ipi(mask);
+	smp_ops.send_call_func_ipi(&mask);
 }
 
 void cpu_disable_common(void);
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
 
-void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
 
 extern void prefill_possible_map(void);
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 9b3070f1c2ac..651a93849341 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -14,13 +14,13 @@
 
 #define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
 
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
 {
 	/* CPU_MASK_ALL (0xff) has undefined behaviour with
 	 * dest_LowestPrio mode logical clustered apic interrupt routing
 	 * Just start on cpu 0. IRQ balancing will spread load
 	 */
-	return cpumask_of_cpu(0);
+	return &cpumask_of_cpu(0);
 }
 
 #define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -137,14 +137,14 @@ static inline void enable_apic_mode(void)
 {
 }
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 {
 	int num_bits_set;
 	int cpus_found = 0;
 	int cpu;
 	int apicid;
 
-	num_bits_set = cpus_weight(cpumask);
+	num_bits_set = cpus_weight(*cpumask);
 	/* Return id to all */
 	if (num_bits_set == NR_CPUS)
 		return (int) 0xFF;
@@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 	 * The cpus in the mask must all be on the apic cluster. If are not
 	 * on the same apicid cluster return default value of TARGET_CPUS.
 	 */
-	cpu = first_cpu(cpumask);
+	cpu = first_cpu(*cpumask);
 	apicid = cpu_to_logical_apicid(cpu);
 	while (cpus_found < num_bits_set) {
-		if (cpu_isset(cpu, cpumask)) {
+		if (cpu_isset(cpu, *cpumask)) {
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
@@ -170,6 +170,45 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 	return apicid;
 }
 
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+						  const struct cpumask *andmask)
+{
+	int num_bits_set;
+	int num_bits_set2;
+	int cpus_found = 0;
+	int cpu;
+	int apicid = 0;
+
+	num_bits_set = cpumask_weight(cpumask);
+	num_bits_set2 = cpumask_weight(andmask);
+	num_bits_set = min(num_bits_set, num_bits_set2);
+	/* Return id to all */
+	if (num_bits_set >= nr_cpu_ids)
+		return 0xFF;
+	/*
+	 * The cpus in the mask must all be on the apic cluster. If are not
+	 * on the same apicid cluster return default value of TARGET_CPUS.
+	 */
+	cpu = cpumask_first_and(cpumask, andmask);
+	apicid = cpu_to_logical_apicid(cpu);
+	while (cpus_found < num_bits_set) {
+		if (cpumask_test_cpu(cpu, cpumask)
+		    && cpumask_test_cpu(cpu, andmask)) {
+			int new_apicid = cpu_to_logical_apicid(cpu);
+			if (apicid_cluster(apicid) !=
+					apicid_cluster(new_apicid)) {
+				printk(KERN_WARNING
+					"%s: Not a valid mask!\n", __func__);
+				return 0xFF;
+			}
+			apicid = apicid | new_apicid;
+			cpus_found++;
+		}
+		cpu++;
+	}
+	return apicid;
+}
+
 /* cpuid returns the value latched in the HW at reset, not the APIC ID
  * register's value. For any box whose BIOS changes APIC IDs, like
  * clustered APIC systems, we must use hard_smp_processor_id.
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
index 53bd1e7bd7b4..a8a2c24f50cc 100644
--- a/arch/x86/include/asm/summit/ipi.h
+++ b/arch/x86/include/asm/summit/ipi.h
@@ -1,9 +1,10 @@
 #ifndef __ASM_SUMMIT_IPI_H
 #define __ASM_SUMMIT_IPI_H
 
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
 
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
 {
 	send_IPI_mask_sequence(mask, vector);
 }
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
 	cpu_clear(smp_processor_id(), mask);
 
 	if (!cpus_empty(mask))
-		send_IPI_mask(mask, vector);
+		send_IPI_mask(&mask, vector);
 }
 
 static inline void send_IPI_all(int vector)
 {
-	send_IPI_mask(cpu_online_map, vector);
+	send_IPI_mask(&cpu_online_map, vector);
 }
 
 #endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2ed3f0f44ff7..07c3e4048991 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
 
+void stop_this_cpu(void *dummy);
+
 /*
  * Force strict CPU ordering.
  * And yes, this is required on UP too when we're talking
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..0921b4018c11 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,8 @@
 struct task_struct;
 struct exec_domain;
 #include <asm/processor.h>
+#include <asm/ftrace.h>
+#include <asm/atomic.h>
 
 struct thread_info {
 	struct task_struct	*task;		/* main task structure */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4850e4b02b61..79e31e9dcdda 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -226,6 +226,8 @@ extern cpumask_t cpu_coregroup_map(int cpu);
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)		(&per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 
 /* indicates that pointers to the topology cpumask_t maps are valid */
 #define arch_provides_topology_pointers		yes
@@ -239,7 +241,7 @@ struct pci_bus;
 void set_pci_bus_resources_arch_default(struct pci_bus *b);
 
 #ifdef CONFIG_SMP
-#define mc_capable()	(boot_cpu_data.x86_max_cores > 1)
+#define mc_capable()	(cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids)
 #define smt_capable()			(smp_num_siblings > 1)
 #endif
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..99192bb55a53 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -157,6 +157,7 @@ extern int __get_user_bad(void);
 	int __ret_gu;							\
 	unsigned long __val_gu;						\
 	__chk_user_ptr(ptr);						\
+	might_fault();							\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
 		__get_user_x(1, __ret_gu, __val_gu, ptr);		\
@@ -241,6 +242,7 @@ extern void __put_user_8(void);
 	int __ret_pu;							\
 	__typeof__(*(ptr)) __pu_val;					\
 	__chk_user_ptr(ptr);						\
+	might_fault();							\
 	__pu_val = x;							\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index d095a3aeea1b..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 static __always_inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	return __copy_to_user_inatomic(to, from, n);
 }
 
 static __always_inline unsigned long
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
 				const void __user *from, unsigned long n)
 {
-	might_sleep();
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
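
Review note: might_sleep() only asserts "not called from atomic context"; might_fault() additionally documents that the call may take a page fault and, under lockdep, may acquire mmap_sem. Rough semantics, an editor's sketch rather than the exact kernel implementation:

	static inline void might_fault_sketch(void)
	{
		might_sleep();
		/* plus a lockdep annotation that current->mm->mmap_sem
		 * may be taken to service the fault */
	}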
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 664f15280f14..84210c479fca 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -29,6 +29,8 @@ static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@ -46,7 +48,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
 		return ret;
 	case 10:
 		__get_user_asm(*(u64 *)dst, (u64 __user *)src,
-			       ret, "q", "", "=r", 16);
+			       ret, "q", "", "=r", 10);
 		if (unlikely(ret))
 			return ret;
 		__get_user_asm(*(u16 *)(8 + (char *)dst),
@@ -71,6 +73,8 @@ static __always_inline __must_check
 int __copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
@@ -113,6 +117,8 @@ static __always_inline __must_check
 int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
+
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst,
 					 (__force void *)src, size);
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 834b2c1d89fb..d2e415e6666f 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept				288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4				288
+__SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_signalfd4				289
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
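
Review note: syscall slot 288 is re-pointed from the never-released paccept() to accept4(). Typical userspace usage of the resulting API (standard accept4(2), shown for orientation, not part of this patch):

	#include <sys/socket.h>

	int conn = accept4(listen_fd, NULL, NULL,
			   SOCK_NONBLOCK | SOCK_CLOEXEC);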
diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h
index b7c0dea119fe..61e08c0a2907 100644
--- a/arch/x86/include/asm/vmi.h
+++ b/arch/x86/include/asm/vmi.h
@@ -223,9 +223,15 @@ struct pci_header {
 } __attribute__((packed));
 
 /* Function prototypes for bootstrapping */
+#ifdef CONFIG_VMI
 extern void vmi_init(void);
+extern void vmi_activate(void);
 extern void vmi_bringup(void);
-extern void vmi_apply_boot_page_allocations(void);
+#else
+static inline void vmi_init(void) {}
+static inline void vmi_activate(void) {}
+static inline void vmi_bringup(void) {}
+#endif
 
 /* State needed to start an application processor in an SMP system. */
 struct vmi_ap_state {
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e2..1cad9318d217 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,7 +25,7 @@ CFLAGS_tsc.o := $(nostackp)
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y			+= time_$(BITS).o ioport.o ldt.o
+obj-y			+= time_$(BITS).o ioport.o ldt.o dumpstack.o
 obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
@@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
-obj-y				+= ds.o
+obj-$(CONFIG_X86_DS)		+= ds.o
 obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
@@ -65,6 +65,7 @@ obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c1f76abae9e..65d0b72777ea 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void)
 		error = acpi_parse_madt_ioapic_entries();
 		if (!error) {
 			acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
-			acpi_irq_balance_set(NULL);
 			acpi_ioapic = 1;
 
 			smp_found_config = 1;
@@ -1361,6 +1360,17 @@ static void __init acpi_process_madt(void)
 			disable_acpi();
 		}
 	}
+
+	/*
+	 * ACPI supports both logical (e.g. Hyper-Threading) and physical
+	 * processors, where MPS only supports physical.
+	 */
+	if (acpi_lapic && acpi_ioapic)
+		printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
+		       "information\n");
+	else if (acpi_lapic)
+		printk(KERN_INFO "Using ACPI for processor (LAPIC) "
+		       "configuration information\n");
 #endif
 	return;
 }
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 331b318304eb..a7b6dec6fc3f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 
 	spin_lock_irqsave(&iommu->lock, flags);
 	ret = __iommu_queue_command(iommu, cmd);
+	if (!ret)
+		iommu->need_sync = 1;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
@@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
 	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
 
-	iommu->need_sync = 0;
-
 	spin_lock_irqsave(&iommu->lock, flags);
 
+	if (!iommu->need_sync)
+		goto out;
+
+	iommu->need_sync = 0;
+
 	ret = __iommu_queue_command(iommu, &cmd);
 
 	if (ret)
@@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 
 	ret = iommu_queue_command(iommu, &cmd);
 
-	iommu->need_sync = 1;
-
 	return ret;
 }
 
@@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 
 	ret = iommu_queue_command(iommu, &cmd);
 
-	iommu->need_sync = 1;
-
 	return ret;
 }
 
@@ -343,7 +344,7 @@ static int iommu_map(struct protection_domain *dom,
 	u64 __pte, *pte, *page;
 
 	bus_addr = PAGE_ALIGN(bus_addr);
-	phys_addr = PAGE_ALIGN(bus_addr);
+	phys_addr = PAGE_ALIGN(phys_addr);
 
 	/* only support 512GB address spaces for now */
 	if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
@@ -537,7 +538,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 	address >>= PAGE_SHIFT;
 	iommu_area_free(dom->bitmap, address, pages);
 
-	if (address + pages >= dom->next_bit)
+	if (address >= dom->next_bit)
 		dom->need_flush = true;
 }
 
@@ -599,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
 			continue;
 
 		p2 = IOMMU_PTE_PAGE(p1[i]);
-		for (j = 0; j < 512; ++i) {
+		for (j = 0; j < 512; ++j) {
 			if (!IOMMU_PTE_PRESENT(p2[j]))
 				continue;
 			p3 = IOMMU_PTE_PAGE(p2[j]);
@@ -762,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu,
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
 	iommu_queue_inv_dev_entry(iommu, devid);
-
-	iommu->need_sync = 1;
 }
 
 /*****************************************************************************
@@ -858,6 +857,9 @@ static int get_device_resources(struct device *dev,
 		print_devid(_bdf, 1);
 	}
 
+	if (domain_for_device(_bdf) == NULL)
+		set_device_domain(*iommu, *domain, _bdf);
+
 	return 1;
 }
 
@@ -908,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 	if (address >= dom->aperture_size)
 		return;
 
-	WARN_ON(address & 0xfffULL || address > dom->aperture_size);
+	WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
 
 	pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
 	pte += IOMMU_PTE_L0_INDEX(address);
@@ -920,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 
 /*
  * This function contains common code for mapping of a physically
- * contiguous memory region into DMA address space. It is uses by all
- * mapping functions provided by this IOMMU driver.
+ * contiguous memory region into DMA address space. It is used by all
+ * mapping functions provided with this IOMMU driver.
  * Must be called with the domain lock held.
 */
 static dma_addr_t __map_single(struct device *dev,
@@ -981,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu,
 	dma_addr_t i, start;
 	unsigned int pages;
 
-	if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
+	if ((dma_addr == bad_dma_address) ||
+	    (dma_addr + size > dma_dom->aperture_size))
 		return;
 
 	pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
@@ -1031,8 +1034,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	if (addr == bad_dma_address)
 		goto out;
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -1060,8 +1062,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, dir);
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -1127,8 +1128,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 			goto unmap;
 	}
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -1173,8 +1173,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		s->dma_address = s->dma_length = 0;
 	}
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -1225,8 +1224,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		goto out;
 	}
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -1257,8 +1255,7 @@ static void free_coherent(struct device *dev, size_t size,
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
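
Review note: need_sync is now maintained entirely under iommu->lock: it is set only when a command was actually queued, and iommu_completion_wait() checks it itself, so callers can wait unconditionally and the call is a cheap no-op when nothing is pending. The protocol, sketched from the hunks above:

	/* queue side (iommu_queue_command) */
	spin_lock_irqsave(&iommu->lock, flags);
	ret = __iommu_queue_command(iommu, cmd);
	if (!ret)
		iommu->need_sync = 1;	/* completion now outstanding */
	spin_unlock_irqrestore(&iommu->lock, flags);

	/* wait side (iommu_completion_wait) */
	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->need_sync)
		goto out;		/* nothing queued since last sync */
	iommu->need_sync = 0;
	/* ... queue a COMPL_WAIT command and poll for it ... */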
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0cdcda35a05f..30ae2701b3df 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate;			/* if 1, device isolation is enabled */
+int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
@@ -1213,7 +1213,9 @@ static int __init parse_amd_iommu_options(char *str)
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
 			amd_iommu_isolate = 1;
-		if (strncmp(str, "fullflush", 11) == 0)
+		if (strncmp(str, "share", 5) == 0)
+			amd_iommu_isolate = 0;
+		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
 
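
Review note: the old strncmp(str, "fullflush", 11) compared two bytes past the 9-character literal, so it only matched when "fullflush" ended the option string; with length 9 it also matches a prefix such as "fullflush,isolate". Illustration (standard strncmp semantics):

	strncmp("fullflush,isolate", "fullflush", 11);	/* != 0, old code missed it */
	strncmp("fullflush,isolate", "fullflush", 9);	/* == 0, new code matches   */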
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 04a7f960bbc0..edda4c00e3d2 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -141,7 +141,7 @@ static int lapic_next_event(unsigned long delta,
141 struct clock_event_device *evt); 141 struct clock_event_device *evt);
142static void lapic_timer_setup(enum clock_event_mode mode, 142static void lapic_timer_setup(enum clock_event_mode mode,
143 struct clock_event_device *evt); 143 struct clock_event_device *evt);
144static void lapic_timer_broadcast(cpumask_t mask); 144static void lapic_timer_broadcast(const cpumask_t *mask);
145static void apic_pm_activate(void); 145static void apic_pm_activate(void);
146 146
147/* 147/*
@@ -453,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
453/* 453/*
454 * Local APIC timer broadcast function 454 * Local APIC timer broadcast function
455 */ 455 */
456static void lapic_timer_broadcast(cpumask_t mask) 456static void lapic_timer_broadcast(const cpumask_t *mask)
457{ 457{
458#ifdef CONFIG_SMP 458#ifdef CONFIG_SMP
459 send_IPI_mask(mask, LOCAL_TIMER_VECTOR); 459 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@ -469,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
469 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 469 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
470 470
471 memcpy(levt, &lapic_clockevent, sizeof(*levt)); 471 memcpy(levt, &lapic_clockevent, sizeof(*levt));
472 levt->cpumask = cpumask_of_cpu(smp_processor_id()); 472 levt->cpumask = cpumask_of(smp_processor_id());
473 473
474 clockevents_register_device(levt); 474 clockevents_register_device(levt);
475} 475}
@@ -1315,7 +1315,7 @@ void enable_x2apic(void)
1315 } 1315 }
1316} 1316}
1317 1317
1318void enable_IR_x2apic(void) 1318void __init enable_IR_x2apic(void)
1319{ 1319{
1320#ifdef CONFIG_INTR_REMAP 1320#ifdef CONFIG_INTR_REMAP
1321 int ret; 1321 int ret;
@@ -1903,8 +1903,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	}
 #endif
 
-	cpu_set(cpu, cpu_possible_map);
-	cpu_set(cpu, cpu_present_map);
+	set_cpu_possible(cpu, true);
+	set_cpu_present(cpu, true);
 }
 
 #ifdef CONFIG_X86_64
@@ -2106,7 +2106,7 @@ __cpuinit int apic_is_clustered_box(void)
 	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 		/* are we being called early in kernel startup? */
 		if (bios_cpu_apicid) {
 			id = bios_cpu_apicid[i];
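All five apic.c hunks are instances of one cpumask migration: masks travel by const pointer rather than by value (lapic_timer_broadcast(), cpumask_of() instead of cpumask_of_cpu()), the possible/present maps are updated through the set_cpu_possible()/set_cpu_present() accessors, and loops stop at the runtime nr_cpu_ids instead of the compile-time NR_CPUS. A rough user-space illustration of why the loop bound matters (all constants invented for the demo):

	#include <stdio.h>

	#define NR_CPUS 4096			/* compile-time maximum */
	static int nr_cpu_ids = 8;		/* count detected at boot */

	int main(void)
	{
		int i, visited = 0;

		for (i = 0; i < nr_cpu_ids; i++)	/* 8 iterations... */
			visited++;
		printf("visited %d slots instead of %d\n", visited, NR_CPUS);
		return 0;
	}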
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5145a6e72bbb..3a26525a3f31 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -391,11 +391,7 @@ static int power_off;
 #else
 static int power_off = 1;
 #endif
-#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int realmode_power_off = 1;
-#else
 static int realmode_power_off;
-#endif
 #ifdef CONFIG_APM_ALLOW_INTS
 static int allow_ints = 1;
 #else
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
+#include <linux/ftrace.h>
 
 #include <linux/acpi.h>
 #include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	unsigned int i;
 	int result = 0;
+	struct power_trace it;
 
 	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
 
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		}
 	}
 
+	trace_power_mark(&it, POWER_PSTATE, next_perf_state);
+
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
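The acpi-cpufreq hunks hook the P-state transition into the ftrace power tracer: a power_trace record on the stack is handed to trace_power_mark() at the moment the new performance state is chosen, before the MSR or I/O port is programmed. A hedged kernel-style sketch of the same call pattern (the surrounding function is invented; the API is only the one visible in the hunk):

	#include <linux/ftrace.h>

	static void note_pstate_change(unsigned int next_perf_state)
	{
		struct power_trace it;

		/* record a point event tagged as a P-state (frequency) change */
		trace_power_mark(&it, POWER_PSTATE, next_perf_state);

		/* ... program the hardware for next_perf_state ... */
	}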
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d3dcd58b87cd..7f05f44b97e9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	u32 i = 0;
 
 	if (cpu_family == CPU_HW_PSTATE) {
-		rdmsr(MSR_PSTATE_STATUS, lo, hi);
-		i = lo & HW_PSTATE_MASK;
-		data->currpstate = i;
+		if (data->currpstate == HW_PSTATE_INVALID) {
+			/* read (initial) hw pstate if not yet set */
+			rdmsr(MSR_PSTATE_STATUS, lo, hi);
+			i = lo & HW_PSTATE_MASK;
+
+			/*
+			 * a workaround for family 11h erratum 311 might cause
+			 * an "out-of-range" Pstate if the core is in Pstate-0
+			 */
+			if (i >= data->numps)
+				data->currpstate = HW_PSTATE_0;
+			else
+				data->currpstate = i;
+		}
 		return 0;
 	}
 	do {
@@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	}
 
 	data->cpu = pol->cpu;
+	data->currpstate = HW_PSTATE_INVALID;
 
 	if (powernow_k8_cpu_init_acpi(data)) {
 		/*
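The new query path reads MSR_PSTATE_STATUS only while currpstate still holds the HW_PSTATE_INVALID sentinel, and clamps an out-of-range index (possible under the family 11h erratum 311 workaround) to P-state 0. The clamp in isolation, as a runnable sketch with made-up values:

	#include <stdio.h>

	enum pstate { HW_PSTATE_INVALID = 0xff, HW_PSTATE_0 = 0 };

	static unsigned int clamp_pstate(unsigned int raw, unsigned int numps)
	{
		return (raw >= numps) ? HW_PSTATE_0 : raw;
	}

	int main(void)
	{
		printf("%u\n", clamp_pstate(2, 5));	/* 2: in range, kept */
		printf("%u\n", clamp_pstate(7, 5));	/* 0: out of range, clamped */
		return 0;
	}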
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..65cfb5d7f77f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,6 +5,19 @@
  * http://www.gnu.org/licenses/gpl.html
  */
 
+
+enum pstate {
+	HW_PSTATE_INVALID = 0xff,
+	HW_PSTATE_0 = 0,
+	HW_PSTATE_1 = 1,
+	HW_PSTATE_2 = 2,
+	HW_PSTATE_3 = 3,
+	HW_PSTATE_4 = 4,
+	HW_PSTATE_5 = 5,
+	HW_PSTATE_6 = 6,
+	HW_PSTATE_7 = 7,
+};
+
 struct powernow_k8_data {
 	unsigned int cpu;
 
@@ -23,7 +36,9 @@ struct powernow_k8_data {
 	u32 exttype; /* extended interface = 1 */
 
 	/* keep track of the current fid / vid or pstate */
-	u32 currvid, currfid, currpstate;
+	u32 currvid;
+	u32 currfid;
+	enum pstate currpstate;
 
 	/* the powernow_table includes all frequency and vid/fid pairings:
 	 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
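Typing currpstate as enum pstate with an out-of-band 0xff sentinel lets "not read yet" be distinguished from every value the 3-bit hardware field can produce. A minimal sketch of the handshake between cpu_init and the query path (hypothetical demo, not the driver):

	enum pstate { HW_PSTATE_INVALID = 0xff, HW_PSTATE_0 = 0 };

	struct state {
		enum pstate currpstate;
	};

	static void cpu_init(struct state *s)
	{
		s->currpstate = HW_PSTATE_INVALID;	/* force the first query to read hw */
	}

	static void query(struct state *s)
	{
		if (s->currpstate == HW_PSTATE_INVALID)
			s->currpstate = HW_PSTATE_0;	/* stand-in for the MSR read */
	}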
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..816f27f289b1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_P4);
 	if (c->x86 == 6)
 		set_cpu_cap(c, X86_FEATURE_P3);
+#endif
 
 	if (cpu_has_bts)
 		ptrace_bts_init_intel(c);
 
-#endif
-
 	detect_extended_topology(c);
 	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
 		/*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afbb1cf1..fb7f946cb65e 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	per_cpu(cpuid4_info, cpu) = NULL;
 }
 
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
+static void get_cpu_leaves(void *_retval)
 {
-	struct _cpuid4_info	*this_leaf;
-	unsigned long		j;
-	int			retval;
-	cpumask_t		oldmask;
-
-	if (num_cache_leaves == 0)
-		return -ENOENT;
-
-	per_cpu(cpuid4_info, cpu) = kzalloc(
-	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-	if (per_cpu(cpuid4_info, cpu) == NULL)
-		return -ENOMEM;
-
-	oldmask = current->cpus_allowed;
-	retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-	if (retval)
-		goto out;
+	int j, *retval = _retval, cpu = smp_processor_id();
 
 	/* Do cpuid and store the results */
 	for (j = 0; j < num_cache_leaves; j++) {
+		struct _cpuid4_info *this_leaf;
 		this_leaf = CPUID4_INFO_IDX(cpu, j);
-		retval = cpuid4_cache_lookup(j, this_leaf);
-		if (unlikely(retval < 0)) {
+		*retval = cpuid4_cache_lookup(j, this_leaf);
+		if (unlikely(*retval < 0)) {
 			int i;
 
 			for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 		}
 		cache_shared_cpu_map_setup(cpu, j);
 	}
-	set_cpus_allowed_ptr(current, &oldmask);
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+	int retval;
+
+	if (num_cache_leaves == 0)
+		return -ENOENT;
+
+	per_cpu(cpuid4_info, cpu) = kzalloc(
+	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+	if (per_cpu(cpuid4_info, cpu) == NULL)
+		return -ENOMEM;
 
-out:
+	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
 	if (retval) {
 		kfree(per_cpu(cpuid4_info, cpu));
 		per_cpu(cpuid4_info, cpu) = NULL;
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 		cpumask_t *mask = &this_leaf->shared_cpu_map;
 
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
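detect_cache_attributes() now runs the CPUID probing on the target CPU via smp_call_function_single() instead of temporarily rewriting the calling task's affinity mask, which was racy against concurrent sched_setaffinity(). The remote handler runs in IPI context and must not sleep. A hedged sketch of the pattern (names invented):

	#include <linux/smp.h>

	static void read_local_id(void *_ret)
	{
		int *ret = _ret;

		/* executes on the target CPU, atomically */
		*ret = smp_processor_id();
	}

	static int query_cpu(unsigned int cpu)
	{
		int ret = -1;

		/* wait=1 blocks until the remote handler has finished */
		smp_call_function_single(cpu, read_local_id, &ret, 1);
		return ret;
	}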
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a4b2e9..a1de80f368f1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
  * CPU Initialization
  */
 
+struct thresh_restart {
+	struct threshold_block *b;
+	int reset;
+	u16 old_limit;
+};
+
 /* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_block *b,
-				   int reset, u16 old_limit)
+static long threshold_restart_bank(void *_tr)
 {
+	struct thresh_restart *tr = _tr;
 	u32 mci_misc_hi, mci_misc_lo;
 
-	rdmsr(b->address, mci_misc_lo, mci_misc_hi);
+	rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
 
-	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
-		reset = 1;	/* limit cannot be lower than err count */
+	if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+		tr->reset = 1;	/* limit cannot be lower than err count */
 
-	if (reset) {		/* reset err count and overflow bit */
+	if (tr->reset) {	/* reset err count and overflow bit */
 		mci_misc_hi =
 		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
-		    (THRESHOLD_MAX - b->threshold_limit);
-	} else if (old_limit) {	/* change limit w/o reset */
+		    (THRESHOLD_MAX - tr->b->threshold_limit);
+	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
-		    (old_limit - b->threshold_limit);
+		    (tr->old_limit - tr->b->threshold_limit);
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
 
-	b->interrupt_enable ?
+	tr->b->interrupt_enable ?
 	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
 	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
 
 	mci_misc_hi |= MASK_COUNT_EN_HI;
-	wrmsr(b->address, mci_misc_lo, mci_misc_hi);
+	wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	return 0;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 	unsigned int cpu = smp_processor_id();
 	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
+	struct thresh_restart tr;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 			wrmsr(address, low, high);
 
 			threshold_defaults.address = address;
-			threshold_restart_bank(&threshold_defaults, 0, 0);
+			tr.b = &threshold_defaults;
+			tr.reset = 0;
+			tr.old_limit = 0;
+			threshold_restart_bank(&tr);
 		}
 	}
 }
@@ -251,20 +262,6 @@ struct threshold_attr {
 	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
-					   cpumask_t *newmask)
-{
-	*oldmask = current->cpus_allowed;
-	cpus_clear(*newmask);
-	cpu_set(cpu, *newmask);
-	set_cpus_allowed_ptr(current, newmask);
-}
-
-static void affinity_restore(const cpumask_t *oldmask)
-{
-	set_cpus_allowed_ptr(current, oldmask);
-}
-
 #define SHOW_FIELDS(name) \
 static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
 { \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
 				      const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
 	b->interrupt_enable = !!new;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, 0);
-	affinity_restore(&oldmask);
+	tr.b = b;
+	tr.reset = 0;
+	tr.old_limit = 0;
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 				     const char *buf, size_t count)
 {
 	char *end;
-	cpumask_t oldmask, newmask;
-	u16 old;
+	struct thresh_restart tr;
 	unsigned long new = simple_strtoul(buf, &end, 0);
 	if (end == buf)
 		return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
-	old = b->threshold_limit;
+	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
+	tr.b = b;
+	tr.reset = 0;
 
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 0, old);
-	affinity_restore(&oldmask);
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 
 	return end - buf;
 }
 
-static ssize_t show_error_count(struct threshold_block *b, char *buf)
+static long local_error_count(void *_b)
 {
-	u32 high, low;
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
+	struct threshold_block *b = _b;
+	u32 low, high;
+
 	rdmsr(b->address, low, high);
-	affinity_restore(&oldmask);
-	return sprintf(buf, "%x\n",
-		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+	return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+}
+
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
+{
+	return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
 }
 
 static ssize_t store_error_count(struct threshold_block *b,
 				 const char *buf, size_t count)
 {
-	cpumask_t oldmask, newmask;
-	affinity_set(b->cpu, &oldmask, &newmask);
-	threshold_restart_bank(b, 1, 0);
-	affinity_restore(&oldmask);
+	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+
+	work_on_cpu(b->cpu, threshold_restart_bank, &tr);
 	return 1;
 }
 
@@ -463,12 +462,19 @@ out_free:
 	return err;
 }
 
+static long local_allocate_threshold_blocks(void *_bank)
+{
+	unsigned int *bank = _bank;
+
+	return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
+					 MSR_IA32_MC0_MISC + *bank * 4);
+}
+
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
 static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
 	int i, err = 0;
 	struct threshold_bank *b = NULL;
-	cpumask_t oldmask, newmask;
 	char name[32];
 
 	sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 	per_cpu(threshold_banks, cpu)[bank] = b;
 
-	affinity_set(cpu, &oldmask, &newmask);
-	err = allocate_threshold_blocks(cpu, bank, 0,
-					MSR_IA32_MC0_MISC + bank * 4);
-	affinity_restore(&oldmask);
-
+	err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
 	if (err)
 		goto out_free;
 
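Every former affinity_set()/affinity_restore() pair in mce_amd_64.c becomes a work_on_cpu() call: the arguments are bundled into struct thresh_restart because the callback takes a single void * and returns long, and unlike an IPI handler it may sleep, which suits these sysfs store/show paths. A hedged sketch of the calling convention (struct and helper names are illustrative):

	#include <linux/workqueue.h>

	struct args {
		int bank;
		int reset;
	};

	static long do_remote(void *_args)
	{
		struct args *a = _args;

		/* runs on the CPU handed to work_on_cpu(); may sleep */
		return a->bank + a->reset;	/* stand-in for the MSR update */
	}

	static long run_on(unsigned int cpu)
	{
		struct args a = { .bank = 3, .reset = 0 };

		return work_on_cpu(cpu, do_remote, &a);
	}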
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 2b69994fd3a8..19a8c2c0389f 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -7,13 +7,12 @@
  *
  * It manages:
  * - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
  * - buffer access
  *
  * It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -21,8 +20,6 @@
21 */ 20 */
22 21
23 22
24#ifdef CONFIG_X86_DS
25
26#include <asm/ds.h> 23#include <asm/ds.h>
27 24
28#include <linux/errno.h> 25#include <linux/errno.h>
@@ -30,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/kernel.h>
 
 
 /*
@@ -46,6 +44,33 @@ struct ds_configuration {
 };
 static struct ds_configuration ds_cfg;
 
+/*
+ * A BTS or PEBS tracer.
+ *
+ * This holds the configuration of the tracer and serves as a handle
+ * to identify tracers.
+ */
+struct ds_tracer {
+	/* the DS context (partially) owned by this tracer */
+	struct ds_context *context;
+	/* the buffer provided on ds_request() and its size in bytes */
+	void *buffer;
+	size_t size;
+};
+
+struct bts_tracer {
+	/* the common DS part */
+	struct ds_tracer ds;
+	/* buffer overflow notification function */
+	bts_ovfl_callback_t ovfl;
+};
+
+struct pebs_tracer {
+	/* the common DS part */
+	struct ds_tracer ds;
+	/* buffer overflow notification function */
+	pebs_ovfl_callback_t ovfl;
+};
 
 /*
  * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -109,34 +134,13 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 	(*(unsigned long *)base) = value;
 }
 
+#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
 
-/*
- * Locking is done only for allocating BTS or PEBS resources and for
- * guarding context and buffer memory allocation.
- *
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
- */
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
 /*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
+ * Locking is done only for allocating BTS or PEBS resources.
  */
-static inline int ds_validate_access(struct ds_context *context,
-				     enum ds_qualifier qual)
-{
-	if (!context)
-		return -EPERM;
-
-	if (context->owner[qual] == current)
-		return 0;
-
-	return -EPERM;
-}
+static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
 
 /*
@@ -185,80 +189,43 @@ static inline int check_tracer(struct task_struct *task)
  *
  * Contexts are use-counted. They are allocated on first access and
  * deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- *   requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- *   non-existing context indicates an error. It acquires and releases
- *   the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
  */
 static DEFINE_PER_CPU(struct ds_context *, system_context);
 
 #define this_system_context per_cpu(system_context, smp_processor_id())
 
-/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
- */
 static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
-	struct ds_context *context;
-
-	spin_lock(&ds_lock);
-
-	context = (task ? task->thread.ds_ctx : this_system_context);
-	if (context)
-		context->count++;
-
-	spin_unlock(&ds_lock);
-
-	return context;
-}
-
-/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
- */
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
-{
 	struct ds_context **p_context =
 		(task ? &task->thread.ds_ctx : &this_system_context);
 	struct ds_context *context = *p_context;
+	unsigned long irq;
 
 	if (!context) {
 		context = kzalloc(sizeof(*context), GFP_KERNEL);
-
 		if (!context)
 			return NULL;
 
-		context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
-		if (!context->ds) {
-			kfree(context);
-			return NULL;
-		}
+		spin_lock_irqsave(&ds_lock, irq);
 
-		*p_context = context;
+		if (*p_context) {
+			kfree(context);
 
-		context->this = p_context;
-		context->task = task;
+			context = *p_context;
+		} else {
+			*p_context = context;
 
-		if (task)
-			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+			context->this = p_context;
+			context->task = task;
 
-		if (!task || (task == current))
-			wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
+			if (task)
+				set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-		get_tracer(task);
+			if (!task || (task == current))
+				wrmsrl(MSR_IA32_DS_AREA,
+				       (unsigned long)context->ds);
+		}
+		spin_unlock_irqrestore(&ds_lock, irq);
 	}
 
 	context->count++;
@@ -266,16 +233,14 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 	return context;
 }
 
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
 static inline void ds_put_context(struct ds_context *context)
 {
+	unsigned long irq;
+
 	if (!context)
 		return;
 
-	spin_lock(&ds_lock);
+	spin_lock_irqsave(&ds_lock, irq);
 
 	if (--context->count)
 		goto out;
@@ -288,351 +253,351 @@ static inline void ds_put_context(struct ds_context *context)
 	if (!context->task || (context->task == current))
 		wrmsrl(MSR_IA32_DS_AREA, 0);
 
-	put_tracer(context->task);
-
-	/* free any leftover buffers from tracers that did not
-	 * deallocate them properly. */
-	kfree(context->buffer[ds_bts]);
-	kfree(context->buffer[ds_pebs]);
-	kfree(context->ds);
 	kfree(context);
  out:
-	spin_unlock(&ds_lock);
+	spin_unlock_irqrestore(&ds_lock, irq);
 }
 
 
 /*
  * Handle a buffer overflow
  *
- * task: the task whose buffers are overflowing;
- *       NULL for a buffer overflow on the current cpu
  * context: the ds context
  * qual: the buffer type
  */
-static void ds_overflow(struct task_struct *task, struct ds_context *context,
-			enum ds_qualifier qual)
+static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
 {
-	if (!context)
-		return;
-
-	if (context->callback[qual])
-		(*context->callback[qual])(task);
-
-	/* todo: do some more overflow handling */
+	switch (qual) {
+	case ds_bts: {
+		struct bts_tracer *tracer =
+			container_of(context->owner[qual],
+				     struct bts_tracer, ds);
+		if (tracer->ovfl)
+			tracer->ovfl(tracer);
+	}
+		break;
+	case ds_pebs: {
+		struct pebs_tracer *tracer =
+			container_of(context->owner[qual],
+				     struct pebs_tracer, ds);
+		if (tracer->ovfl)
+			tracer->ovfl(tracer);
+	}
+		break;
+	}
 }
 
 
-/*
- * Allocate a non-pageable buffer of the parameter size.
- * Checks the memory and the locked memory rlimit.
- *
- * Returns the buffer, if successful;
- *         NULL, if out of memory or rlimit exceeded.
- *
- * size: the requested buffer size in bytes
- * pages (out): if not NULL, contains the number of pages reserved
- */
-static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
+static void ds_install_ds_config(struct ds_context *context,
+				 enum ds_qualifier qual,
+				 void *base, size_t size, size_t ith)
 {
-	unsigned long rlim, vm, pgsz;
-	void *buffer;
-
-	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->total_vm + pgsz;
-	if (rlim < vm)
-		return NULL;
+	unsigned long buffer, adj;
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->locked_vm + pgsz;
-	if (rlim < vm)
-		return NULL;
+	/* adjust the buffer address and size to meet alignment
+	 * constraints:
+	 * - buffer is double-word aligned
+	 * - size is multiple of record size
+	 *
+	 * We checked the size at the very beginning; we have enough
+	 * space to do the adjustment.
+	 */
+	buffer = (unsigned long)base;
 
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
-		return NULL;
+	adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
+	buffer += adj;
+	size -= adj;
 
-	current->mm->total_vm += pgsz;
-	current->mm->locked_vm += pgsz;
+	size /= ds_cfg.sizeof_rec[qual];
+	size *= ds_cfg.sizeof_rec[qual];
 
-	if (pages)
-		*pages = pgsz;
+	ds_set(context->ds, qual, ds_buffer_base, buffer);
+	ds_set(context->ds, qual, ds_index, buffer);
+	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
 
-	return buffer;
+	/* The value for 'no threshold' is -1, which will set the
+	 * threshold outside of the buffer, just like we want it.
+	 */
+	ds_set(context->ds, qual,
+	       ds_interrupt_threshold, buffer + size - ith);
 }
 
-static int ds_request(struct task_struct *task, void *base, size_t size,
-		      ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
+		      struct task_struct *task,
+		      void *base, size_t size, size_t th)
 {
 	struct ds_context *context;
-	unsigned long buffer, adj;
-	const unsigned long alignment = (1 << 3);
-	int error = 0;
+	unsigned long irq;
+	int error;
 
+	error = -EOPNOTSUPP;
 	if (!ds_cfg.sizeof_ds)
-		return -EOPNOTSUPP;
+		goto out;
+
+	error = -EINVAL;
+	if (!base)
+		goto out;
 
 	/* we require some space to do alignment adjustments below */
-	if (size < (alignment + ds_cfg.sizeof_rec[qual]))
-		return -EINVAL;
+	error = -EINVAL;
+	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+		goto out;
 
-	/* buffer overflow notification is not yet implemented */
-	if (ovfl)
-		return -EOPNOTSUPP;
+	if (th != (size_t)-1) {
+		th *= ds_cfg.sizeof_rec[qual];
 
+		error = -EINVAL;
+		if (size <= th)
+			goto out;
+	}
 
-	spin_lock(&ds_lock);
+	tracer->buffer = base;
+	tracer->size = size;
 
-	if (!check_tracer(task))
-		return -EPERM;
-
 	error = -ENOMEM;
-	context = ds_alloc_context(task);
+	context = ds_get_context(task);
 	if (!context)
-		goto out_unlock;
+		goto out;
+	tracer->context = context;
 
-	error = -EALREADY;
-	if (context->owner[qual] == current)
-		goto out_unlock;
-	error = -EPERM;
-	if (context->owner[qual] != NULL)
-		goto out_unlock;
-	context->owner[qual] = current;
 
-	spin_unlock(&ds_lock);
+	spin_lock_irqsave(&ds_lock, irq);
 
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+	get_tracer(task);
 
-	error = -ENOMEM;
-	if (!base) {
-		base = ds_allocate_buffer(size, &context->pages[qual]);
-		if (!base)
-			goto out_release;
-
-		context->buffer[qual] = base;
-	}
-	error = 0;
+	error = -EPERM;
+	if (context->owner[qual])
+		goto out_put_tracer;
+	context->owner[qual] = tracer;
 
-	context->callback[qual] = ovfl;
+	spin_unlock_irqrestore(&ds_lock, irq);
 
-	/* adjust the buffer address and size to meet alignment
-	 * constraints:
-	 * - buffer is double-word aligned
-	 * - size is multiple of record size
-	 *
-	 * We checked the size at the very beginning; we have enough
-	 * space to do the adjustment.
-	 */
-	buffer = (unsigned long)base;
 
-	adj = ALIGN(buffer, alignment) - buffer;
-	buffer += adj;
-	size -= adj;
+	ds_install_ds_config(context, qual, base, size, th);
 
-	size /= ds_cfg.sizeof_rec[qual];
-	size *= ds_cfg.sizeof_rec[qual];
+	return 0;
 
-	ds_set(context->ds, qual, ds_buffer_base, buffer);
-	ds_set(context->ds, qual, ds_index, buffer);
-	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
-
-	if (ovfl) {
-		/* todo: select a suitable interrupt threshold */
-	} else
-		ds_set(context->ds, qual,
-		       ds_interrupt_threshold, buffer + size + 1);
-
-	/* we keep the context until ds_release */
-	return error;
-
- out_release:
-	context->owner[qual] = NULL;
-	ds_put_context(context);
-	return error;
-
+ out_put_tracer:
+	put_tracer(task);
  out_unlock:
-	spin_unlock(&ds_lock);
+	spin_unlock_irqrestore(&ds_lock, irq);
 	ds_put_context(context);
+	tracer->context = NULL;
+ out:
 	return error;
 }
 
-int ds_request_bts(struct task_struct *task, void *base, size_t size,
-		   ds_ovfl_callback_t ovfl)
+struct bts_tracer *ds_request_bts(struct task_struct *task,
+				  void *base, size_t size,
+				  bts_ovfl_callback_t ovfl, size_t th)
 {
-	return ds_request(task, base, size, ovfl, ds_bts);
-}
+	struct bts_tracer *tracer;
+	int error;
 
-int ds_request_pebs(struct task_struct *task, void *base, size_t size,
-		    ds_ovfl_callback_t ovfl)
-{
-	return ds_request(task, base, size, ovfl, ds_pebs);
+	/* buffer overflow notification is not yet implemented */
+	error = -EOPNOTSUPP;
+	if (ovfl)
+		goto out;
+
+	error = -ENOMEM;
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
+		goto out;
+	tracer->ovfl = ovfl;
+
+	error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
+	if (error < 0)
+		goto out_tracer;
+
+	return tracer;
+
+ out_tracer:
+	kfree(tracer);
+ out:
+	return ERR_PTR(error);
 }
 
-static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+				    void *base, size_t size,
+				    pebs_ovfl_callback_t ovfl, size_t th)
 {
-	struct ds_context *context;
+	struct pebs_tracer *tracer;
 	int error;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
+	/* buffer overflow notification is not yet implemented */
+	error = -EOPNOTSUPP;
+	if (ovfl)
+		goto out;
+
+	error = -ENOMEM;
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
 		goto out;
+	tracer->ovfl = ovfl;
 
-	kfree(context->buffer[qual]);
-	context->buffer[qual] = NULL;
+	error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
+	if (error < 0)
+		goto out_tracer;
 
-	current->mm->total_vm -= context->pages[qual];
-	current->mm->locked_vm -= context->pages[qual];
-	context->pages[qual] = 0;
-	context->owner[qual] = NULL;
+	return tracer;
 
-	/*
-	 * we put the context twice:
-	 * once for the ds_get_context
-	 * once for the corresponding ds_request
-	 */
-	ds_put_context(context);
+ out_tracer:
+	kfree(tracer);
  out:
-	ds_put_context(context);
-	return error;
+	return ERR_PTR(error);
 }
 
-int ds_release_bts(struct task_struct *task)
+static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
 {
-	return ds_release(task, ds_bts);
+	BUG_ON(tracer->context->owner[qual] != tracer);
+	tracer->context->owner[qual] = NULL;
+
+	put_tracer(tracer->context->task);
+	ds_put_context(tracer->context);
 }
 
-int ds_release_pebs(struct task_struct *task)
+int ds_release_bts(struct bts_tracer *tracer)
 {
-	return ds_release(task, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_release(&tracer->ds, ds_bts);
+	kfree(tracer);
+
+	return 0;
 }
 
-static int ds_get_index(struct task_struct *task, size_t *pos,
-			enum ds_qualifier qual)
+int ds_release_pebs(struct pebs_tracer *tracer)
 {
-	struct ds_context *context;
-	unsigned long base, index;
-	int error;
+	if (!tracer)
+		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
+	ds_release(&tracer->ds, ds_pebs);
+	kfree(tracer);
+
+	return 0;
+}
+
+static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
+{
+	unsigned long base, index;
 
 	base = ds_get(context->ds, qual, ds_buffer_base);
 	index = ds_get(context->ds, qual, ds_index);
 
-	error = ((index - base) / ds_cfg.sizeof_rec[qual]);
-	if (pos)
-		*pos = error;
- out:
-	ds_put_context(context);
-	return error;
+	return (index - base) / ds_cfg.sizeof_rec[qual];
 }
 
-int ds_get_bts_index(struct task_struct *task, size_t *pos)
+int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
 {
-	return ds_get_index(task, pos, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_index(tracer->ds.context, ds_bts);
+
+	return 0;
 }
 
-int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
 {
-	return ds_get_index(task, pos, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_index(tracer->ds.context, ds_pebs);
+
+	return 0;
 }
 
-static int ds_get_end(struct task_struct *task, size_t *pos,
-		      enum ds_qualifier qual)
+static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
 {
-	struct ds_context *context;
-	unsigned long base, end;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
+	unsigned long base, max;
 
 	base = ds_get(context->ds, qual, ds_buffer_base);
-	end = ds_get(context->ds, qual, ds_absolute_maximum);
+	max = ds_get(context->ds, qual, ds_absolute_maximum);
 
-	error = ((end - base) / ds_cfg.sizeof_rec[qual]);
-	if (pos)
-		*pos = error;
- out:
-	ds_put_context(context);
-	return error;
+	return (max - base) / ds_cfg.sizeof_rec[qual];
 }
 
-int ds_get_bts_end(struct task_struct *task, size_t *pos)
+int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
 {
-	return ds_get_end(task, pos, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_end(tracer->ds.context, ds_bts);
+
+	return 0;
 }
 
-int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
 {
-	return ds_get_end(task, pos, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	if (!pos)
+		return -EINVAL;
+
+	*pos = ds_get_end(tracer->ds.context, ds_pebs);
+
+	return 0;
 }
 
-static int ds_access(struct task_struct *task, size_t index,
-		     const void **record, enum ds_qualifier qual)
+static int ds_access(struct ds_context *context, enum ds_qualifier qual,
+		     size_t index, const void **record)
 {
-	struct ds_context *context;
 	unsigned long base, idx;
-	int error;
 
 	if (!record)
 		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
-
 	base = ds_get(context->ds, qual, ds_buffer_base);
 	idx = base + (index * ds_cfg.sizeof_rec[qual]);
 
-	error = -EINVAL;
 	if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
-		goto out;
+		return -EINVAL;
 
 	*record = (const void *)idx;
-	error = ds_cfg.sizeof_rec[qual];
- out:
-	ds_put_context(context);
-	return error;
+
+	return ds_cfg.sizeof_rec[qual];
 }
 
-int ds_access_bts(struct task_struct *task, size_t index, const void **record)
+int ds_access_bts(struct bts_tracer *tracer, size_t index,
+		  const void **record)
 {
-	return ds_access(task, index, record, ds_bts);
+	if (!tracer)
+		return -EINVAL;
+
+	return ds_access(tracer->ds.context, ds_bts, index, record);
 }
 
-int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
+int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
+		   const void **record)
 {
-	return ds_access(task, index, record, ds_pebs);
+	if (!tracer)
+		return -EINVAL;
+
+	return ds_access(tracer->ds.context, ds_pebs, index, record);
 }
 
-static int ds_write(struct task_struct *task, const void *record, size_t size,
-		    enum ds_qualifier qual, int force)
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+		    const void *record, size_t size)
 {
-	struct ds_context *context;
-	int error;
+	int bytes_written = 0;
 
 	if (!record)
 		return -EINVAL;
 
-	error = -EPERM;
-	context = ds_get_context(task);
-	if (!context)
-		goto out;
-
-	if (!force) {
-		error = ds_validate_access(context, qual);
-		if (error < 0)
-			goto out;
-	}
-
-	error = 0;
 	while (size) {
 		unsigned long base, index, end, write_end, int_th;
 		unsigned long write_size, adj_write_size;
@@ -660,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
 			write_end = end;
 
 		if (write_end <= index)
-			goto out;
+			break;
 
 		write_size = min((unsigned long) size, write_end - index);
 		memcpy((void *)index, record, write_size);
 
 		record = (const char *)record + write_size;
 		size -= write_size;
-		error += write_size;
+		bytes_written += write_size;
 
 		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
 		adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -682,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
 		ds_set(context->ds, qual, ds_index, index);
 
 		if (index >= int_th)
-			ds_overflow(task, context, qual);
+			ds_overflow(context, qual);
 	}
 
- out:
-	ds_put_context(context);
-	return error;
+	return bytes_written;
 }
 
-int ds_write_bts(struct task_struct *task, const void *record, size_t size)
+int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
 {
-	return ds_write(task, record, size, ds_bts, /* force = */ 0);
-}
+	if (!tracer)
+		return -EINVAL;
 
-int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_pebs, /* force = */ 0);
+	return ds_write(tracer->ds.context, ds_bts, record, size);
 }
 
-int ds_unchecked_write_bts(struct task_struct *task,
-			   const void *record, size_t size)
+int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
 {
-	return ds_write(task, record, size, ds_bts, /* force = */ 1);
-}
+	if (!tracer)
+		return -EINVAL;
 
-int ds_unchecked_write_pebs(struct task_struct *task,
-			    const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+	return ds_write(tracer->ds.context, ds_pebs, record, size);
 }
 
-static int ds_reset_or_clear(struct task_struct *task,
-			     enum ds_qualifier qual, int clear)
+static void ds_reset_or_clear(struct ds_context *context,
+			      enum ds_qualifier qual, int clear)
 {
-	struct ds_context *context;
 	unsigned long base, end;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
 
 	base = ds_get(context->ds, qual, ds_buffer_base);
 	end = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -731,89 +681,100 @@ static int ds_reset_or_clear(struct task_struct *task,
 		memset((void *)base, 0, end - base);
 
 	ds_set(context->ds, qual, ds_index, base);
-
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
 }
 
-int ds_reset_bts(struct task_struct *task)
+int ds_reset_bts(struct bts_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
+
+	return 0;
 }
 
-int ds_reset_pebs(struct task_struct *task)
+int ds_reset_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
+
+	return 0;
 }
 
-int ds_clear_bts(struct task_struct *task)
+int ds_clear_bts(struct bts_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
+
+	return 0;
 }
 
-int ds_clear_pebs(struct task_struct *task)
+int ds_clear_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+	if (!tracer)
+		return -EINVAL;
+
+	ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
+
+	return 0;
 }
 
-int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
 {
-	struct ds_context *context;
-	int error;
+	if (!tracer)
+		return -EINVAL;
 
 	if (!value)
 		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, ds_pebs);
-	if (error < 0)
-		goto out;
+	*value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
 
-	*value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
-
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	return 0;
 }
 
-int ds_set_pebs_reset(struct task_struct *task, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
 {
-	struct ds_context *context;
-	int error;
-
-	context = ds_get_context(task);
-	error = ds_validate_access(context, ds_pebs);
-	if (error < 0)
-		goto out;
+	if (!tracer)
+		return -EINVAL;
 
-	*(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+	*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
 
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	return 0;
 }
 
 static const struct ds_configuration ds_cfg_var = {
 	.sizeof_ds    = sizeof(long) * 12,
 	.sizeof_field = sizeof(long),
 	.sizeof_rec[ds_bts]   = sizeof(long) * 3,
+#ifdef __i386__
 	.sizeof_rec[ds_pebs]  = sizeof(long) * 10
+#else
+	.sizeof_rec[ds_pebs]  = sizeof(long) * 18
+#endif
 };
 static const struct ds_configuration ds_cfg_64 = {
 	.sizeof_ds    = 8 * 12,
 	.sizeof_field = 8,
 	.sizeof_rec[ds_bts]   = 8 * 3,
+#ifdef __i386__
 	.sizeof_rec[ds_pebs]  = 8 * 10
+#else
	.sizeof_rec[ds_pebs]  = 8 * 18
+#endif
 };
 
 static inline void
 ds_configure(const struct ds_configuration *cfg)
 {
 	ds_cfg = *cfg;
+
+	printk(KERN_INFO "DS available\n");
+
+	BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -821,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
+		case 0 ... 0xC:
+			/* sorry, don't know about them */
+			break;
 		case 0xD:
 		case 0xE: /* Pentium M */
 			ds_configure(&ds_cfg_var);
 			break;
-		case 0xF: /* Core2 */
-		case 0x1C: /* Atom */
+		default: /* Core2, Atom, ... */
 			ds_configure(&ds_cfg_64);
 			break;
-		default:
-			/* sorry, don't know about them */
-			break;
 		}
 		break;
 	case 0xF:
@@ -858,7 +818,8 @@ void ds_free(struct ds_context *context)
 	 * is dying. There should not be any user of that context left
 	 * to disturb us, anymore. */
 	unsigned long leftovers = context->count;
-	while (leftovers--)
+	while (leftovers--) {
+		put_tracer(context->task);
 		ds_put_context(context);
+	}
 }
-#endif /* CONFIG_X86_DS */
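The ds.c rework replaces task-keyed entry points with opaque per-tracer handles: ds_request_bts()/ds_request_pebs() now return a tracer (or an ERR_PTR-encoded error), ownership is recorded per handle, and buffers are caller-provided, so the old rlimit-checked allocator disappears. A hedged sketch of the consumer side, using only signatures visible in the diff (the buffer size is an arbitrary example):

	#include <linux/err.h>
	#include <asm/ds.h>

	static int trace_branches(struct task_struct *task, void *buf, size_t size)
	{
		struct bts_tracer *tracer;

		/* no overflow callback, no interrupt threshold */
		tracer = ds_request_bts(task, buf, size, NULL, (size_t)-1);
		if (IS_ERR(tracer))
			return PTR_ERR(tracer);

		/* ... read records via ds_access_bts(tracer, i, &rec) ... */

		return ds_release_bts(tracer);
	}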
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 000000000000..6b1f6f6f8661
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,351 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16#include <linux/sysfs.h>
17
18#include <asm/stacktrace.h>
19
20#include "dumpstack.h"
21
22int panic_on_unrecovered_nmi;
23unsigned int code_bytes = 64;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static int die_counter;
26
27void printk_address(unsigned long address, int reliable)
28{
29 printk(" [<%p>] %s%pS\n", (void *) address,
30 reliable ? "" : "? ", (void *) address);
31}
32
33#ifdef CONFIG_FUNCTION_GRAPH_TRACER
34static void
35print_ftrace_graph_addr(unsigned long addr, void *data,
36 const struct stacktrace_ops *ops,
37 struct thread_info *tinfo, int *graph)
38{
39 struct task_struct *task = tinfo->task;
40 unsigned long ret_addr;
41 int index = task->curr_ret_stack;
42
43 if (addr != (unsigned long)return_to_handler)
44 return;
45
46 if (!task->ret_stack || index < *graph)
47 return;
48
49 index -= *graph;
50 ret_addr = task->ret_stack[index].ret;
51
52 ops->address(data, ret_addr, 1);
53
54 (*graph)++;
55}
56#else
57static inline void
58print_ftrace_graph_addr(unsigned long addr, void *data,
59 const struct stacktrace_ops *ops,
60 struct thread_info *tinfo, int *graph)
61{ }
62#endif
63
64/*
65 * x86-64 can have up to three kernel stacks:
66 * process stack
67 * interrupt stack
68 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
69 */
70
71static inline int valid_stack_ptr(struct thread_info *tinfo,
72 void *p, unsigned int size, void *end)
73{
74 void *t = tinfo;
75 if (end) {
76 if (p < end && p >= (end-THREAD_SIZE))
77 return 1;
78 else
79 return 0;
80 }
81 return p > t && p < t + THREAD_SIZE - size;
82}
83
84unsigned long
85print_context_stack(struct thread_info *tinfo,
86 unsigned long *stack, unsigned long bp,
87 const struct stacktrace_ops *ops, void *data,
88 unsigned long *end, int *graph)
89{
90 struct stack_frame *frame = (struct stack_frame *)bp;
91
92 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
93 unsigned long addr;
94
95 addr = *stack;
96 if (__kernel_text_address(addr)) {
97 if ((unsigned long) stack == bp + sizeof(long)) {
98 ops->address(data, addr, 1);
99 frame = frame->next_frame;
100 bp = (unsigned long) frame;
101 } else {
102 ops->address(data, addr, bp == 0);
103 }
104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
105 }
106 stack++;
107 }
108 return bp;
109}
110
111
112static void
113print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
114{
115 printk(data);
116 print_symbol(msg, symbol);
117 printk("\n");
118}
119
120static void print_trace_warning(void *data, char *msg)
121{
122 printk("%s%s\n", (char *)data, msg);
123}
124
125static int print_trace_stack(void *data, char *name)
126{
127 printk("%s <%s> ", (char *)data, name);
128 return 0;
129}
130
131/*
132 * Print one address/symbol entries per line.
133 */
134static void print_trace_address(void *data, unsigned long addr, int reliable)
135{
136 touch_nmi_watchdog();
137 printk(data);
138 printk_address(addr, reliable);
139}
140
141static const struct stacktrace_ops print_trace_ops = {
142 .warning = print_trace_warning,
143 .warning_symbol = print_trace_warning_symbol,
144 .stack = print_trace_stack,
145 .address = print_trace_address,
146};
147
148void
149show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
150 unsigned long *stack, unsigned long bp, char *log_lvl)
151{
152 printk("%sCall Trace:\n", log_lvl);
153 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
154}
155
156void show_trace(struct task_struct *task, struct pt_regs *regs,
157 unsigned long *stack, unsigned long bp)
158{
159 show_trace_log_lvl(task, regs, stack, bp, "");
160}
161
162void show_stack(struct task_struct *task, unsigned long *sp)
163{
164 show_stack_log_lvl(task, NULL, sp, 0, "");
165}
166
167/*
168 * The architecture-independent dump_stack generator
169 */
170void dump_stack(void)
171{
172 unsigned long bp = 0;
173 unsigned long stack;
174
175#ifdef CONFIG_FRAME_POINTER
176 if (!bp)
177 get_bp(bp);
178#endif
179
180 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
181 current->pid, current->comm, print_tainted(),
182 init_utsname()->release,
183 (int)strcspn(init_utsname()->version, " "),
184 init_utsname()->version);
185 show_trace(NULL, NULL, &stack, bp);
186}
187EXPORT_SYMBOL(dump_stack);
188
189static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
190static int die_owner = -1;
191static unsigned int die_nest_count;
192
193unsigned __kprobes long oops_begin(void)
194{
195 int cpu;
196 unsigned long flags;
197
198 oops_enter();
199
200 /* racy, but better than risking deadlock. */
201 raw_local_irq_save(flags);
202 cpu = smp_processor_id();
203 if (!__raw_spin_trylock(&die_lock)) {
204 if (cpu == die_owner)
205 /* nested oops. should stop eventually */;
206 else
207 __raw_spin_lock(&die_lock);
208 }
209 die_nest_count++;
210 die_owner = cpu;
211 console_verbose();
212 bust_spinlocks(1);
213 return flags;
214}
215
216void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
217{
218 if (regs && kexec_should_crash(current))
219 crash_kexec(regs);
220
221 bust_spinlocks(0);
222 die_owner = -1;
223 add_taint(TAINT_DIE);
224 die_nest_count--;
225 if (!die_nest_count)
226 /* Nest count reaches zero, release the lock. */
227 __raw_spin_unlock(&die_lock);
228 raw_local_irq_restore(flags);
229 oops_exit();
230
231 if (!signr)
232 return;
233 if (in_interrupt())
234 panic("Fatal exception in interrupt");
235 if (panic_on_oops)
236 panic("Fatal exception");
237 do_exit(signr);
238}
239
240int __kprobes __die(const char *str, struct pt_regs *regs, long err)
241{
242#ifdef CONFIG_X86_32
243 unsigned short ss;
244 unsigned long sp;
245#endif
246 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
247#ifdef CONFIG_PREEMPT
248 printk("PREEMPT ");
249#endif
250#ifdef CONFIG_SMP
251 printk("SMP ");
252#endif
253#ifdef CONFIG_DEBUG_PAGEALLOC
254 printk("DEBUG_PAGEALLOC");
255#endif
256 printk("\n");
257 sysfs_printk_last_file();
258 if (notify_die(DIE_OOPS, str, regs, err,
259 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
260 return 1;
261
262 show_registers(regs);
263#ifdef CONFIG_X86_32
264 sp = (unsigned long) (&regs->sp);
265 savesegment(ss, ss);
266 if (user_mode(regs)) {
267 sp = regs->sp;
268 ss = regs->ss & 0xffff;
269 }
270 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
271 print_symbol("%s", regs->ip);
272 printk(" SS:ESP %04x:%08lx\n", ss, sp);
273#else
274 /* Executive summary in case the oops scrolled away */
275 printk(KERN_ALERT "RIP ");
276 printk_address(regs->ip, 1);
277 printk(" RSP <%016lx>\n", regs->sp);
278#endif
279 return 0;
280}
281
282/*
283 * This path is taken when something in the kernel has done something bad
284 * and is about to be terminated:
285 */
286void die(const char *str, struct pt_regs *regs, long err)
287{
288 unsigned long flags = oops_begin();
289 int sig = SIGSEGV;
290
291 if (!user_mode_vm(regs))
292 report_bug(regs->ip, regs);
293
294 if (__die(str, regs, err))
295 sig = 0;
296 oops_end(flags, regs, sig);
297}
298
299void notrace __kprobes
300die_nmi(char *str, struct pt_regs *regs, int do_panic)
301{
302 unsigned long flags;
303
304 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
305 return;
306
307 /*
308	 * We are in trouble anyway, let's at least try
309 * to get a message out.
310 */
311 flags = oops_begin();
312 printk(KERN_EMERG "%s", str);
313 printk(" on CPU%d, ip %08lx, registers:\n",
314 smp_processor_id(), regs->ip);
315 show_registers(regs);
316 oops_end(flags, regs, 0);
317 if (do_panic || panic_on_oops)
318 panic("Non maskable interrupt");
319 nmi_exit();
320 local_irq_enable();
321 do_exit(SIGBUS);
322}
323
324static int __init oops_setup(char *s)
325{
326 if (!s)
327 return -EINVAL;
328 if (!strcmp(s, "panic"))
329 panic_on_oops = 1;
330 return 0;
331}
332early_param("oops", oops_setup);
333
334static int __init kstack_setup(char *s)
335{
336 if (!s)
337 return -EINVAL;
338 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
339 return 0;
340}
341early_param("kstack", kstack_setup);
342
343static int __init code_bytes_setup(char *s)
344{
345 code_bytes = simple_strtoul(s, NULL, 0);
346 if (code_bytes > 8192)
347 code_bytes = 8192;
348
349 return 1;
350}
351__setup("code_bytes=", code_bytes_setup);
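
Aside (illustrative only, not part of this patch): the consolidated dump_trace()/stacktrace_ops interface above accepts any callback set, not just print_trace_ops. A minimal hypothetical consumer that merely tallies addresses, written against the callback signatures visible in this file, might look like the following sketch:

struct count_data {
	unsigned long reliable;	/* addresses found via the frame-pointer chain */
	unsigned long guessed;	/* bare text addresses spotted on the stack */
};

static void count_warning(void *data, char *msg)
{
}

static void count_warning_symbol(void *data, char *msg, unsigned long symbol)
{
}

static int count_stack(void *data, char *name)
{
	return 0;	/* keep walking across exception/IRQ stack boundaries */
}

static void count_address(void *data, unsigned long addr, int reliable)
{
	struct count_data *d = data;

	if (reliable)
		d->reliable++;
	else
		d->guessed++;
}

static const struct stacktrace_ops count_ops = {
	.warning	= count_warning,
	.warning_symbol	= count_warning_symbol,
	.stack		= count_stack,
	.address	= count_address,
};

/* Usage: struct count_data d = { 0, 0 }; dump_trace(NULL, NULL, NULL, 0, &count_ops, &d); */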
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 000000000000..da87590b8698
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17extern unsigned long
18print_context_stack(struct thread_info *tinfo,
19 unsigned long *stack, unsigned long bp,
20 const struct stacktrace_ops *ops, void *data,
21 unsigned long *end, int *graph);
22
23extern void
24show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *stack, unsigned long bp, char *log_lvl);
26
27extern void
28show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
29 unsigned long *sp, unsigned long bp, char *log_lvl);
30
31extern unsigned int code_bytes;
32extern int kstack_depth_to_print;
33
34/* The form of the top of the frame on the stack */
35struct stack_frame {
36 struct stack_frame *next_frame;
37 unsigned long return_address;
38};
39#endif
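
Aside (illustrative only): struct stack_frame above is the key to the "reliable" flag in print_context_stack(): each saved frame pointer points at the caller's frame, with the return address stored one word above it, which is why a stack slot equal to bp + sizeof(long) is reported as reliable. A freestanding sketch of that walk, with the validity checks stubbed out as hypothetical callbacks:

struct stack_frame {
	struct stack_frame *next_frame;
	unsigned long return_address;
};

/*
 * Illustrative frame-pointer walk: 'is_text' and 'emit' are hypothetical
 * stand-ins for __kernel_text_address() and ops->address().
 */
static void walk_frame_chain(unsigned long bp,
			     int (*is_text)(unsigned long),
			     void (*emit)(unsigned long addr, int reliable))
{
	struct stack_frame *frame = (struct stack_frame *)bp;

	while (frame && is_text(frame->return_address)) {
		emit(frame->return_address, 1);	/* found via the bp chain */
		frame = frame->next_frame;	/* hop to the caller's frame */
	}
}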
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197b..d593cd1f58dc 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,69 +17,14 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 8 20#include "dumpstack.h"
21#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33
34static inline int valid_stack_ptr(struct thread_info *tinfo,
35 void *p, unsigned int size, void *end)
36{
37 void *t = tinfo;
38 if (end) {
39 if (p < end && p >= (end-THREAD_SIZE))
40 return 1;
41 else
42 return 0;
43 }
44 return p > t && p < t + THREAD_SIZE - size;
45}
46
47/* The form of the top of the frame on the stack */
48struct stack_frame {
49 struct stack_frame *next_frame;
50 unsigned long return_address;
51};
52
53static inline unsigned long
54print_context_stack(struct thread_info *tinfo,
55 unsigned long *stack, unsigned long bp,
56 const struct stacktrace_ops *ops, void *data,
57 unsigned long *end)
58{
59 struct stack_frame *frame = (struct stack_frame *)bp;
60
61 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
62 unsigned long addr;
63
64 addr = *stack;
65 if (__kernel_text_address(addr)) {
66 if ((unsigned long) stack == bp + sizeof(long)) {
67 ops->address(data, addr, 1);
68 frame = frame->next_frame;
69 bp = (unsigned long) frame;
70 } else {
71 ops->address(data, addr, bp == 0);
72 }
73 }
74 stack++;
75 }
76 return bp;
77}
78 21
79void dump_trace(struct task_struct *task, struct pt_regs *regs, 22void dump_trace(struct task_struct *task, struct pt_regs *regs,
80 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
81 const struct stacktrace_ops *ops, void *data) 24 const struct stacktrace_ops *ops, void *data)
82{ 25{
26 int graph = 0;
27
83 if (!task) 28 if (!task)
84 task = current; 29 task = current;
85 30
@@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
107 52
108 context = (struct thread_info *) 53 context = (struct thread_info *)
109 ((unsigned long)stack & (~(THREAD_SIZE - 1))); 54 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
110 bp = print_context_stack(context, stack, bp, ops, data, NULL); 55 bp = print_context_stack(context, stack, bp, ops,
56 data, NULL, &graph);
111 57
112 stack = (unsigned long *)context->previous_esp; 58 stack = (unsigned long *)context->previous_esp;
113 if (!stack) 59 if (!stack)
@@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
119} 65}
120EXPORT_SYMBOL(dump_trace); 66EXPORT_SYMBOL(dump_trace);
121 67
122static void 68void
123print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
124{
125 printk(data);
126 print_symbol(msg, symbol);
127 printk("\n");
128}
129
130static void print_trace_warning(void *data, char *msg)
131{
132 printk("%s%s\n", (char *)data, msg);
133}
134
135static int print_trace_stack(void *data, char *name)
136{
137 printk("%s <%s> ", (char *)data, name);
138 return 0;
139}
140
141/*
142 * Print one address/symbol entries per line.
143 */
144static void print_trace_address(void *data, unsigned long addr, int reliable)
145{
146 touch_nmi_watchdog();
147 printk(data);
148 printk_address(addr, reliable);
149}
150
151static const struct stacktrace_ops print_trace_ops = {
152 .warning = print_trace_warning,
153 .warning_symbol = print_trace_warning_symbol,
154 .stack = print_trace_stack,
155 .address = print_trace_address,
156};
157
158static void
159show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
160 unsigned long *stack, unsigned long bp, char *log_lvl)
161{
162 printk("%sCall Trace:\n", log_lvl);
163 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
164}
165
166void show_trace(struct task_struct *task, struct pt_regs *regs,
167 unsigned long *stack, unsigned long bp)
168{
169 show_trace_log_lvl(task, regs, stack, bp, "");
170}
171
172static void
173show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 69show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *sp, unsigned long bp, char *log_lvl) 70 unsigned long *sp, unsigned long bp, char *log_lvl)
175{ 71{
@@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
196 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 92 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
197} 93}
198 94
199void show_stack(struct task_struct *task, unsigned long *sp)
200{
201 show_stack_log_lvl(task, NULL, sp, 0, "");
202}
203
204/*
205 * The architecture-independent dump_stack generator
206 */
207void dump_stack(void)
208{
209 unsigned long bp = 0;
210 unsigned long stack;
211
212#ifdef CONFIG_FRAME_POINTER
213 if (!bp)
214 get_bp(bp);
215#endif
216
217 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
218 current->pid, current->comm, print_tainted(),
219 init_utsname()->release,
220 (int)strcspn(init_utsname()->version, " "),
221 init_utsname()->version);
222 show_trace(NULL, NULL, &stack, bp);
223}
224
225EXPORT_SYMBOL(dump_stack);
226 95
227void show_registers(struct pt_regs *regs) 96void show_registers(struct pt_regs *regs)
228{ 97{
@@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip)
283 return ud2 == 0x0b0f; 152 return ud2 == 0x0b0f;
284} 153}
285 154
286static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
287static int die_owner = -1;
288static unsigned int die_nest_count;
289
290unsigned __kprobes long oops_begin(void)
291{
292 unsigned long flags;
293
294 oops_enter();
295
296 if (die_owner != raw_smp_processor_id()) {
297 console_verbose();
298 raw_local_irq_save(flags);
299 __raw_spin_lock(&die_lock);
300 die_owner = smp_processor_id();
301 die_nest_count = 0;
302 bust_spinlocks(1);
303 } else {
304 raw_local_irq_save(flags);
305 }
306 die_nest_count++;
307 return flags;
308}
309
310void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
311{
312 bust_spinlocks(0);
313 die_owner = -1;
314 add_taint(TAINT_DIE);
315 __raw_spin_unlock(&die_lock);
316 raw_local_irq_restore(flags);
317
318 if (!regs)
319 return;
320
321 if (kexec_should_crash(current))
322 crash_kexec(regs);
323 if (in_interrupt())
324 panic("Fatal exception in interrupt");
325 if (panic_on_oops)
326 panic("Fatal exception");
327 oops_exit();
328 do_exit(signr);
329}
330
331int __kprobes __die(const char *str, struct pt_regs *regs, long err)
332{
333 unsigned short ss;
334 unsigned long sp;
335
336 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
337#ifdef CONFIG_PREEMPT
338 printk("PREEMPT ");
339#endif
340#ifdef CONFIG_SMP
341 printk("SMP ");
342#endif
343#ifdef CONFIG_DEBUG_PAGEALLOC
344 printk("DEBUG_PAGEALLOC");
345#endif
346 printk("\n");
347 sysfs_printk_last_file();
348 if (notify_die(DIE_OOPS, str, regs, err,
349 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
350 return 1;
351
352 show_registers(regs);
353 /* Executive summary in case the oops scrolled away */
354 sp = (unsigned long) (&regs->sp);
355 savesegment(ss, ss);
356 if (user_mode(regs)) {
357 sp = regs->sp;
358 ss = regs->ss & 0xffff;
359 }
360 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
361 print_symbol("%s", regs->ip);
362 printk(" SS:ESP %04x:%08lx\n", ss, sp);
363 return 0;
364}
365
366/*
367 * This is gone through when something in the kernel has done something bad
368 * and is about to be terminated:
369 */
370void die(const char *str, struct pt_regs *regs, long err)
371{
372 unsigned long flags = oops_begin();
373
374 if (die_nest_count < 3) {
375 report_bug(regs->ip, regs);
376
377 if (__die(str, regs, err))
378 regs = NULL;
379 } else {
380 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
381 }
382
383 oops_end(flags, regs, SIGSEGV);
384}
385
386static DEFINE_SPINLOCK(nmi_print_lock);
387
388void notrace __kprobes
389die_nmi(char *str, struct pt_regs *regs, int do_panic)
390{
391 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
392 return;
393
394 spin_lock(&nmi_print_lock);
395 /*
396 * We are in trouble anyway, lets at least try
397 * to get a message out:
398 */
399 bust_spinlocks(1);
400 printk(KERN_EMERG "%s", str);
401 printk(" on CPU%d, ip %08lx, registers:\n",
402 smp_processor_id(), regs->ip);
403 show_registers(regs);
404 if (do_panic)
405 panic("Non maskable interrupt");
406 console_silent();
407 spin_unlock(&nmi_print_lock);
408
409 /*
410 * If we are in kernel we are probably nested up pretty bad
411 * and might aswell get out now while we still can:
412 */
413 if (!user_mode_vm(regs)) {
414 current->thread.trap_no = 2;
415 crash_kexec(regs);
416 }
417
418 bust_spinlocks(0);
419 do_exit(SIGSEGV);
420}
421
422static int __init oops_setup(char *s)
423{
424 if (!s)
425 return -EINVAL;
426 if (!strcmp(s, "panic"))
427 panic_on_oops = 1;
428 return 0;
429}
430early_param("oops", oops_setup);
431
432static int __init kstack_setup(char *s)
433{
434 if (!s)
435 return -EINVAL;
436 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
437 return 0;
438}
439early_param("kstack", kstack_setup);
440
441static int __init code_bytes_setup(char *s)
442{
443 code_bytes = simple_strtoul(s, NULL, 0);
444 if (code_bytes > 8192)
445 code_bytes = 8192;
446
447 return 1;
448}
449__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a7..c302d0707048 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 4 20#include "dumpstack.h"
21#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33 21
34static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 22static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
35 unsigned *usedp, char **idp) 23 unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
113 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 101 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
114 */ 102 */
115 103
116static inline int valid_stack_ptr(struct thread_info *tinfo,
117 void *p, unsigned int size, void *end)
118{
119 void *t = tinfo;
120 if (end) {
121 if (p < end && p >= (end-THREAD_SIZE))
122 return 1;
123 else
124 return 0;
125 }
126 return p > t && p < t + THREAD_SIZE - size;
127}
128
129/* The form of the top of the frame on the stack */
130struct stack_frame {
131 struct stack_frame *next_frame;
132 unsigned long return_address;
133};
134
135static inline unsigned long
136print_context_stack(struct thread_info *tinfo,
137 unsigned long *stack, unsigned long bp,
138 const struct stacktrace_ops *ops, void *data,
139 unsigned long *end)
140{
141 struct stack_frame *frame = (struct stack_frame *)bp;
142
143 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
144 unsigned long addr;
145
146 addr = *stack;
147 if (__kernel_text_address(addr)) {
148 if ((unsigned long) stack == bp + sizeof(long)) {
149 ops->address(data, addr, 1);
150 frame = frame->next_frame;
151 bp = (unsigned long) frame;
152 } else {
153 ops->address(data, addr, bp == 0);
154 }
155 }
156 stack++;
157 }
158 return bp;
159}
160
161void dump_trace(struct task_struct *task, struct pt_regs *regs, 104void dump_trace(struct task_struct *task, struct pt_regs *regs,
162 unsigned long *stack, unsigned long bp, 105 unsigned long *stack, unsigned long bp,
163 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
@@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
166 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
167 unsigned used = 0; 110 unsigned used = 0;
168 struct thread_info *tinfo; 111 struct thread_info *tinfo;
112 int graph = 0;
169 113
170 if (!task) 114 if (!task)
171 task = current; 115 task = current;
@@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
206 break; 150 break;
207 151
208 bp = print_context_stack(tinfo, stack, bp, ops, 152 bp = print_context_stack(tinfo, stack, bp, ops,
209 data, estack_end); 153 data, estack_end, &graph);
210 ops->stack(data, "<EOE>"); 154 ops->stack(data, "<EOE>");
211 /* 155 /*
212 * We link to the next stack via the 156 * We link to the next stack via the
@@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
225 if (ops->stack(data, "IRQ") < 0) 169 if (ops->stack(data, "IRQ") < 0)
226 break; 170 break;
227 bp = print_context_stack(tinfo, stack, bp, 171 bp = print_context_stack(tinfo, stack, bp,
228 ops, data, irqstack_end); 172 ops, data, irqstack_end, &graph);
229 /* 173 /*
230 * We link to the next stack (which would be 174 * We link to the next stack (which would be
231 * the process stack normally) the last 175 * the process stack normally) the last
@@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
243 /* 187 /*
244 * This handles the process stack: 188 * This handles the process stack:
245 */ 189 */
246 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); 190 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
247 put_cpu(); 191 put_cpu();
248} 192}
249EXPORT_SYMBOL(dump_trace); 193EXPORT_SYMBOL(dump_trace);
250 194
251static void 195void
252print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
253{
254 printk(data);
255 print_symbol(msg, symbol);
256 printk("\n");
257}
258
259static void print_trace_warning(void *data, char *msg)
260{
261 printk("%s%s\n", (char *)data, msg);
262}
263
264static int print_trace_stack(void *data, char *name)
265{
266 printk("%s <%s> ", (char *)data, name);
267 return 0;
268}
269
270/*
271 * Print one address/symbol entries per line.
272 */
273static void print_trace_address(void *data, unsigned long addr, int reliable)
274{
275 touch_nmi_watchdog();
276 printk(data);
277 printk_address(addr, reliable);
278}
279
280static const struct stacktrace_ops print_trace_ops = {
281 .warning = print_trace_warning,
282 .warning_symbol = print_trace_warning_symbol,
283 .stack = print_trace_stack,
284 .address = print_trace_address,
285};
286
287static void
288show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
289 unsigned long *stack, unsigned long bp, char *log_lvl)
290{
291 printk("%sCall Trace:\n", log_lvl);
292 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
293}
294
295void show_trace(struct task_struct *task, struct pt_regs *regs,
296 unsigned long *stack, unsigned long bp)
297{
298 show_trace_log_lvl(task, regs, stack, bp, "");
299}
300
301static void
302show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 196show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
303 unsigned long *sp, unsigned long bp, char *log_lvl) 197 unsigned long *sp, unsigned long bp, char *log_lvl)
304{ 198{
@@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
342 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 236 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
343} 237}
344 238
345void show_stack(struct task_struct *task, unsigned long *sp)
346{
347 show_stack_log_lvl(task, NULL, sp, 0, "");
348}
349
350/*
351 * The architecture-independent dump_stack generator
352 */
353void dump_stack(void)
354{
355 unsigned long bp = 0;
356 unsigned long stack;
357
358#ifdef CONFIG_FRAME_POINTER
359 if (!bp)
360 get_bp(bp);
361#endif
362
363 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
364 current->pid, current->comm, print_tainted(),
365 init_utsname()->release,
366 (int)strcspn(init_utsname()->version, " "),
367 init_utsname()->version);
368 show_trace(NULL, NULL, &stack, bp);
369}
370EXPORT_SYMBOL(dump_stack);
371
372void show_registers(struct pt_regs *regs) 239void show_registers(struct pt_regs *regs)
373{ 240{
374 int i; 241 int i;
@@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip)
429 return ud2 == 0x0b0f; 296 return ud2 == 0x0b0f;
430} 297}
431 298
432static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
433static int die_owner = -1;
434static unsigned int die_nest_count;
435
436unsigned __kprobes long oops_begin(void)
437{
438 int cpu;
439 unsigned long flags;
440
441 oops_enter();
442
443 /* racy, but better than risking deadlock. */
444 raw_local_irq_save(flags);
445 cpu = smp_processor_id();
446 if (!__raw_spin_trylock(&die_lock)) {
447 if (cpu == die_owner)
448 /* nested oops. should stop eventually */;
449 else
450 __raw_spin_lock(&die_lock);
451 }
452 die_nest_count++;
453 die_owner = cpu;
454 console_verbose();
455 bust_spinlocks(1);
456 return flags;
457}
458
459void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
460{
461 die_owner = -1;
462 bust_spinlocks(0);
463 die_nest_count--;
464 if (!die_nest_count)
465 /* Nest count reaches zero, release the lock. */
466 __raw_spin_unlock(&die_lock);
467 raw_local_irq_restore(flags);
468 if (!regs) {
469 oops_exit();
470 return;
471 }
472 if (in_interrupt())
473 panic("Fatal exception in interrupt");
474 if (panic_on_oops)
475 panic("Fatal exception");
476 oops_exit();
477 do_exit(signr);
478}
479
480int __kprobes __die(const char *str, struct pt_regs *regs, long err)
481{
482 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
483#ifdef CONFIG_PREEMPT
484 printk("PREEMPT ");
485#endif
486#ifdef CONFIG_SMP
487 printk("SMP ");
488#endif
489#ifdef CONFIG_DEBUG_PAGEALLOC
490 printk("DEBUG_PAGEALLOC");
491#endif
492 printk("\n");
493 sysfs_printk_last_file();
494 if (notify_die(DIE_OOPS, str, regs, err,
495 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
496 return 1;
497
498 show_registers(regs);
499 add_taint(TAINT_DIE);
500 /* Executive summary in case the oops scrolled away */
501 printk(KERN_ALERT "RIP ");
502 printk_address(regs->ip, 1);
503 printk(" RSP <%016lx>\n", regs->sp);
504 if (kexec_should_crash(current))
505 crash_kexec(regs);
506 return 0;
507}
508
509void die(const char *str, struct pt_regs *regs, long err)
510{
511 unsigned long flags = oops_begin();
512
513 if (!user_mode(regs))
514 report_bug(regs->ip, regs);
515
516 if (__die(str, regs, err))
517 regs = NULL;
518 oops_end(flags, regs, SIGSEGV);
519}
520
521notrace __kprobes void
522die_nmi(char *str, struct pt_regs *regs, int do_panic)
523{
524 unsigned long flags;
525
526 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
527 return;
528
529 flags = oops_begin();
530 /*
531 * We are in trouble anyway, lets at least try
532 * to get a message out.
533 */
534 printk(KERN_EMERG "%s", str);
535 printk(" on CPU%d, ip %08lx, registers:\n",
536 smp_processor_id(), regs->ip);
537 show_registers(regs);
538 if (kexec_should_crash(current))
539 crash_kexec(regs);
540 if (do_panic || panic_on_oops)
541 panic("Non maskable interrupt");
542 oops_end(flags, NULL, SIGBUS);
543 nmi_exit();
544 local_irq_enable();
545 do_exit(SIGBUS);
546}
547
548static int __init oops_setup(char *s)
549{
550 if (!s)
551 return -EINVAL;
552 if (!strcmp(s, "panic"))
553 panic_on_oops = 1;
554 return 0;
555}
556early_param("oops", oops_setup);
557
558static int __init kstack_setup(char *s)
559{
560 if (!s)
561 return -EINVAL;
562 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
563 return 0;
564}
565early_param("kstack", kstack_setup);
566
567static int __init code_bytes_setup(char *s)
568{
569 code_bytes = simple_strtoul(s, NULL, 0);
570 if (code_bytes > 8192)
571 code_bytes = 8192;
572
573 return 1;
574}
575__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3ce029ffaa55..1b894b72c0f5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
188} 188}
189#endif 189#endif
190 190
191#ifdef CONFIG_DMAR
192static void __init intel_g33_dmar(int num, int slot, int func)
193{
194 struct acpi_table_header *dmar_tbl;
195 acpi_status status;
196
197 status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
198 if (ACPI_SUCCESS(status)) {
199 printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
200 dmar_disabled = 1;
201 }
202}
203#endif
204
205#define QFLAG_APPLY_ONCE 0x1 191#define QFLAG_APPLY_ONCE 0x1
206#define QFLAG_APPLIED 0x2 192#define QFLAG_APPLIED 0x2
207#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) 193#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = {
225 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, 211 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
226 { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, 212 { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
227 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, 213 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
228#ifdef CONFIG_DMAR
229 { PCI_VENDOR_ID_INTEL, 0x29c0,
230 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
231#endif
232 {} 214 {}
233}; 215};
234 216
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..43ceb3f454bf 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1157,6 +1157,9 @@ ENTRY(mcount)
1157END(mcount) 1157END(mcount)
1158 1158
1159ENTRY(ftrace_caller) 1159ENTRY(ftrace_caller)
1160 cmpl $0, function_trace_stop
1161 jne ftrace_stub
1162
1160 pushl %eax 1163 pushl %eax
1161 pushl %ecx 1164 pushl %ecx
1162 pushl %edx 1165 pushl %edx
@@ -1171,6 +1174,11 @@ ftrace_call:
1171 popl %edx 1174 popl %edx
1172 popl %ecx 1175 popl %ecx
1173 popl %eax 1176 popl %eax
1177#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1178.globl ftrace_graph_call
1179ftrace_graph_call:
1180 jmp ftrace_stub
1181#endif
1174 1182
1175.globl ftrace_stub 1183.globl ftrace_stub
1176ftrace_stub: 1184ftrace_stub:
@@ -1180,8 +1188,18 @@ END(ftrace_caller)
1180#else /* ! CONFIG_DYNAMIC_FTRACE */ 1188#else /* ! CONFIG_DYNAMIC_FTRACE */
1181 1189
1182ENTRY(mcount) 1190ENTRY(mcount)
1191 cmpl $0, function_trace_stop
1192 jne ftrace_stub
1193
1183 cmpl $ftrace_stub, ftrace_trace_function 1194 cmpl $ftrace_stub, ftrace_trace_function
1184 jnz trace 1195 jnz trace
1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1197 cmpl $ftrace_stub, ftrace_graph_return
1198 jnz ftrace_graph_caller
1199
1200 cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
1201 jnz ftrace_graph_caller
1202#endif
1185.globl ftrace_stub 1203.globl ftrace_stub
1186ftrace_stub: 1204ftrace_stub:
1187 ret 1205 ret
@@ -1200,12 +1218,43 @@ trace:
1200 popl %edx 1218 popl %edx
1201 popl %ecx 1219 popl %ecx
1202 popl %eax 1220 popl %eax
1203
1204 jmp ftrace_stub 1221 jmp ftrace_stub
1205END(mcount) 1222END(mcount)
1206#endif /* CONFIG_DYNAMIC_FTRACE */ 1223#endif /* CONFIG_DYNAMIC_FTRACE */
1207#endif /* CONFIG_FUNCTION_TRACER */ 1224#endif /* CONFIG_FUNCTION_TRACER */
1208 1225
1226#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1227ENTRY(ftrace_graph_caller)
1228 cmpl $0, function_trace_stop
1229 jne ftrace_stub
1230
1231 pushl %eax
1232 pushl %ecx
1233 pushl %edx
1234 movl 0xc(%esp), %edx
1235 lea 0x4(%ebp), %eax
1236 subl $MCOUNT_INSN_SIZE, %edx
1237 call prepare_ftrace_return
1238 popl %edx
1239 popl %ecx
1240 popl %eax
1241 ret
1242END(ftrace_graph_caller)
1243
1244.globl return_to_handler
1245return_to_handler:
1246 pushl $0
1247 pushl %eax
1248 pushl %ecx
1249 pushl %edx
1250 call ftrace_return_to_handler
1251 movl %eax, 0xc(%esp)
1252 popl %edx
1253 popl %ecx
1254 popl %eax
1255 ret
1256#endif
1257
1209.section .rodata,"a" 1258.section .rodata,"a"
1210#include "syscall_table_32.S" 1259#include "syscall_table_32.S"
1211 1260
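
Aside (a rough C restatement, not kernel code): the non-dynamic mcount entry above now performs a three-way dispatch. Under the assumption that ftrace_stub, ftrace_trace_function, ftrace_graph_return and ftrace_graph_entry behave as in this patch, the control flow is roughly the following runnable model, with all kernel symbols replaced by hypothetical stand-ins:

#include <stdio.h>

/* Hypothetical stand-ins for the symbols tested in entry_32.S. */
static int function_trace_stop;
static void ftrace_stub(void) { }
static void (*ftrace_trace_function)(void) = ftrace_stub;
static void (*ftrace_graph_return)(void) = ftrace_stub;
static void ftrace_graph_entry_stub(void) { }
static void (*ftrace_graph_entry)(void) = ftrace_graph_entry_stub;

static void trace(void) { puts("-> trace"); }
static void ftrace_graph_caller(void) { puts("-> ftrace_graph_caller"); }

/* Purely illustrative restatement of the mcount dispatch logic. */
static void mcount_dispatch(void)
{
	if (function_trace_stop)
		return;					/* jne ftrace_stub */
	if (ftrace_trace_function != ftrace_stub) {
		trace();				/* jnz trace */
		return;
	}
	if (ftrace_graph_return != ftrace_stub ||
	    ftrace_graph_entry != ftrace_graph_entry_stub)
		ftrace_graph_caller();			/* jnz ftrace_graph_caller */
}

int main(void)
{
	mcount_dispatch();				/* falls through: stub */
	ftrace_graph_return = trace;
	mcount_dispatch();				/* takes the graph path */
	return 0;
}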
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..54e0bbdccb99 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,6 +68,8 @@ ENTRY(mcount)
68END(mcount) 68END(mcount)
69 69
70ENTRY(ftrace_caller) 70ENTRY(ftrace_caller)
71 cmpl $0, function_trace_stop
72 jne ftrace_stub
71 73
72 /* taken from glibc */ 74 /* taken from glibc */
73 subq $0x38, %rsp 75 subq $0x38, %rsp
@@ -96,6 +98,12 @@ ftrace_call:
96 movq (%rsp), %rax 98 movq (%rsp), %rax
97 addq $0x38, %rsp 99 addq $0x38, %rsp
98 100
101#ifdef CONFIG_FUNCTION_GRAPH_TRACER
102.globl ftrace_graph_call
103ftrace_graph_call:
104 jmp ftrace_stub
105#endif
106
99.globl ftrace_stub 107.globl ftrace_stub
100ftrace_stub: 108ftrace_stub:
101 retq 109 retq
@@ -103,8 +111,20 @@ END(ftrace_caller)
103 111
104#else /* ! CONFIG_DYNAMIC_FTRACE */ 112#else /* ! CONFIG_DYNAMIC_FTRACE */
105ENTRY(mcount) 113ENTRY(mcount)
114 cmpl $0, function_trace_stop
115 jne ftrace_stub
116
106 cmpq $ftrace_stub, ftrace_trace_function 117 cmpq $ftrace_stub, ftrace_trace_function
107 jnz trace 118 jnz trace
119
120#ifdef CONFIG_FUNCTION_GRAPH_TRACER
121 cmpq $ftrace_stub, ftrace_graph_return
122 jnz ftrace_graph_caller
123
124 cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
125 jnz ftrace_graph_caller
126#endif
127
108.globl ftrace_stub 128.globl ftrace_stub
109ftrace_stub: 129ftrace_stub:
110 retq 130 retq
@@ -140,6 +160,69 @@ END(mcount)
140#endif /* CONFIG_DYNAMIC_FTRACE */ 160#endif /* CONFIG_DYNAMIC_FTRACE */
141#endif /* CONFIG_FUNCTION_TRACER */ 161#endif /* CONFIG_FUNCTION_TRACER */
142 162
163#ifdef CONFIG_FUNCTION_GRAPH_TRACER
164ENTRY(ftrace_graph_caller)
165 cmpl $0, function_trace_stop
166 jne ftrace_stub
167
168 subq $0x38, %rsp
169 movq %rax, (%rsp)
170 movq %rcx, 8(%rsp)
171 movq %rdx, 16(%rsp)
172 movq %rsi, 24(%rsp)
173 movq %rdi, 32(%rsp)
174 movq %r8, 40(%rsp)
175 movq %r9, 48(%rsp)
176
177 leaq 8(%rbp), %rdi
178 movq 0x38(%rsp), %rsi
179 subq $MCOUNT_INSN_SIZE, %rsi
180
181 call prepare_ftrace_return
182
183 movq 48(%rsp), %r9
184 movq 40(%rsp), %r8
185 movq 32(%rsp), %rdi
186 movq 24(%rsp), %rsi
187 movq 16(%rsp), %rdx
188 movq 8(%rsp), %rcx
189 movq (%rsp), %rax
190 addq $0x38, %rsp
191 retq
192END(ftrace_graph_caller)
193
194
195.globl return_to_handler
196return_to_handler:
197 subq $80, %rsp
198
199 movq %rax, (%rsp)
200 movq %rcx, 8(%rsp)
201 movq %rdx, 16(%rsp)
202 movq %rsi, 24(%rsp)
203 movq %rdi, 32(%rsp)
204 movq %r8, 40(%rsp)
205 movq %r9, 48(%rsp)
206 movq %r10, 56(%rsp)
207 movq %r11, 64(%rsp)
208
209 call ftrace_return_to_handler
210
211 movq %rax, 72(%rsp)
212 movq 64(%rsp), %r11
213 movq 56(%rsp), %r10
214 movq 48(%rsp), %r9
215 movq 40(%rsp), %r8
216 movq 32(%rsp), %rdi
217 movq 24(%rsp), %rsi
218 movq 16(%rsp), %rdx
219 movq 8(%rsp), %rcx
220 movq (%rsp), %rax
221 addq $72, %rsp
222 retq
223#endif
224
225
143#ifndef CONFIG_PREEMPT 226#ifndef CONFIG_PREEMPT
144#define retint_kernel retint_restore_args 227#define retint_kernel retint_restore_args
145#endif 228#endif
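
Aside (illustrative only): return_to_handler above is the second half of a two-step hijack. prepare_ftrace_return() overwrites a function's saved return address with return_to_handler; when the function returns, the trampoline records the exit and resumes at the real caller retrieved by ftrace_return_to_handler(). A userspace sketch of the same idea, using a hypothetical shadow stack in place of current->ret_stack:

#include <stdio.h>

/* Hypothetical shadow stack standing in for current->ret_stack. */
static unsigned long shadow[64];
static int depth = -1;

static void push_return(unsigned long ret)
{
	shadow[++depth] = ret;		/* prepare_ftrace_return() side */
}

static unsigned long pop_return(void)
{
	printf("exit event at depth %d\n", depth);
	return shadow[depth--];		/* ftrace_return_to_handler() side */
}

int main(void)
{
	/* A "call": stash the real return target, pretend we patched it. */
	push_return(0xdeadbeef);

	/* The "return": the trampoline asks for the real target back. */
	printf("resume at %#lx\n", pop_return());
	return 0;
}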
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index f454c78fcef6..53699c931ad4 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -38,8 +38,11 @@
38#include <asm/io.h> 38#include <asm/io.h>
39#include <asm/nmi.h> 39#include <asm/nmi.h>
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/atomic.h>
41#include <asm/apicdef.h> 42#include <asm/apicdef.h>
42#include <mach_mpparse.h> 43#include <mach_mpparse.h>
44#include <asm/genapic.h>
45#include <asm/setup.h>
43 46
44/* 47/*
45 * ES7000 chipsets 48 * ES7000 chipsets
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi)
161 return gsi; 164 return gsi;
162} 165}
163 166
167static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
168{
169 unsigned long vect = 0, psaival = 0;
170
171 if (psai == NULL)
172 return -1;
173
174 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
175 psaival = (0x1000000 | vect | cpu);
176
177 while (*psai & 0x1000000)
178 ;
179
180 *psai = psaival;
181
182 return 0;
183}
184
185static void noop_wait_for_deassert(atomic_t *deassert_not_used)
186{
187}
188
189static int __init es7000_update_genapic(void)
190{
191 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
192
193 /* MPENTIUMIII */
194 if (boot_cpu_data.x86 == 6 &&
195	 (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) {
196 es7000_update_genapic_to_cluster();
197 genapic->wait_for_init_deassert = noop_wait_for_deassert;
198 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
199 }
200
201 return 0;
202}
203
164void __init 204void __init
165setup_unisys(void) 205setup_unisys(void)
166{ 206{
@@ -176,6 +216,8 @@ setup_unisys(void)
176 else 216 else
177 es7000_plat = ES7000_CLASSIC; 217 es7000_plat = ES7000_CLASSIC;
178 ioapic_renumber_irq = es7000_rename_gsi; 218 ioapic_renumber_irq = es7000_rename_gsi;
219
220 x86_quirks->update_genapic = es7000_update_genapic;
179} 221}
180 222
181/* 223/*
@@ -250,31 +292,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
250{ 292{
251 struct acpi_table_header *header = NULL; 293 struct acpi_table_header *header = NULL;
252 int i = 0; 294 int i = 0;
253 acpi_size tbl_size;
254 295
255 while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { 296 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
256 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { 297 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
257 struct oem_table *t = (struct oem_table *)header; 298 struct oem_table *t = (struct oem_table *)header;
258 299
259 oem_addrX = t->OEMTableAddr; 300 oem_addrX = t->OEMTableAddr;
260 oem_size = t->OEMTableSize; 301 oem_size = t->OEMTableSize;
261 early_acpi_os_unmap_memory(header, tbl_size);
262 302
263 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, 303 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
264 oem_size); 304 oem_size);
265 return 0; 305 return 0;
266 } 306 }
267 early_acpi_os_unmap_memory(header, tbl_size);
268 } 307 }
269 return -1; 308 return -1;
270} 309}
271 310
272void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) 311void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
273{ 312{
274 if (!oem_addr)
275 return;
276
277 __acpi_unmap_table((char *)oem_addr, oem_size);
278} 313}
279#endif 314#endif
280 315
@@ -324,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
324 return status; 359 return status;
325} 360}
326 361
327int
328es7000_start_cpu(int cpu, unsigned long eip)
329{
330 unsigned long vect = 0, psaival = 0;
331
332 if (psai == NULL)
333 return -1;
334
335 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
336 psaival = (0x1000000 | vect | cpu);
337
338 while (*psai & 0x1000000)
339 ;
340
341 *psai = psaival;
342
343 return 0;
344
345}
346
347void __init 362void __init
348es7000_sw_apic(void) 363es7000_sw_apic(void)
349{ 364{
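
Aside (illustrative only): the MIP-based wake-up moved above is a simple mailbox handshake: spin while the firmware's busy bit (0x1000000) is set in *psai, then deposit a command word packing the startup EIP's page number and the target CPU. Schematically, with a hypothetical volatile mailbox pointer:

/* Hypothetical sketch of the PSAI mailbox protocol used above. */
#define MIP_BUSY	0x1000000UL

static int mip_start_cpu(volatile unsigned long *psai,
			 unsigned long eip_phys, int cpu)
{
	unsigned long vect = (eip_phys / 0x1000) << 16;	/* page number of EIP */

	if (!psai)
		return -1;
	while (*psai & MIP_BUSY)	/* wait for the firmware to go idle */
		;
	*psai = MIP_BUSY | vect | cpu;	/* busy bit + vector + target CPU */
	return 0;
}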
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..1b43086b097a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,17 @@
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/ftrace.h> 15#include <linux/ftrace.h>
16#include <linux/percpu.h> 16#include <linux/percpu.h>
17#include <linux/sched.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/list.h> 19#include <linux/list.h>
19 20
20#include <asm/ftrace.h> 21#include <asm/ftrace.h>
22#include <linux/ftrace.h>
21#include <asm/nops.h> 23#include <asm/nops.h>
24#include <asm/nmi.h>
22 25
23 26
24static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; 27#ifdef CONFIG_DYNAMIC_FTRACE
25 28
26union ftrace_code_union { 29union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE]; 30 char code[MCOUNT_INSN_SIZE];
@@ -31,18 +34,12 @@ union ftrace_code_union {
31 } __attribute__((packed)); 34 } __attribute__((packed));
32}; 35};
33 36
34
35static int ftrace_calc_offset(long ip, long addr) 37static int ftrace_calc_offset(long ip, long addr)
36{ 38{
37 return (int)(addr - ip); 39 return (int)(addr - ip);
38} 40}
39 41
40unsigned char *ftrace_nop_replace(void) 42static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
41{
42 return ftrace_nop;
43}
44
45unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{ 43{
47 static union ftrace_code_union calc; 44 static union ftrace_code_union calc;
48 45
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
56 return calc.code; 53 return calc.code;
57} 54}
58 55
59int 56/*
57 * Modifying code must take extra care. On an SMP machine, if
58 * the code being modified is also being executed on another CPU,
59 * that CPU will have undefined results and possibly take a GPF.
60 * We use kstop_machine to stop other CPUs from executing code.
61 * But this does not stop NMIs from happening. We still need
62 * to protect against that. We separate out the modification of
63 * the code to take care of this.
64 *
65 * Two buffers are added: An IP buffer and a "code" buffer.
66 *
67 * 1) Put the instruction pointer into the IP buffer
68 * and the new code into the "code" buffer.
69 * 2) Set a flag that says we are modifying code
70 * 3) Wait for any running NMIs to finish.
71 * 4) Write the code.
72 * 5) Clear the flag.
73 * 6) Wait for any running NMIs to finish.
74 *
75 * If an NMI is executed, the first thing it does is to call
76 * "ftrace_nmi_enter". This will check if the flag is set to write
77 * and if it is, it will write what is in the IP and "code" buffers.
78 *
79 * The trick is, it does not matter if everyone is writing the same
80 * content to the code location. Also, if a CPU is executing code
81 * it is OK to write to that code location if the contents being written
82 * are the same as what exists.
83 */
84
85static atomic_t in_nmi = ATOMIC_INIT(0);
86static int mod_code_status; /* holds return value of text write */
87static int mod_code_write; /* set when NMI should do the write */
88static void *mod_code_ip; /* holds the IP to write to */
89static void *mod_code_newcode; /* holds the text to write to the IP */
90
91static unsigned nmi_wait_count;
92static atomic_t nmi_update_count = ATOMIC_INIT(0);
93
94int ftrace_arch_read_dyn_info(char *buf, int size)
95{
96 int r;
97
98 r = snprintf(buf, size, "%u %u",
99 nmi_wait_count,
100 atomic_read(&nmi_update_count));
101 return r;
102}
103
104static void ftrace_mod_code(void)
105{
106 /*
107	 * Yes, more than one CPU can be writing to mod_code_status.
108 * (and the code itself)
109 * But if one were to fail, then they all should, and if one were
110 * to succeed, then they all should.
111 */
112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
113 MCOUNT_INSN_SIZE);
114}
115
116void ftrace_nmi_enter(void)
117{
118 atomic_inc(&in_nmi);
119 /* Must have in_nmi seen before reading write flag */
120 smp_mb();
121 if (mod_code_write) {
122 ftrace_mod_code();
123 atomic_inc(&nmi_update_count);
124 }
125}
126
127void ftrace_nmi_exit(void)
128{
129 /* Finish all executions before clearing in_nmi */
130 smp_wmb();
131 atomic_dec(&in_nmi);
132}
133
134static void wait_for_nmi(void)
135{
136 int waited = 0;
137
138 while (atomic_read(&in_nmi)) {
139 waited = 1;
140 cpu_relax();
141 }
142
143 if (waited)
144 nmi_wait_count++;
145}
146
147static int
148do_ftrace_mod_code(unsigned long ip, void *new_code)
149{
150 mod_code_ip = (void *)ip;
151 mod_code_newcode = new_code;
152
153 /* The buffers need to be visible before we let NMIs write them */
154 smp_wmb();
155
156 mod_code_write = 1;
157
158 /* Make sure write bit is visible before we wait on NMIs */
159 smp_mb();
160
161 wait_for_nmi();
162
163 /* Make sure all running NMIs have finished before we write the code */
164 smp_mb();
165
166 ftrace_mod_code();
167
168 /* Make sure the write happens before clearing the bit */
169 smp_wmb();
170
171 mod_code_write = 0;
172
173 /* make sure NMIs see the cleared bit */
174 smp_mb();
175
176 wait_for_nmi();
177
178 return mod_code_status;
179}
180
181
182
183
184static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
185
186static unsigned char *ftrace_nop_replace(void)
187{
188 return ftrace_nop;
189}
190
191static int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 192ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 193 unsigned char *new_code)
62{ 194{
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
81 return -EINVAL; 213 return -EINVAL;
82 214
83 /* replace the text with the new text */ 215 /* replace the text with the new text */
84 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 216 if (do_ftrace_mod_code(ip, new_code))
85 return -EPERM; 217 return -EPERM;
86 218
87 sync_core(); 219 sync_core();
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
89 return 0; 221 return 0;
90} 222}
91 223
224int ftrace_make_nop(struct module *mod,
225 struct dyn_ftrace *rec, unsigned long addr)
226{
227 unsigned char *new, *old;
228 unsigned long ip = rec->ip;
229
230 old = ftrace_call_replace(ip, addr);
231 new = ftrace_nop_replace();
232
233 return ftrace_modify_code(rec->ip, old, new);
234}
235
236int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
237{
238 unsigned char *new, *old;
239 unsigned long ip = rec->ip;
240
241 old = ftrace_nop_replace();
242 new = ftrace_call_replace(ip, addr);
243
244 return ftrace_modify_code(rec->ip, old, new);
245}
246
92int ftrace_update_ftrace_func(ftrace_func_t func) 247int ftrace_update_ftrace_func(ftrace_func_t func)
93{ 248{
94 unsigned long ip = (unsigned long)(&ftrace_call); 249 unsigned long ip = (unsigned long)(&ftrace_call);
@@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data)
165 320
166 return 0; 321 return 0;
167} 322}
323#endif
324
325#ifdef CONFIG_FUNCTION_GRAPH_TRACER
326
327#ifdef CONFIG_DYNAMIC_FTRACE
328extern void ftrace_graph_call(void);
329
330static int ftrace_mod_jmp(unsigned long ip,
331 int old_offset, int new_offset)
332{
333 unsigned char code[MCOUNT_INSN_SIZE];
334
335 if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
336 return -EFAULT;
337
338 if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
339 return -EINVAL;
340
341 *(int *)(&code[1]) = new_offset;
342
343 if (do_ftrace_mod_code(ip, &code))
344 return -EPERM;
345
346 return 0;
347}
348
349int ftrace_enable_ftrace_graph_caller(void)
350{
351 unsigned long ip = (unsigned long)(&ftrace_graph_call);
352 int old_offset, new_offset;
353
354 old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
355 new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
356
357 return ftrace_mod_jmp(ip, old_offset, new_offset);
358}
359
360int ftrace_disable_ftrace_graph_caller(void)
361{
362 unsigned long ip = (unsigned long)(&ftrace_graph_call);
363 int old_offset, new_offset;
364
365 old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
366 new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
367
368 return ftrace_mod_jmp(ip, old_offset, new_offset);
369}
370
371#else /* CONFIG_DYNAMIC_FTRACE */
372
373/*
374 * These functions are adapted from the ones used
375 * in this file for dynamic ftrace. They have been
376 * simplified to ignore all traces in NMI context.
377 */
378static atomic_t in_nmi;
379
380void ftrace_nmi_enter(void)
381{
382 atomic_inc(&in_nmi);
383}
384
385void ftrace_nmi_exit(void)
386{
387 atomic_dec(&in_nmi);
388}
389
390#endif /* !CONFIG_DYNAMIC_FTRACE */
391
392/* Add a function return address to the trace stack on thread info. */
393static int push_return_trace(unsigned long ret, unsigned long long time,
394 unsigned long func, int *depth)
395{
396 int index;
397
398 if (!current->ret_stack)
399 return -EBUSY;
400
401 /* The return trace stack is full */
402 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
403 atomic_inc(&current->trace_overrun);
404 return -EBUSY;
405 }
406
407 index = ++current->curr_ret_stack;
408 barrier();
409 current->ret_stack[index].ret = ret;
410 current->ret_stack[index].func = func;
411 current->ret_stack[index].calltime = time;
412 *depth = index;
413
414 return 0;
415}
416
417/* Retrieve a function return address from the trace stack on thread info. */
418static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
419{
420 int index;
421
422 index = current->curr_ret_stack;
423
424 if (unlikely(index < 0)) {
425 ftrace_graph_stop();
426 WARN_ON(1);
427		/* Might as well panic, otherwise we have nowhere to go */
428 *ret = (unsigned long)panic;
429 return;
430 }
431
432 *ret = current->ret_stack[index].ret;
433 trace->func = current->ret_stack[index].func;
434 trace->calltime = current->ret_stack[index].calltime;
435 trace->overrun = atomic_read(&current->trace_overrun);
436 trace->depth = index;
437 barrier();
438 current->curr_ret_stack--;
439
440}
441
442/*
443 * Send the trace to the ring-buffer.
444 * @return the original return address.
445 */
446unsigned long ftrace_return_to_handler(void)
447{
448 struct ftrace_graph_ret trace;
449 unsigned long ret;
450
451 pop_return_trace(&trace, &ret);
452 trace.rettime = cpu_clock(raw_smp_processor_id());
453 ftrace_graph_return(&trace);
454
455 if (unlikely(!ret)) {
456 ftrace_graph_stop();
457 WARN_ON(1);
458 /* Might as well panic. What else to do? */
459 ret = (unsigned long)panic;
460 }
461
462 return ret;
463}
464
465/*
466 * Hook the return address and push it onto the stack of return
467 * addresses in the current thread info.
468 */
469void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
470{
471 unsigned long old;
472 unsigned long long calltime;
473 int faulted;
474 struct ftrace_graph_ent trace;
475 unsigned long return_hooker = (unsigned long)
476 &return_to_handler;
477
478	/* NMIs are currently unsupported */
479 if (unlikely(atomic_read(&in_nmi)))
480 return;
481
482 if (unlikely(atomic_read(&current->tracing_graph_pause)))
483 return;
484
485 /*
486	 * Protect against a fault, even if it shouldn't
487	 * happen. This tool is too intrusive to run
488	 * without such protection.
489 */
490 asm volatile(
491 "1: " _ASM_MOV " (%[parent_old]), %[old]\n"
492 "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
493 " movl $0, %[faulted]\n"
494
495 ".section .fixup, \"ax\"\n"
496 "3: movl $1, %[faulted]\n"
497 ".previous\n"
498
499 _ASM_EXTABLE(1b, 3b)
500 _ASM_EXTABLE(2b, 3b)
501
502 : [parent_replaced] "=r" (parent), [old] "=r" (old),
503 [faulted] "=r" (faulted)
504 : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
505 : "memory"
506 );
507
508 if (unlikely(faulted)) {
509 ftrace_graph_stop();
510 WARN_ON(1);
511 return;
512 }
513
514 if (unlikely(!__kernel_text_address(old))) {
515 ftrace_graph_stop();
516 *parent = old;
517 WARN_ON(1);
518 return;
519 }
520
521 calltime = cpu_clock(raw_smp_processor_id());
522
523 if (push_return_trace(old, calltime,
524 self_addr, &trace.depth) == -EBUSY) {
525 *parent = old;
526 return;
527 }
528
529 trace.func = self_addr;
530
531 /* Only trace if the calling function expects to */
532 if (!ftrace_graph_entry(&trace)) {
533 current->curr_ret_stack--;
534 *parent = old;
535 }
536}
537#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
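
Aside (illustrative, userspace): the 5-byte sequence that ftrace_call_replace() builds above is nothing more than an x86 "call rel32" -- opcode 0xe8 followed by target - (ip + MCOUNT_INSN_SIZE), where MCOUNT_INSN_SIZE is 5. The same encoding can be demonstrated outside the kernel (little-endian assumed, as on x86):

#include <stdio.h>
#include <string.h>

/* Build "call rel32": opcode 0xe8 followed by (target - (ip + 5)). */
static void call_replace(unsigned char buf[5],
			 unsigned long ip, unsigned long target)
{
	int rel = (int)(target - (ip + 5));	/* MCOUNT_INSN_SIZE == 5 */

	buf[0] = 0xe8;
	memcpy(buf + 1, &rel, sizeof(rel));	/* little-endian immediate */
}

int main(void)
{
	unsigned char buf[5];

	call_replace(buf, 0x1000, 0x2000);
	for (int i = 0; i < 5; i++)
		printf("%02x ", buf[i]);
	printf("\n");				/* prints: e8 fb 0f 00 00 */
	return 0;
}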
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 6c9bfc9e1e95..2bced78b0b8e 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,6 +21,7 @@
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/setup.h>
24 25
25extern struct genapic apic_flat; 26extern struct genapic apic_flat;
26extern struct genapic apic_physflat; 27extern struct genapic apic_physflat;
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
53 genapic = &apic_physflat; 54 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 55 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 } 56 }
57
58 if (x86_quirks->update_genapic)
59 x86_quirks->update_genapic();
56} 60}
57 61
58/* Same for both flat and physical. */ 62/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda4..7fa5f49c2dda 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
30 return 1; 30 return 1;
31} 31}
32 32
33static cpumask_t flat_target_cpus(void) 33static const struct cpumask *flat_target_cpus(void)
34{ 34{
35 return cpu_online_map; 35 return cpu_online_mask;
36} 36}
37 37
38static cpumask_t flat_vector_allocation_domain(int cpu) 38static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
39{ 39{
40 /* Careful. Some cpus do not strictly honor the set of cpus 40 /* Careful. Some cpus do not strictly honor the set of cpus
41 * specified in the interrupt destination when using lowest 41 * specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
45 * deliver interrupts to the wrong hyperthread when only one 45 * deliver interrupts to the wrong hyperthread when only one
46 * hyperthread was specified in the interrupt destination. 46
47 */ 47 */
48 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 48 cpumask_clear(retmask);
49 return domain; 49 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
50} 50}
51 51
52/* 52/*
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void)
69 apic_write(APIC_LDR, val); 69 apic_write(APIC_LDR, val);
70} 70}
71 71
72static void flat_send_IPI_mask(cpumask_t cpumask, int vector) 72static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
73{ 73{
74 unsigned long mask = cpus_addr(cpumask)[0];
75 unsigned long flags; 74 unsigned long flags;
76 75
77 local_irq_save(flags); 76 local_irq_save(flags);
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
79 local_irq_restore(flags); 78 local_irq_restore(flags);
80} 79}
81 80
81static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
82{
83 unsigned long mask = cpumask_bits(cpumask)[0];
84
85 _flat_send_IPI_mask(mask, vector);
86}
87
88static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
89 int vector)
90{
91 unsigned long mask = cpumask_bits(cpumask)[0];
92 int cpu = smp_processor_id();
93
94 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask);
96 _flat_send_IPI_mask(mask, vector);
97}
98
82static void flat_send_IPI_allbutself(int vector) 99static void flat_send_IPI_allbutself(int vector)
83{ 100{
101 int cpu = smp_processor_id();
84#ifdef CONFIG_HOTPLUG_CPU 102#ifdef CONFIG_HOTPLUG_CPU
85 int hotplug = 1; 103 int hotplug = 1;
86#else 104#else
87 int hotplug = 0; 105 int hotplug = 0;
88#endif 106#endif
89 if (hotplug || vector == NMI_VECTOR) { 107 if (hotplug || vector == NMI_VECTOR) {
90 cpumask_t allbutme = cpu_online_map; 108 if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
109 unsigned long mask = cpumask_bits(cpu_online_mask)[0];
91 110
92 cpu_clear(smp_processor_id(), allbutme); 111 if (cpu < BITS_PER_LONG)
112 clear_bit(cpu, &mask);
93 113
94 if (!cpus_empty(allbutme)) 114 _flat_send_IPI_mask(mask, vector);
95 flat_send_IPI_mask(allbutme, vector); 115 }
96 } else if (num_online_cpus() > 1) { 116 } else if (num_online_cpus() > 1) {
97 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); 117 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
98 } 118 }
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
101static void flat_send_IPI_all(int vector) 121static void flat_send_IPI_all(int vector)
102{ 122{
103 if (vector == NMI_VECTOR) 123 if (vector == NMI_VECTOR)
104 flat_send_IPI_mask(cpu_online_map, vector); 124 flat_send_IPI_mask(cpu_online_mask, vector);
105 else 125 else
106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 126 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
107} 127}
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void)
135 return physid_isset(read_xapic_id(), phys_cpu_present_map); 155 return physid_isset(read_xapic_id(), phys_cpu_present_map);
136} 156}
137 157
138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 158static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
159{
160 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
161}
162
163static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
164 const struct cpumask *andmask)
139{ 165{
140 return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; 166 unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
167 unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
168
169 return mask1 & mask2;
141} 170}
142 171
143static unsigned int phys_pkg_id(int index_msb) 172static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@ struct genapic apic_flat = {
157 .send_IPI_all = flat_send_IPI_all, 186 .send_IPI_all = flat_send_IPI_all,
158 .send_IPI_allbutself = flat_send_IPI_allbutself, 187 .send_IPI_allbutself = flat_send_IPI_allbutself,
159 .send_IPI_mask = flat_send_IPI_mask, 188 .send_IPI_mask = flat_send_IPI_mask,
189 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
160 .send_IPI_self = apic_send_IPI_self, 190 .send_IPI_self = apic_send_IPI_self,
161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 191 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
192 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
162 .phys_pkg_id = phys_pkg_id, 193 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id, 194 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id, 195 .set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
188 return 0; 219 return 0;
189} 220}
190 221
191static cpumask_t physflat_target_cpus(void) 222static const struct cpumask *physflat_target_cpus(void)
192{ 223{
193 return cpu_online_map; 224 return cpu_online_mask;
194} 225}
195 226
196static cpumask_t physflat_vector_allocation_domain(int cpu) 227static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
197{ 228{
198 return cpumask_of_cpu(cpu); 229 cpumask_clear(retmask);
230 cpumask_set_cpu(cpu, retmask);
199} 231}
200 232
201static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) 233static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
202{ 234{
203 send_IPI_mask_sequence(cpumask, vector); 235 send_IPI_mask_sequence(cpumask, vector);
204} 236}
205 237
206static void physflat_send_IPI_allbutself(int vector) 238static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
239 int vector)
207{ 240{
208 cpumask_t allbutme = cpu_online_map; 241 send_IPI_mask_allbutself(cpumask, vector);
242}
209 243
210 cpu_clear(smp_processor_id(), allbutme); 244static void physflat_send_IPI_allbutself(int vector)
211 physflat_send_IPI_mask(allbutme, vector); 245{
246 send_IPI_mask_allbutself(cpu_online_mask, vector);
212} 247}
213 248
214static void physflat_send_IPI_all(int vector) 249static void physflat_send_IPI_all(int vector)
215{ 250{
216 physflat_send_IPI_mask(cpu_online_map, vector); 251 physflat_send_IPI_mask(cpu_online_mask, vector);
217} 252}
218 253
219static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) 254static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
220{ 255{
221 int cpu; 256 int cpu;
222 257
@@ -224,13 +259,29 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
224 * We're using fixed IRQ delivery, can only return one phys APIC ID. 259 * We're using fixed IRQ delivery, can only return one phys APIC ID.
225 * May as well be the first. 260 * May as well be the first.
226 */ 261 */
227 cpu = first_cpu(cpumask); 262 cpu = cpumask_first(cpumask);
228 if ((unsigned)cpu < nr_cpu_ids) 263 if ((unsigned)cpu < nr_cpu_ids)
229 return per_cpu(x86_cpu_to_apicid, cpu); 264 return per_cpu(x86_cpu_to_apicid, cpu);
230 else 265 else
231 return BAD_APICID; 266 return BAD_APICID;
232} 267}
233 268
269static unsigned int
270physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
271 const struct cpumask *andmask)
272{
273 int cpu;
274
275 /*
276 * We're using fixed IRQ delivery, can only return one phys APIC ID.
277 * May as well be the first.
278 */
279 cpu = cpumask_any_and(cpumask, andmask);
280 if (cpu < nr_cpu_ids)
281 return per_cpu(x86_cpu_to_apicid, cpu);
282 return BAD_APICID;
283}
284
234struct genapic apic_physflat = { 285struct genapic apic_physflat = {
235 .name = "physical flat", 286 .name = "physical flat",
236 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 287 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +294,10 @@ struct genapic apic_physflat = {
243 .send_IPI_all = physflat_send_IPI_all, 294 .send_IPI_all = physflat_send_IPI_all,
244 .send_IPI_allbutself = physflat_send_IPI_allbutself, 295 .send_IPI_allbutself = physflat_send_IPI_allbutself,
245 .send_IPI_mask = physflat_send_IPI_mask, 296 .send_IPI_mask = physflat_send_IPI_mask,
297 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
246 .send_IPI_self = apic_send_IPI_self, 298 .send_IPI_self = apic_send_IPI_self,
247 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 299 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
300 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
248 .phys_pkg_id = phys_pkg_id, 301 .phys_pkg_id = phys_pkg_id,
249 .get_apic_id = get_apic_id, 302 .get_apic_id = get_apic_id,
250 .set_apic_id = set_apic_id, 303 .set_apic_id = set_apic_id,
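The new cpu_mask_to_apicid_and() callbacks wired up above pick a destination CPU from the intersection of two masks without materializing a temporary cpumask. A minimal userspace model of the selection that cpumask_any_and() performs, using a single 64-bit word in place of struct cpumask (all names here are illustrative, not the kernel API):

#include <stdio.h>

#define NR_CPUS    64
#define BAD_APICID 0xFFFFu

/* Illustrative stand-in for struct cpumask: one bit per CPU. */
typedef unsigned long long mask_model;

/* Lowest CPU set in both masks, or NR_CPUS when the intersection
 * is empty -- the same contract cpumask_any_and() provides. */
static unsigned int mask_any_and(mask_model a, mask_model b)
{
        mask_model both = a & b;

        return both ? (unsigned int)__builtin_ctzll(both) : NR_CPUS;
}

int main(void)
{
        mask_model affinity = 0xF0;     /* CPUs 4-7 */
        mask_model online   = 0x30;     /* CPUs 4-5 */
        unsigned int cpu = mask_any_and(affinity, online);

        if (cpu < NR_CPUS)
                printf("route IRQ to cpu %u\n", cpu);   /* cpu 4 */
        else
                printf("BAD_APICID (0x%x)\n", BAD_APICID);
        return 0;
}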
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a6..4716a0c9f936 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
22 22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24 24
25static cpumask_t x2apic_target_cpus(void) 25static const struct cpumask *x2apic_target_cpus(void)
26{ 26{
27 return cpumask_of_cpu(0); 27 return cpumask_of(0);
28} 28}
29 29
30/* 30/*
31 * for now each logical cpu is in its own vector allocation domain. 31 * for now each logical cpu is in its own vector allocation domain.
32 */ 32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu) 33static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
34{ 34{
35 cpumask_t domain = CPU_MASK_NONE; 35 cpumask_clear(retmask);
36 cpu_set(cpu, domain); 36 cpumask_set_cpu(cpu, retmask);
37 return domain;
38} 37}
39 38
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 39static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register 55 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes. 56 * writes.
58 */ 57 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
60{ 59{
61 unsigned long flags; 60 unsigned long flags;
62 unsigned long query_cpu; 61 unsigned long query_cpu;
63 62
64 local_irq_save(flags); 63 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) { 64 for_each_cpu(query_cpu, mask)
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), 65 __x2apic_send_IPI_dest(
67 vector, APIC_DEST_LOGICAL); 66 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
68 } 67 vector, APIC_DEST_LOGICAL);
69 local_irq_restore(flags); 68 local_irq_restore(flags);
70} 69}
71 70
72static void x2apic_send_IPI_allbutself(int vector) 71static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
72 int vector)
73{ 73{
74 cpumask_t mask = cpu_online_map; 74 unsigned long flags;
75 unsigned long query_cpu;
76 unsigned long this_cpu = smp_processor_id();
75 77
76 cpu_clear(smp_processor_id(), mask); 78 local_irq_save(flags);
79 for_each_cpu(query_cpu, mask)
80 if (query_cpu != this_cpu)
81 __x2apic_send_IPI_dest(
82 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
83 vector, APIC_DEST_LOGICAL);
84 local_irq_restore(flags);
85}
86
87static void x2apic_send_IPI_allbutself(int vector)
88{
89 unsigned long flags;
90 unsigned long query_cpu;
91 unsigned long this_cpu = smp_processor_id();
77 92
78 if (!cpus_empty(mask)) 93 local_irq_save(flags);
79 x2apic_send_IPI_mask(mask, vector); 94 for_each_online_cpu(query_cpu)
95 if (query_cpu != this_cpu)
96 __x2apic_send_IPI_dest(
97 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
98 vector, APIC_DEST_LOGICAL);
99 local_irq_restore(flags);
80} 100}
81 101
82static void x2apic_send_IPI_all(int vector) 102static void x2apic_send_IPI_all(int vector)
83{ 103{
84 x2apic_send_IPI_mask(cpu_online_map, vector); 104 x2apic_send_IPI_mask(cpu_online_mask, vector);
85} 105}
86 106
87static int x2apic_apic_id_registered(void) 107static int x2apic_apic_id_registered(void)
@@ -89,7 +109,7 @@ static int x2apic_apic_id_registered(void)
89 return 1; 109 return 1;
90} 110}
91 111
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
93{ 113{
94 int cpu; 114 int cpu;
95 115
@@ -97,13 +117,28 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
97 * We're using fixed IRQ delivery, can only return one phys APIC ID. 117 * We're using fixed IRQ delivery, can only return one phys APIC ID.
98 * May as well be the first. 118 * May as well be the first.
99 */ 119 */
100 cpu = first_cpu(cpumask); 120 cpu = cpumask_first(cpumask);
101 if ((unsigned)cpu < NR_CPUS) 121 if ((unsigned)cpu < nr_cpu_ids)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu); 122 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else 123 else
104 return BAD_APICID; 124 return BAD_APICID;
105} 125}
106 126
127static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
128 const struct cpumask *andmask)
129{
130 int cpu;
131
132 /*
133 * We're using fixed IRQ delivery, can only return one phys APIC ID.
134 * May as well be the first.
135 */
136 cpu = cpumask_any_and(cpumask, andmask);
137 if (cpu < nr_cpu_ids)
 138 return per_cpu(x86_cpu_to_logical_apicid, cpu);
139 return BAD_APICID;
140}
141
107static unsigned int get_apic_id(unsigned long x) 142static unsigned int get_apic_id(unsigned long x)
108{ 143{
109 unsigned int id; 144 unsigned int id;
@@ -150,8 +185,10 @@ struct genapic apic_x2apic_cluster = {
150 .send_IPI_all = x2apic_send_IPI_all, 185 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 186 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask, 187 .send_IPI_mask = x2apic_send_IPI_mask,
188 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
153 .send_IPI_self = x2apic_send_IPI_self, 189 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 190 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
191 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
155 .phys_pkg_id = phys_pkg_id, 192 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id, 193 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id, 194 .set_apic_id = set_apic_id,
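x2apic_send_IPI_mask_allbutself() above replaces the old copy-the-mask-and-clear-self idiom with a walk that simply skips the current CPU, so no cpumask_t copy lands on the stack. A small userspace sketch of the two idioms side by side (the CPU count, mask representation, and send_ipi_one() stub are invented for illustration):

#include <stdio.h>

#define NR_CPUS 8

static int this_cpu = 2;        /* pretend we run on CPU 2 */

static void send_ipi_one(int cpu, int vector)
{
        printf("vector 0x%x -> cpu %d\n", vector, cpu);
}

/* Old idiom: copy the mask, clear self, then send to the copy. */
static void allbutself_copy(unsigned int mask, int vector)
{
        unsigned int tmp = mask & ~(1u << this_cpu);    /* stack copy */
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (tmp & (1u << cpu))
                        send_ipi_one(cpu, vector);
}

/* New idiom: walk the original mask and skip self -- no copy. */
static void allbutself_skip(unsigned int mask, int vector)
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if ((mask & (1u << cpu)) && cpu != this_cpu)
                        send_ipi_one(cpu, vector);
}

int main(void)
{
        allbutself_copy(0x0F, 0xF2);    /* CPUs 0, 1, 3 */
        allbutself_skip(0x0F, 0xF2);    /* same output */
        return 0;
}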
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b7..b255507884f2 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31 31
32static cpumask_t x2apic_target_cpus(void) 32static const struct cpumask *x2apic_target_cpus(void)
33{ 33{
34 return cpumask_of_cpu(0); 34 return cpumask_of(0);
35} 35}
36 36
37static cpumask_t x2apic_vector_allocation_domain(int cpu) 37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
38{ 38{
39 cpumask_t domain = CPU_MASK_NONE; 39 cpumask_clear(retmask);
40 cpu_set(cpu, domain); 40 cpumask_set_cpu(cpu, retmask);
41 return domain;
42} 41}
43 42
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 43static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
54 x2apic_icr_write(cfg, apicid); 53 x2apic_icr_write(cfg, apicid);
55} 54}
56 55
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector) 56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
58{ 57{
59 unsigned long flags; 58 unsigned long flags;
60 unsigned long query_cpu; 59 unsigned long query_cpu;
61 60
62 local_irq_save(flags); 61 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) { 62 for_each_cpu(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), 63 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL); 64 vector, APIC_DEST_PHYSICAL);
66 } 65 }
67 local_irq_restore(flags); 66 local_irq_restore(flags);
68} 67}
69 68
70static void x2apic_send_IPI_allbutself(int vector) 69static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
70 int vector)
71{ 71{
72 cpumask_t mask = cpu_online_map; 72 unsigned long flags;
73 unsigned long query_cpu;
74 unsigned long this_cpu = smp_processor_id();
75
76 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) {
78 if (query_cpu != this_cpu)
79 __x2apic_send_IPI_dest(
80 per_cpu(x86_cpu_to_apicid, query_cpu),
81 vector, APIC_DEST_PHYSICAL);
82 }
83 local_irq_restore(flags);
84}
73 85
74 cpu_clear(smp_processor_id(), mask); 86static void x2apic_send_IPI_allbutself(int vector)
87{
88 unsigned long flags;
89 unsigned long query_cpu;
90 unsigned long this_cpu = smp_processor_id();
75 91
76 if (!cpus_empty(mask)) 92 local_irq_save(flags);
77 x2apic_send_IPI_mask(mask, vector); 93 for_each_online_cpu(query_cpu)
94 if (query_cpu != this_cpu)
95 __x2apic_send_IPI_dest(
96 per_cpu(x86_cpu_to_apicid, query_cpu),
97 vector, APIC_DEST_PHYSICAL);
98 local_irq_restore(flags);
78} 99}
79 100
80static void x2apic_send_IPI_all(int vector) 101static void x2apic_send_IPI_all(int vector)
81{ 102{
82 x2apic_send_IPI_mask(cpu_online_map, vector); 103 x2apic_send_IPI_mask(cpu_online_mask, vector);
83} 104}
84 105
85static int x2apic_apic_id_registered(void) 106static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void)
87 return 1; 108 return 1;
88} 109}
89 110
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) 111static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
91{ 112{
92 int cpu; 113 int cpu;
93 114
@@ -95,13 +116,28 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
95 * We're using fixed IRQ delivery, can only return one phys APIC ID. 116 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first. 117 * May as well be the first.
97 */ 118 */
98 cpu = first_cpu(cpumask); 119 cpu = cpumask_first(cpumask);
99 if ((unsigned)cpu < NR_CPUS) 120 if ((unsigned)cpu < nr_cpu_ids)
100 return per_cpu(x86_cpu_to_apicid, cpu); 121 return per_cpu(x86_cpu_to_apicid, cpu);
101 else 122 else
102 return BAD_APICID; 123 return BAD_APICID;
103} 124}
104 125
126static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
127 const struct cpumask *andmask)
128{
129 int cpu;
130
131 /*
132 * We're using fixed IRQ delivery, can only return one phys APIC ID.
133 * May as well be the first.
134 */
135 cpu = cpumask_any_and(cpumask, andmask);
136 if (cpu < nr_cpu_ids)
137 return per_cpu(x86_cpu_to_apicid, cpu);
138 return BAD_APICID;
139}
140
105static unsigned int get_apic_id(unsigned long x) 141static unsigned int get_apic_id(unsigned long x)
106{ 142{
107 unsigned int id; 143 unsigned int id;
@@ -145,8 +181,10 @@ struct genapic apic_x2apic_phys = {
145 .send_IPI_all = x2apic_send_IPI_all, 181 .send_IPI_all = x2apic_send_IPI_all,
146 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 182 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
147 .send_IPI_mask = x2apic_send_IPI_mask, 183 .send_IPI_mask = x2apic_send_IPI_mask,
184 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
148 .send_IPI_self = x2apic_send_IPI_self, 185 .send_IPI_self = x2apic_send_IPI_self,
149 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 186 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
187 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
150 .phys_pkg_id = phys_pkg_id, 188 .phys_pkg_id = phys_pkg_id,
151 .get_apic_id = get_apic_id, 189 .get_apic_id = get_apic_id,
152 .set_apic_id = set_apic_id, 190 .set_apic_id = set_apic_id,
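The vector_allocation_domain() conversion repeated above (here and in the cluster variant) swaps a return-by-value cpumask_t for a caller-supplied out parameter; with NR_CPUS in the thousands, the by-value form copies hundreds of bytes through the stack on every call. A tiny model of the before/after shape, with an invented mask type standing in for struct cpumask:

#include <stdio.h>
#include <string.h>

/* Stand-in for a large NR_CPUS-bit cpumask: 4096 "CPUs". */
struct mask { unsigned long bits[64]; };

/* Old shape: the whole 512-byte struct is returned by value. */
static struct mask domain_by_value(int cpu)
{
        struct mask m;

        memset(&m, 0, sizeof(m));
        m.bits[cpu / 64] |= 1ul << (cpu % 64);
        return m;                       /* copied to the caller */
}

/* New shape: the caller owns the storage; we only fill it in. */
static void domain_out_param(int cpu, struct mask *ret)
{
        memset(ret, 0, sizeof(*ret));
        ret->bits[cpu / 64] |= 1ul << (cpu % 64);
}

int main(void)
{
        struct mask m;

        domain_out_param(130, &m);
        printf("word 2 = 0x%lx\n", m.bits[2]);  /* bit 2 set */
        m = domain_by_value(130);       /* same result, extra copy */
        printf("word 2 = 0x%lx\n", m.bits[2]);
        return 0;
}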
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2c7dbdb98278..3984682cd849 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -75,16 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
75 75
76/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 76/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
77 77
78static cpumask_t uv_target_cpus(void) 78static const struct cpumask *uv_target_cpus(void)
79{ 79{
80 return cpumask_of_cpu(0); 80 return cpumask_of(0);
81} 81}
82 82
83static cpumask_t uv_vector_allocation_domain(int cpu) 83static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
84{ 84{
85 cpumask_t domain = CPU_MASK_NONE; 85 cpumask_clear(retmask);
86 cpu_set(cpu, domain); 86 cpumask_set_cpu(cpu, retmask);
87 return domain;
88} 87}
89 88
90int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) 89int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -123,28 +122,37 @@ static void uv_send_IPI_one(int cpu, int vector)
123 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 122 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
124} 123}
125 124
126static void uv_send_IPI_mask(cpumask_t mask, int vector) 125static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
127{ 126{
128 unsigned int cpu; 127 unsigned int cpu;
129 128
130 for_each_possible_cpu(cpu) 129 for_each_cpu(cpu, mask)
131 if (cpu_isset(cpu, mask)) 130 uv_send_IPI_one(cpu, vector);
131}
132
133static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
134{
135 unsigned int cpu;
136 unsigned int this_cpu = smp_processor_id();
137
138 for_each_cpu(cpu, mask)
139 if (cpu != this_cpu)
132 uv_send_IPI_one(cpu, vector); 140 uv_send_IPI_one(cpu, vector);
133} 141}
134 142
135static void uv_send_IPI_allbutself(int vector) 143static void uv_send_IPI_allbutself(int vector)
136{ 144{
137 cpumask_t mask = cpu_online_map; 145 unsigned int cpu;
138 146 unsigned int this_cpu = smp_processor_id();
139 cpu_clear(smp_processor_id(), mask);
140 147
141 if (!cpus_empty(mask)) 148 for_each_online_cpu(cpu)
142 uv_send_IPI_mask(mask, vector); 149 if (cpu != this_cpu)
150 uv_send_IPI_one(cpu, vector);
143} 151}
144 152
145static void uv_send_IPI_all(int vector) 153static void uv_send_IPI_all(int vector)
146{ 154{
147 uv_send_IPI_mask(cpu_online_map, vector); 155 uv_send_IPI_mask(cpu_online_mask, vector);
148} 156}
149 157
150static int uv_apic_id_registered(void) 158static int uv_apic_id_registered(void)
@@ -156,7 +164,7 @@ static void uv_init_apic_ldr(void)
156{ 164{
157} 165}
158 166
159static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 167static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
160{ 168{
161 int cpu; 169 int cpu;
162 170
@@ -164,13 +172,28 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
164 * We're using fixed IRQ delivery, can only return one phys APIC ID. 172 * We're using fixed IRQ delivery, can only return one phys APIC ID.
165 * May as well be the first. 173 * May as well be the first.
166 */ 174 */
167 cpu = first_cpu(cpumask); 175 cpu = cpumask_first(cpumask);
168 if ((unsigned)cpu < nr_cpu_ids) 176 if ((unsigned)cpu < nr_cpu_ids)
169 return per_cpu(x86_cpu_to_apicid, cpu); 177 return per_cpu(x86_cpu_to_apicid, cpu);
170 else 178 else
171 return BAD_APICID; 179 return BAD_APICID;
172} 180}
173 181
182static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
183 const struct cpumask *andmask)
184{
185 int cpu;
186
187 /*
188 * We're using fixed IRQ delivery, can only return one phys APIC ID.
189 * May as well be the first.
190 */
191 cpu = cpumask_any_and(cpumask, andmask);
192 if (cpu < nr_cpu_ids)
193 return per_cpu(x86_cpu_to_apicid, cpu);
194 return BAD_APICID;
195}
196
174static unsigned int get_apic_id(unsigned long x) 197static unsigned int get_apic_id(unsigned long x)
175{ 198{
176 unsigned int id; 199 unsigned int id;
@@ -218,8 +241,10 @@ struct genapic apic_x2apic_uv_x = {
218 .send_IPI_all = uv_send_IPI_all, 241 .send_IPI_all = uv_send_IPI_all,
219 .send_IPI_allbutself = uv_send_IPI_allbutself, 242 .send_IPI_allbutself = uv_send_IPI_allbutself,
220 .send_IPI_mask = uv_send_IPI_mask, 243 .send_IPI_mask = uv_send_IPI_mask,
244 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
221 .send_IPI_self = uv_send_IPI_self, 245 .send_IPI_self = uv_send_IPI_self,
222 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 246 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
247 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
223 .phys_pkg_id = phys_pkg_id, 248 .phys_pkg_id = phys_pkg_id,
224 .get_apic_id = get_apic_id, 249 .get_apic_id = get_apic_id,
225 .set_apic_id = set_apic_id, 250 .set_apic_id = set_apic_id,
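cpumask_of_cpu(cpu) built a full cpumask_t value; the cpumask_of(cpu) calls that replace it throughout this patch (here, and in the hpet and i8253 hunks below) return a pointer to constant, preexisting data, so nothing is copied at all. A simplified model of that accessor (the per-CPU table layout here is invented; the kernel uses a more compact shared bitmap):

#include <stdio.h>

#define NR_CPUS 4

/* One read-only single-bit mask per CPU, built at compile time. */
static const unsigned int single_cpu_mask[NR_CPUS] = {
        1u << 0, 1u << 1, 1u << 2, 1u << 3,
};

/* Pointer into const data: zero-copy, and callers cannot scribble
 * on it -- the same property cpumask_of() provides. */
static const unsigned int *mask_of(int cpu)
{
        return &single_cpu_mask[cpu];
}

int main(void)
{
        const unsigned int *m = mask_of(2);

        printf("cpu 2 mask: 0x%x\n", *m);       /* 0x4 */
        return 0;
}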
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f6..e76d7e272974 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -246,7 +246,7 @@ static void hpet_legacy_clockevent_register(void)
246 * Start hpet with the boot cpu mask and make it 246 * Start hpet with the boot cpu mask and make it
247 * global after the IO_APIC has been initialized. 247 * global after the IO_APIC has been initialized.
248 */ 248 */
249 hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 249 hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
250 clockevents_register_device(&hpet_clockevent); 250 clockevents_register_device(&hpet_clockevent);
251 global_clock_event = &hpet_clockevent; 251 global_clock_event = &hpet_clockevent;
252 printk(KERN_DEBUG "hpet clockevent registered\n"); 252 printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -301,7 +301,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
301 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); 301 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
302 hpet_setup_msi_irq(hdev->irq); 302 hpet_setup_msi_irq(hdev->irq);
303 disable_irq(hdev->irq); 303 disable_irq(hdev->irq);
304 irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); 304 irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
305 enable_irq(hdev->irq); 305 enable_irq(hdev->irq);
306 } 306 }
307 break; 307 break;
@@ -449,7 +449,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
449 return -1; 449 return -1;
450 450
451 disable_irq(dev->irq); 451 disable_irq(dev->irq);
452 irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); 452 irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
453 enable_irq(dev->irq); 453 enable_irq(dev->irq);
454 454
455 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", 455 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@ -500,7 +500,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
500 /* 5 usec minimum reprogramming delta. */ 500 /* 5 usec minimum reprogramming delta. */
501 evt->min_delta_ns = 5000; 501 evt->min_delta_ns = 5000;
502 502
503 evt->cpumask = cpumask_of_cpu(hdev->cpu); 503 evt->cpumask = cpumask_of(hdev->cpu);
504 clockevents_register_device(evt); 504 clockevents_register_device(evt);
505} 505}
506 506
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608d4ca8..b0f61f0dcd0a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
58 stts(); 58 stts();
59} 59}
60 60
61void __init init_thread_xstate(void) 61void __cpuinit init_thread_xstate(void)
62{ 62{
63 if (!HAVE_HWFP) { 63 if (!HAVE_HWFP) {
64 xstate_size = sizeof(struct i387_soft_struct); 64 xstate_size = sizeof(struct i387_soft_struct);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f2..10f92fb532f3 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
114 * Start pit with the boot cpu mask and make it global after the 114 * Start pit with the boot cpu mask and make it global after the
115 * IO_APIC has been initialized. 115 * IO_APIC has been initialized.
116 */ 116 */
117 pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); 117 pit_clockevent.cpumask = cpumask_of(smp_processor_id());
118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 118 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
119 pit_clockevent.shift); 119 pit_clockevent.shift);
120 pit_clockevent.max_delta_ns = 120 pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7a3f2028e2eb..6bd51ce3ce32 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,93 +108,271 @@ static int __init parse_noapic(char *str)
108early_param("noapic", parse_noapic); 108early_param("noapic", parse_noapic);
109 109
110struct irq_pin_list; 110struct irq_pin_list;
111
112/*
113 * This is performance-critical, we want to do it O(1)
114 *
115 * the indexing order of this array favors 1:1 mappings
116 * between pins and IRQs.
117 */
118
119struct irq_pin_list {
120 int apic, pin;
121 struct irq_pin_list *next;
122};
123
124static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
125{
126 struct irq_pin_list *pin;
127 int node;
128
129 node = cpu_to_node(cpu);
130
131 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
132 printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node);
133
134 return pin;
135}
136
111struct irq_cfg { 137struct irq_cfg {
112 unsigned int irq;
113 struct irq_pin_list *irq_2_pin; 138 struct irq_pin_list *irq_2_pin;
114 cpumask_t domain; 139 cpumask_var_t domain;
115 cpumask_t old_domain; 140 cpumask_var_t old_domain;
116 unsigned move_cleanup_count; 141 unsigned move_cleanup_count;
117 u8 vector; 142 u8 vector;
118 u8 move_in_progress : 1; 143 u8 move_in_progress : 1;
144#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
145 u8 move_desc_pending : 1;
146#endif
119}; 147};
120 148
121/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 149/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
150#ifdef CONFIG_SPARSE_IRQ
151static struct irq_cfg irq_cfgx[] = {
152#else
122static struct irq_cfg irq_cfgx[NR_IRQS] = { 153static struct irq_cfg irq_cfgx[NR_IRQS] = {
123 [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 154#endif
124 [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 155 [0] = { .vector = IRQ0_VECTOR, },
125 [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 156 [1] = { .vector = IRQ1_VECTOR, },
126 [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, 157 [2] = { .vector = IRQ2_VECTOR, },
127 [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, 158 [3] = { .vector = IRQ3_VECTOR, },
128 [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, 159 [4] = { .vector = IRQ4_VECTOR, },
129 [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, 160 [5] = { .vector = IRQ5_VECTOR, },
130 [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, 161 [6] = { .vector = IRQ6_VECTOR, },
131 [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, 162 [7] = { .vector = IRQ7_VECTOR, },
132 [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, 163 [8] = { .vector = IRQ8_VECTOR, },
133 [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, 164 [9] = { .vector = IRQ9_VECTOR, },
134 [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, 165 [10] = { .vector = IRQ10_VECTOR, },
135 [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, 166 [11] = { .vector = IRQ11_VECTOR, },
136 [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, 167 [12] = { .vector = IRQ12_VECTOR, },
137 [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, 168 [13] = { .vector = IRQ13_VECTOR, },
138 [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, 169 [14] = { .vector = IRQ14_VECTOR, },
170 [15] = { .vector = IRQ15_VECTOR, },
139}; 171};
140 172
141#define for_each_irq_cfg(irq, cfg) \ 173void __init arch_early_irq_init(void)
142 for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) 174{
175 struct irq_cfg *cfg;
176 struct irq_desc *desc;
177 int count;
178 int i;
143 179
180 cfg = irq_cfgx;
181 count = ARRAY_SIZE(irq_cfgx);
182
183 for (i = 0; i < count; i++) {
184 desc = irq_to_desc(i);
185 desc->chip_data = &cfg[i];
186 alloc_bootmem_cpumask_var(&cfg[i].domain);
187 alloc_bootmem_cpumask_var(&cfg[i].old_domain);
188 if (i < NR_IRQS_LEGACY)
189 cpumask_setall(cfg[i].domain);
190 }
191}
192
193#ifdef CONFIG_SPARSE_IRQ
144static struct irq_cfg *irq_cfg(unsigned int irq) 194static struct irq_cfg *irq_cfg(unsigned int irq)
145{ 195{
146 return irq < nr_irqs ? irq_cfgx + irq : NULL; 196 struct irq_cfg *cfg = NULL;
197 struct irq_desc *desc;
198
199 desc = irq_to_desc(irq);
200 if (desc)
201 cfg = desc->chip_data;
202
203 return cfg;
147} 204}
148 205
149static struct irq_cfg *irq_cfg_alloc(unsigned int irq) 206static struct irq_cfg *get_one_free_irq_cfg(int cpu)
150{ 207{
151 return irq_cfg(irq); 208 struct irq_cfg *cfg;
209 int node;
210
211 node = cpu_to_node(cpu);
212
213 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
214 if (cfg) {
215 /* FIXME: needs alloc_cpumask_var_node() */
216 if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
217 kfree(cfg);
218 cfg = NULL;
219 } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
220 free_cpumask_var(cfg->domain);
221 kfree(cfg);
222 cfg = NULL;
223 } else {
224 cpumask_clear(cfg->domain);
225 cpumask_clear(cfg->old_domain);
226 }
227 }
228 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
229
230 return cfg;
152} 231}
153 232
154/* 233void arch_init_chip_data(struct irq_desc *desc, int cpu)
155 * Rough estimation of how many shared IRQs there are, can be changed 234{
156 * anytime. 235 struct irq_cfg *cfg;
157 */
158#define MAX_PLUS_SHARED_IRQS NR_IRQS
159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
160 236
161/* 237 cfg = desc->chip_data;
162 * This is performance-critical, we want to do it O(1) 238 if (!cfg) {
163 * 239 desc->chip_data = get_one_free_irq_cfg(cpu);
164 * the indexing order of this array favors 1:1 mappings 240 if (!desc->chip_data) {
165 * between pins and IRQs. 241 printk(KERN_ERR "can not alloc irq_cfg\n");
166 */ 242 BUG_ON(1);
243 }
244 }
245}
167 246
168struct irq_pin_list { 247#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
169 int apic, pin;
170 struct irq_pin_list *next;
171};
172 248
173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; 249static void
174static struct irq_pin_list *irq_2_pin_ptr; 250init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
251{
252 struct irq_pin_list *old_entry, *head, *tail, *entry;
253
254 cfg->irq_2_pin = NULL;
255 old_entry = old_cfg->irq_2_pin;
256 if (!old_entry)
257 return;
258
259 entry = get_one_free_irq_2_pin(cpu);
260 if (!entry)
261 return;
175 262
176static void __init irq_2_pin_init(void) 263 entry->apic = old_entry->apic;
264 entry->pin = old_entry->pin;
265 head = entry;
266 tail = entry;
267 old_entry = old_entry->next;
268 while (old_entry) {
269 entry = get_one_free_irq_2_pin(cpu);
270 if (!entry) {
271 entry = head;
272 while (entry) {
273 head = entry->next;
274 kfree(entry);
275 entry = head;
276 }
277 /* still use the old one */
278 return;
279 }
280 entry->apic = old_entry->apic;
281 entry->pin = old_entry->pin;
282 tail->next = entry;
283 tail = entry;
284 old_entry = old_entry->next;
285 }
286
287 tail->next = NULL;
288 cfg->irq_2_pin = head;
289}
290
291static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
177{ 292{
178 struct irq_pin_list *pin = irq_2_pin_head; 293 struct irq_pin_list *entry, *next;
179 int i;
180 294
181 for (i = 1; i < PIN_MAP_SIZE; i++) 295 if (old_cfg->irq_2_pin == cfg->irq_2_pin)
182 pin[i-1].next = &pin[i]; 296 return;
183 297
184 irq_2_pin_ptr = &pin[0]; 298 entry = old_cfg->irq_2_pin;
299
300 while (entry) {
301 next = entry->next;
302 kfree(entry);
303 entry = next;
304 }
305 old_cfg->irq_2_pin = NULL;
185} 306}
186 307
187static struct irq_pin_list *get_one_free_irq_2_pin(void) 308void arch_init_copy_chip_data(struct irq_desc *old_desc,
309 struct irq_desc *desc, int cpu)
188{ 310{
189 struct irq_pin_list *pin = irq_2_pin_ptr; 311 struct irq_cfg *cfg;
312 struct irq_cfg *old_cfg;
190 313
191 if (!pin) 314 cfg = get_one_free_irq_cfg(cpu);
192 panic("can not get more irq_2_pin\n");
193 315
194 irq_2_pin_ptr = pin->next; 316 if (!cfg)
195 pin->next = NULL; 317 return;
196 return pin; 318
319 desc->chip_data = cfg;
320
321 old_cfg = old_desc->chip_data;
322
323 memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
324
325 init_copy_irq_2_pin(old_cfg, cfg, cpu);
326}
327
328static void free_irq_cfg(struct irq_cfg *old_cfg)
329{
330 kfree(old_cfg);
331}
332
333void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
334{
335 struct irq_cfg *old_cfg, *cfg;
336
337 old_cfg = old_desc->chip_data;
338 cfg = desc->chip_data;
339
340 if (old_cfg == cfg)
341 return;
342
343 if (old_cfg) {
344 free_irq_2_pin(old_cfg, cfg);
345 free_irq_cfg(old_cfg);
346 old_desc->chip_data = NULL;
347 }
348}
349
 350static void set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
351{
352 struct irq_cfg *cfg = desc->chip_data;
353
354 if (!cfg->move_in_progress) {
 355 /* the domain has not changed */
 356 if (!cpumask_intersects(&desc->affinity, mask))
357 cfg->move_desc_pending = 1;
358 }
197} 359}
360#endif
361
362#else
363static struct irq_cfg *irq_cfg(unsigned int irq)
364{
365 return irq < nr_irqs ? irq_cfgx + irq : NULL;
366}
367
368#endif
369
370#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
371static inline void
372set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
373{
374}
375#endif
198 376
199struct io_apic { 377struct io_apic {
200 unsigned int index; 378 unsigned int index;
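get_one_free_irq_cfg() above allocates two cpumask_var_t masks after the struct itself and must unwind whichever earlier allocations succeeded when a later one fails. A userspace sketch of that cleanup ladder (the structure and function names are illustrative, not the kernel's):

#include <stdio.h>
#include <stdlib.h>

struct cfg_model {
        unsigned long *domain;          /* stands in for cpumask_var_t */
        unsigned long *old_domain;
};

static struct cfg_model *alloc_cfg(void)
{
        struct cfg_model *cfg = calloc(1, sizeof(*cfg));

        if (!cfg)
                return NULL;
        cfg->domain = calloc(1, sizeof(unsigned long));
        if (!cfg->domain) {
                free(cfg);              /* nothing else to undo yet */
                return NULL;
        }
        cfg->old_domain = calloc(1, sizeof(unsigned long));
        if (!cfg->old_domain) {
                free(cfg->domain);      /* unwind the earlier success */
                free(cfg);
                return NULL;
        }
        return cfg;
}

int main(void)
{
        struct cfg_model *cfg = alloc_cfg();

        printf("%s\n", cfg ? "allocated" : "failed");
        return 0;
}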
@@ -237,11 +415,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
237 writel(value, &io_apic->data); 415 writel(value, &io_apic->data);
238} 416}
239 417
240static bool io_apic_level_ack_pending(unsigned int irq) 418static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
241{ 419{
242 struct irq_pin_list *entry; 420 struct irq_pin_list *entry;
243 unsigned long flags; 421 unsigned long flags;
244 struct irq_cfg *cfg = irq_cfg(irq);
245 422
246 spin_lock_irqsave(&ioapic_lock, flags); 423 spin_lock_irqsave(&ioapic_lock, flags);
247 entry = cfg->irq_2_pin; 424 entry = cfg->irq_2_pin;
@@ -323,13 +500,32 @@ static void ioapic_mask_entry(int apic, int pin)
323} 500}
324 501
325#ifdef CONFIG_SMP 502#ifdef CONFIG_SMP
326static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) 503static void send_cleanup_vector(struct irq_cfg *cfg)
504{
505 cpumask_var_t cleanup_mask;
506
507 if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
508 unsigned int i;
509 cfg->move_cleanup_count = 0;
510 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
511 cfg->move_cleanup_count++;
512 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
513 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
514 } else {
515 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
516 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
517 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
518 free_cpumask_var(cleanup_mask);
519 }
520 cfg->move_in_progress = 0;
521}
522
523static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
327{ 524{
328 int apic, pin; 525 int apic, pin;
329 struct irq_cfg *cfg;
330 struct irq_pin_list *entry; 526 struct irq_pin_list *entry;
527 u8 vector = cfg->vector;
331 528
332 cfg = irq_cfg(irq);
333 entry = cfg->irq_2_pin; 529 entry = cfg->irq_2_pin;
334 for (;;) { 530 for (;;) {
335 unsigned int reg; 531 unsigned int reg;
@@ -359,36 +555,61 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
359 } 555 }
360} 556}
361 557
362static int assign_irq_vector(int irq, cpumask_t mask); 558static int
559assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
560
561/*
562 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
563 * of that, or returns BAD_APICID and leaves desc->affinity untouched.
564 */
565static unsigned int
566set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
567{
568 struct irq_cfg *cfg;
569 unsigned int irq;
570
571 if (!cpumask_intersects(mask, cpu_online_mask))
572 return BAD_APICID;
363 573
364static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 574 irq = desc->irq;
575 cfg = desc->chip_data;
576 if (assign_irq_vector(irq, cfg, mask))
577 return BAD_APICID;
578
579 cpumask_and(&desc->affinity, cfg->domain, mask);
580 set_extra_move_desc(desc, mask);
581 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
582}
583
584static void
585set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
365{ 586{
366 struct irq_cfg *cfg; 587 struct irq_cfg *cfg;
367 unsigned long flags; 588 unsigned long flags;
368 unsigned int dest; 589 unsigned int dest;
369 cpumask_t tmp; 590 unsigned int irq;
370 struct irq_desc *desc;
371 591
372 cpus_and(tmp, mask, cpu_online_map); 592 irq = desc->irq;
373 if (cpus_empty(tmp)) 593 cfg = desc->chip_data;
374 return;
375 594
376 cfg = irq_cfg(irq); 595 spin_lock_irqsave(&ioapic_lock, flags);
377 if (assign_irq_vector(irq, mask)) 596 dest = set_desc_affinity(desc, mask);
378 return; 597 if (dest != BAD_APICID) {
598 /* Only the high 8 bits are valid. */
599 dest = SET_APIC_LOGICAL_ID(dest);
600 __target_IO_APIC_irq(irq, dest, cfg);
601 }
602 spin_unlock_irqrestore(&ioapic_lock, flags);
603}
379 604
380 cpus_and(tmp, cfg->domain, mask); 605static void
381 dest = cpu_mask_to_apicid(tmp); 606set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
382 /* 607{
383 * Only the high 8 bits are valid. 608 struct irq_desc *desc;
384 */
385 dest = SET_APIC_LOGICAL_ID(dest);
386 609
387 desc = irq_to_desc(irq); 610 desc = irq_to_desc(irq);
388 spin_lock_irqsave(&ioapic_lock, flags); 611
389 __target_IO_APIC_irq(irq, dest, cfg->vector); 612 set_ioapic_affinity_irq_desc(desc, mask);
390 desc->affinity = mask;
391 spin_unlock_irqrestore(&ioapic_lock, flags);
392} 613}
393#endif /* CONFIG_SMP */ 614#endif /* CONFIG_SMP */
394 615
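send_cleanup_vector() in the hunk above degrades gracefully: when the temporary cpumask cannot be allocated, it sends one IPI per CPU instead of failing the cleanup. A compact model of that allocate-or-fall-back pattern (the mask representation and IPI stubs are invented):

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 8

static void send_ipi_mask(unsigned int mask, int vector)
{
        printf("one IPI, mask 0x%x, vector 0x%x\n", mask, vector);
}

static void send_ipi_one(int cpu, int vector)
{
        printf("IPI -> cpu %d, vector 0x%x\n", cpu, vector);
}

static void cleanup(unsigned int old_domain, unsigned int online, int vector)
{
        unsigned int *tmp = malloc(sizeof(*tmp));       /* "cpumask_var_t" */
        int cpu;

        if (!tmp) {
                /* Fallback: no temporary mask, one IPI per CPU. */
                for (cpu = 0; cpu < NR_CPUS; cpu++)
                        if (old_domain & online & (1u << cpu))
                                send_ipi_one(cpu, vector);
                return;
        }
        *tmp = old_domain & online;
        send_ipi_mask(*tmp, vector);    /* fast path: a single call */
        free(tmp);
}

int main(void)
{
        cleanup(0x0F, 0x0D, 0xE0);
        return 0;
}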
@@ -397,16 +618,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
397 * shared ISA-space IRQs, so we have to support them. We are super 618 * shared ISA-space IRQs, so we have to support them. We are super
398 * fast in the common case, and fast for shared ISA-space IRQs. 619 * fast in the common case, and fast for shared ISA-space IRQs.
399 */ 620 */
400static void add_pin_to_irq(unsigned int irq, int apic, int pin) 621static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
401{ 622{
402 struct irq_cfg *cfg;
403 struct irq_pin_list *entry; 623 struct irq_pin_list *entry;
404 624
405 /* first time to refer irq_cfg, so with new */
406 cfg = irq_cfg_alloc(irq);
407 entry = cfg->irq_2_pin; 625 entry = cfg->irq_2_pin;
408 if (!entry) { 626 if (!entry) {
409 entry = get_one_free_irq_2_pin(); 627 entry = get_one_free_irq_2_pin(cpu);
628 if (!entry) {
629 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
630 apic, pin);
631 return;
632 }
410 cfg->irq_2_pin = entry; 633 cfg->irq_2_pin = entry;
411 entry->apic = apic; 634 entry->apic = apic;
412 entry->pin = pin; 635 entry->pin = pin;
@@ -421,7 +644,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
421 entry = entry->next; 644 entry = entry->next;
422 } 645 }
423 646
424 entry->next = get_one_free_irq_2_pin(); 647 entry->next = get_one_free_irq_2_pin(cpu);
425 entry = entry->next; 648 entry = entry->next;
426 entry->apic = apic; 649 entry->apic = apic;
427 entry->pin = pin; 650 entry->pin = pin;
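add_pin_to_irq_cpu() above appends an (apic, pin) pair to the per-IRQ singly linked list after walking it to the tail. A compact pointer-to-pointer rendering of the same append-with-duplicate-check walk, with userspace allocation standing in for get_one_free_irq_2_pin():

#include <stdio.h>
#include <stdlib.h>

struct pin {
        int apic, pin;
        struct pin *next;
};

/* Append (apic, pin) unless it is already present. */
static void add_pin(struct pin **head, int apic, int pin)
{
        struct pin **pp = head;

        while (*pp) {
                if ((*pp)->apic == apic && (*pp)->pin == pin)
                        return;         /* already mapped */
                pp = &(*pp)->next;
        }
        *pp = calloc(1, sizeof(**pp));
        if (!*pp)
                return;                 /* allocation failed: give up */
        (*pp)->apic = apic;
        (*pp)->pin = pin;
}

int main(void)
{
        struct pin *head = NULL;

        add_pin(&head, 0, 19);
        add_pin(&head, 0, 19);          /* duplicate is ignored */
        add_pin(&head, 1, 3);
        for (struct pin *p = head; p; p = p->next)
                printf("apic %d pin %d\n", p->apic, p->pin);
        return 0;
}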
@@ -430,11 +653,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
430/* 653/*
431 * Reroute an IRQ to a different pin. 654 * Reroute an IRQ to a different pin.
432 */ 655 */
433static void __init replace_pin_at_irq(unsigned int irq, 656static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
434 int oldapic, int oldpin, 657 int oldapic, int oldpin,
435 int newapic, int newpin) 658 int newapic, int newpin)
436{ 659{
437 struct irq_cfg *cfg = irq_cfg(irq);
438 struct irq_pin_list *entry = cfg->irq_2_pin; 660 struct irq_pin_list *entry = cfg->irq_2_pin;
439 int replaced = 0; 661 int replaced = 0;
440 662
@@ -451,18 +673,16 @@ static void __init replace_pin_at_irq(unsigned int irq,
451 673
452 /* why? call replace before add? */ 674 /* why? call replace before add? */
453 if (!replaced) 675 if (!replaced)
454 add_pin_to_irq(irq, newapic, newpin); 676 add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
455} 677}
456 678
457static inline void io_apic_modify_irq(unsigned int irq, 679static inline void io_apic_modify_irq(struct irq_cfg *cfg,
458 int mask_and, int mask_or, 680 int mask_and, int mask_or,
459 void (*final)(struct irq_pin_list *entry)) 681 void (*final)(struct irq_pin_list *entry))
460{ 682{
461 int pin; 683 int pin;
462 struct irq_cfg *cfg;
463 struct irq_pin_list *entry; 684 struct irq_pin_list *entry;
464 685
465 cfg = irq_cfg(irq);
466 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { 686 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
467 unsigned int reg; 687 unsigned int reg;
468 pin = entry->pin; 688 pin = entry->pin;
@@ -475,9 +695,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
475 } 695 }
476} 696}
477 697
478static void __unmask_IO_APIC_irq(unsigned int irq) 698static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
479{ 699{
480 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); 700 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
481} 701}
482 702
483#ifdef CONFIG_X86_64 703#ifdef CONFIG_X86_64
@@ -492,47 +712,64 @@ void io_apic_sync(struct irq_pin_list *entry)
492 readl(&io_apic->data); 712 readl(&io_apic->data);
493} 713}
494 714
495static void __mask_IO_APIC_irq(unsigned int irq) 715static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
496{ 716{
497 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 717 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
498} 718}
499#else /* CONFIG_X86_32 */ 719#else /* CONFIG_X86_32 */
500static void __mask_IO_APIC_irq(unsigned int irq) 720static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
501{ 721{
502 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); 722 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
503} 723}
504 724
505static void __mask_and_edge_IO_APIC_irq(unsigned int irq) 725static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
506{ 726{
507 io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, 727 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
508 IO_APIC_REDIR_MASKED, NULL); 728 IO_APIC_REDIR_MASKED, NULL);
509} 729}
510 730
511static void __unmask_and_level_IO_APIC_irq(unsigned int irq) 731static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
512{ 732{
513 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 733 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
514 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 734 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
515} 735}
516#endif /* CONFIG_X86_32 */ 736#endif /* CONFIG_X86_32 */
517 737
518static void mask_IO_APIC_irq (unsigned int irq) 738static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
519{ 739{
740 struct irq_cfg *cfg = desc->chip_data;
520 unsigned long flags; 741 unsigned long flags;
521 742
743 BUG_ON(!cfg);
744
522 spin_lock_irqsave(&ioapic_lock, flags); 745 spin_lock_irqsave(&ioapic_lock, flags);
523 __mask_IO_APIC_irq(irq); 746 __mask_IO_APIC_irq(cfg);
524 spin_unlock_irqrestore(&ioapic_lock, flags); 747 spin_unlock_irqrestore(&ioapic_lock, flags);
525} 748}
526 749
527static void unmask_IO_APIC_irq (unsigned int irq) 750static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
528{ 751{
752 struct irq_cfg *cfg = desc->chip_data;
529 unsigned long flags; 753 unsigned long flags;
530 754
531 spin_lock_irqsave(&ioapic_lock, flags); 755 spin_lock_irqsave(&ioapic_lock, flags);
532 __unmask_IO_APIC_irq(irq); 756 __unmask_IO_APIC_irq(cfg);
533 spin_unlock_irqrestore(&ioapic_lock, flags); 757 spin_unlock_irqrestore(&ioapic_lock, flags);
534} 758}
535 759
760static void mask_IO_APIC_irq(unsigned int irq)
761{
762 struct irq_desc *desc = irq_to_desc(irq);
763
764 mask_IO_APIC_irq_desc(desc);
765}
766static void unmask_IO_APIC_irq(unsigned int irq)
767{
768 struct irq_desc *desc = irq_to_desc(irq);
769
770 unmask_IO_APIC_irq_desc(desc);
771}
772
536static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 773static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
537{ 774{
538 struct IO_APIC_route_entry entry; 775 struct IO_APIC_route_entry entry;
@@ -809,7 +1046,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
809 */ 1046 */
810static int EISA_ELCR(unsigned int irq) 1047static int EISA_ELCR(unsigned int irq)
811{ 1048{
812 if (irq < 16) { 1049 if (irq < NR_IRQS_LEGACY) {
813 unsigned int port = 0x4d0 + (irq >> 3); 1050 unsigned int port = 0x4d0 + (irq >> 3);
814 return (inb(port) >> (irq & 7)) & 1; 1051 return (inb(port) >> (irq & 7)) & 1;
815 } 1052 }
@@ -1034,7 +1271,8 @@ void unlock_vector_lock(void)
1034 spin_unlock(&vector_lock); 1271 spin_unlock(&vector_lock);
1035} 1272}
1036 1273
1037static int __assign_irq_vector(int irq, cpumask_t mask) 1274static int
1275__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1038{ 1276{
1039 /* 1277 /*
1040 * NOTE! The local APIC isn't very good at handling 1278 * NOTE! The local APIC isn't very good at handling
@@ -1049,39 +1287,39 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
1049 */ 1287 */
1050 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1288 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1051 unsigned int old_vector; 1289 unsigned int old_vector;
1052 int cpu; 1290 int cpu, err;
1053 struct irq_cfg *cfg; 1291 cpumask_var_t tmp_mask;
1054
1055 cfg = irq_cfg(irq);
1056
1057 /* Only try and allocate irqs on cpus that are present */
1058 cpus_and(mask, mask, cpu_online_map);
1059 1292
1060 if ((cfg->move_in_progress) || cfg->move_cleanup_count) 1293 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1061 return -EBUSY; 1294 return -EBUSY;
1062 1295
1296 if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
1297 return -ENOMEM;
1298
1063 old_vector = cfg->vector; 1299 old_vector = cfg->vector;
1064 if (old_vector) { 1300 if (old_vector) {
1065 cpumask_t tmp; 1301 cpumask_and(tmp_mask, mask, cpu_online_mask);
1066 cpus_and(tmp, cfg->domain, mask); 1302 cpumask_and(tmp_mask, cfg->domain, tmp_mask);
1067 if (!cpus_empty(tmp)) 1303 if (!cpumask_empty(tmp_mask)) {
1304 free_cpumask_var(tmp_mask);
1068 return 0; 1305 return 0;
1306 }
1069 } 1307 }
1070 1308
1071 for_each_cpu_mask_nr(cpu, mask) { 1309 /* Only try and allocate irqs on cpus that are present */
1072 cpumask_t domain, new_mask; 1310 err = -ENOSPC;
1311 for_each_cpu_and(cpu, mask, cpu_online_mask) {
1073 int new_cpu; 1312 int new_cpu;
1074 int vector, offset; 1313 int vector, offset;
1075 1314
1076 domain = vector_allocation_domain(cpu); 1315 vector_allocation_domain(cpu, tmp_mask);
1077 cpus_and(new_mask, domain, cpu_online_map);
1078 1316
1079 vector = current_vector; 1317 vector = current_vector;
1080 offset = current_offset; 1318 offset = current_offset;
1081next: 1319next:
1082 vector += 8; 1320 vector += 8;
1083 if (vector >= first_system_vector) { 1321 if (vector >= first_system_vector) {
1084 /* If we run out of vectors on large boxen, must share them. */ 1322 /* If out of vectors on large boxen, must share them. */
1085 offset = (offset + 1) % 8; 1323 offset = (offset + 1) % 8;
1086 vector = FIRST_DEVICE_VECTOR + offset; 1324 vector = FIRST_DEVICE_VECTOR + offset;
1087 } 1325 }
@@ -1094,7 +1332,7 @@ next:
1094 if (vector == SYSCALL_VECTOR) 1332 if (vector == SYSCALL_VECTOR)
1095 goto next; 1333 goto next;
1096#endif 1334#endif
1097 for_each_cpu_mask_nr(new_cpu, new_mask) 1335 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1098 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1336 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
1099 goto next; 1337 goto next;
1100 /* Found one! */ 1338 /* Found one! */
@@ -1102,44 +1340,56 @@ next:
1102 current_offset = offset; 1340 current_offset = offset;
1103 if (old_vector) { 1341 if (old_vector) {
1104 cfg->move_in_progress = 1; 1342 cfg->move_in_progress = 1;
1105 cfg->old_domain = cfg->domain; 1343 cpumask_copy(cfg->old_domain, cfg->domain);
1106 } 1344 }
1107 for_each_cpu_mask_nr(new_cpu, new_mask) 1345 for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1108 per_cpu(vector_irq, new_cpu)[vector] = irq; 1346 per_cpu(vector_irq, new_cpu)[vector] = irq;
1109 cfg->vector = vector; 1347 cfg->vector = vector;
1110 cfg->domain = domain; 1348 cpumask_copy(cfg->domain, tmp_mask);
1111 return 0; 1349 err = 0;
1350 break;
1112 } 1351 }
1113 return -ENOSPC; 1352 free_cpumask_var(tmp_mask);
1353 return err;
1114} 1354}
1115 1355
1116static int assign_irq_vector(int irq, cpumask_t mask) 1356static int
1357assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1117{ 1358{
1118 int err; 1359 int err;
1119 unsigned long flags; 1360 unsigned long flags;
1120 1361
1121 spin_lock_irqsave(&vector_lock, flags); 1362 spin_lock_irqsave(&vector_lock, flags);
1122 err = __assign_irq_vector(irq, mask); 1363 err = __assign_irq_vector(irq, cfg, mask);
1123 spin_unlock_irqrestore(&vector_lock, flags); 1364 spin_unlock_irqrestore(&vector_lock, flags);
1124 return err; 1365 return err;
1125} 1366}
1126 1367
1127static void __clear_irq_vector(int irq) 1368static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1128{ 1369{
1129 struct irq_cfg *cfg;
1130 cpumask_t mask;
1131 int cpu, vector; 1370 int cpu, vector;
1132 1371
1133 cfg = irq_cfg(irq);
1134 BUG_ON(!cfg->vector); 1372 BUG_ON(!cfg->vector);
1135 1373
1136 vector = cfg->vector; 1374 vector = cfg->vector;
1137 cpus_and(mask, cfg->domain, cpu_online_map); 1375 for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1138 for_each_cpu_mask_nr(cpu, mask)
1139 per_cpu(vector_irq, cpu)[vector] = -1; 1376 per_cpu(vector_irq, cpu)[vector] = -1;
1140 1377
1141 cfg->vector = 0; 1378 cfg->vector = 0;
1142 cpus_clear(cfg->domain); 1379 cpumask_clear(cfg->domain);
1380
1381 if (likely(!cfg->move_in_progress))
1382 return;
1383 for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1384 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1385 vector++) {
1386 if (per_cpu(vector_irq, cpu)[vector] != irq)
1387 continue;
1388 per_cpu(vector_irq, cpu)[vector] = -1;
1389 break;
1390 }
1391 }
1392 cfg->move_in_progress = 0;
1143} 1393}
1144 1394
1145void __setup_vector_irq(int cpu) 1395void __setup_vector_irq(int cpu)
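__assign_irq_vector() above probes candidate vectors in strides of eight, and when it runs past the last device vector it wraps around with the offset advanced by one, so successive IRQs tend to land on distinct low bits (priority classes). A standalone model of just that search order (the vector-range constants below are invented for the sketch):

#include <stdio.h>

#define FIRST_DEVICE_VECTOR     0x31
#define FIRST_SYSTEM_VECTOR     0xEF

/* Emit the first n candidate vectors in the allocator's probe
 * order: step by 8, rotate the offset on wraparound. */
static void probe_order(int n)
{
        int vector = FIRST_DEVICE_VECTOR;
        int offset = 0;

        while (n-- > 0) {
                printf("try 0x%02x\n", vector);
                vector += 8;
                if (vector >= FIRST_SYSTEM_VECTOR) {
                        offset = (offset + 1) % 8;
                        vector = FIRST_DEVICE_VECTOR + offset;
                }
        }
}

int main(void)
{
        probe_order(30);        /* walks past one wraparound */
        return 0;
}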
@@ -1148,10 +1398,14 @@ void __setup_vector_irq(int cpu)
1148 /* This function must be called with vector_lock held */ 1398 /* This function must be called with vector_lock held */
1149 int irq, vector; 1399 int irq, vector;
1150 struct irq_cfg *cfg; 1400 struct irq_cfg *cfg;
1401 struct irq_desc *desc;
1151 1402
1152 /* Mark the inuse vectors */ 1403 /* Mark the inuse vectors */
1153 for_each_irq_cfg(irq, cfg) { 1404 for_each_irq_desc(irq, desc) {
1154 if (!cpu_isset(cpu, cfg->domain)) 1405 if (!desc)
1406 continue;
1407 cfg = desc->chip_data;
1408 if (!cpumask_test_cpu(cpu, cfg->domain))
1155 continue; 1409 continue;
1156 vector = cfg->vector; 1410 vector = cfg->vector;
1157 per_cpu(vector_irq, cpu)[vector] = irq; 1411 per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1163,7 +1417,7 @@ void __setup_vector_irq(int cpu)
1163 continue; 1417 continue;
1164 1418
1165 cfg = irq_cfg(irq); 1419 cfg = irq_cfg(irq);
1166 if (!cpu_isset(cpu, cfg->domain)) 1420 if (!cpumask_test_cpu(cpu, cfg->domain))
1167 per_cpu(vector_irq, cpu)[vector] = -1; 1421 per_cpu(vector_irq, cpu)[vector] = -1;
1168 } 1422 }
1169} 1423}
@@ -1201,11 +1455,8 @@ static inline int IO_APIC_irq_trigger(int irq)
1201} 1455}
1202#endif 1456#endif
1203 1457
1204static void ioapic_register_intr(int irq, unsigned long trigger) 1458static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
1205{ 1459{
1206 struct irq_desc *desc;
1207
1208 desc = irq_to_desc(irq);
1209 1460
1210 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1461 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1211 trigger == IOAPIC_LEVEL) 1462 trigger == IOAPIC_LEVEL)
@@ -1297,23 +1548,22 @@ static int setup_ioapic_entry(int apic, int irq,
1297 return 0; 1548 return 0;
1298} 1549}
1299 1550
1300static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1551static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
1301 int trigger, int polarity) 1552 int trigger, int polarity)
1302{ 1553{
1303 struct irq_cfg *cfg; 1554 struct irq_cfg *cfg;
1304 struct IO_APIC_route_entry entry; 1555 struct IO_APIC_route_entry entry;
1305 cpumask_t mask; 1556 unsigned int dest;
1306 1557
1307 if (!IO_APIC_IRQ(irq)) 1558 if (!IO_APIC_IRQ(irq))
1308 return; 1559 return;
1309 1560
1310 cfg = irq_cfg(irq); 1561 cfg = desc->chip_data;
1311 1562
1312 mask = TARGET_CPUS; 1563 if (assign_irq_vector(irq, cfg, TARGET_CPUS))
1313 if (assign_irq_vector(irq, mask))
1314 return; 1564 return;
1315 1565
1316 cpus_and(mask, cfg->domain, mask); 1566 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
1317 1567
1318 apic_printk(APIC_VERBOSE,KERN_DEBUG 1568 apic_printk(APIC_VERBOSE,KERN_DEBUG
1319 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1569 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1323,16 +1573,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1323 1573
1324 1574
1325 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1575 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
1326 cpu_mask_to_apicid(mask), trigger, polarity, 1576 dest, trigger, polarity, cfg->vector)) {
1327 cfg->vector)) {
1328 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1577 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1329 mp_ioapics[apic].mp_apicid, pin); 1578 mp_ioapics[apic].mp_apicid, pin);
1330 __clear_irq_vector(irq); 1579 __clear_irq_vector(irq, cfg);
1331 return; 1580 return;
1332 } 1581 }
1333 1582
1334 ioapic_register_intr(irq, trigger); 1583 ioapic_register_intr(irq, desc, trigger);
1335 if (irq < 16) 1584 if (irq < NR_IRQS_LEGACY)
1336 disable_8259A_irq(irq); 1585 disable_8259A_irq(irq);
1337 1586
1338 ioapic_write_entry(apic, pin, entry); 1587 ioapic_write_entry(apic, pin, entry);
@@ -1342,6 +1591,9 @@ static void __init setup_IO_APIC_irqs(void)
1342{ 1591{
1343 int apic, pin, idx, irq; 1592 int apic, pin, idx, irq;
1344 int notcon = 0; 1593 int notcon = 0;
1594 struct irq_desc *desc;
1595 struct irq_cfg *cfg;
1596 int cpu = boot_cpu_id;
1345 1597
1346 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1598 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1347 1599
@@ -1373,9 +1625,15 @@ static void __init setup_IO_APIC_irqs(void)
1373 if (multi_timer_check(apic, irq)) 1625 if (multi_timer_check(apic, irq))
1374 continue; 1626 continue;
1375#endif 1627#endif
1376 add_pin_to_irq(irq, apic, pin); 1628 desc = irq_to_desc_alloc_cpu(irq, cpu);
1629 if (!desc) {
1630 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1631 continue;
1632 }
1633 cfg = desc->chip_data;
1634 add_pin_to_irq_cpu(cfg, cpu, apic, pin);
1377 1635
1378 setup_IO_APIC_irq(apic, pin, irq, 1636 setup_IO_APIC_irq(apic, pin, irq, desc,
1379 irq_trigger(idx), irq_polarity(idx)); 1637 irq_trigger(idx), irq_polarity(idx));
1380 } 1638 }
1381 } 1639 }
@@ -1434,6 +1692,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1434 union IO_APIC_reg_03 reg_03; 1692 union IO_APIC_reg_03 reg_03;
1435 unsigned long flags; 1693 unsigned long flags;
1436 struct irq_cfg *cfg; 1694 struct irq_cfg *cfg;
1695 struct irq_desc *desc;
1437 unsigned int irq; 1696 unsigned int irq;
1438 1697
1439 if (apic_verbosity == APIC_QUIET) 1698 if (apic_verbosity == APIC_QUIET)
@@ -1523,8 +1782,13 @@ __apicdebuginit(void) print_IO_APIC(void)
1523 } 1782 }
1524 } 1783 }
1525 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1784 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1526 for_each_irq_cfg(irq, cfg) { 1785 for_each_irq_desc(irq, desc) {
1527 struct irq_pin_list *entry = cfg->irq_2_pin; 1786 struct irq_pin_list *entry;
1787
1788 if (!desc)
1789 continue;
1790 cfg = desc->chip_data;
1791 entry = cfg->irq_2_pin;
1528 if (!entry) 1792 if (!entry)
1529 continue; 1793 continue;
1530 printk(KERN_DEBUG "IRQ%d ", irq); 1794 printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2008,14 +2272,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2008{ 2272{
2009 int was_pending = 0; 2273 int was_pending = 0;
2010 unsigned long flags; 2274 unsigned long flags;
2275 struct irq_cfg *cfg;
2011 2276
2012 spin_lock_irqsave(&ioapic_lock, flags); 2277 spin_lock_irqsave(&ioapic_lock, flags);
2013 if (irq < 16) { 2278 if (irq < NR_IRQS_LEGACY) {
2014 disable_8259A_irq(irq); 2279 disable_8259A_irq(irq);
2015 if (i8259A_irq_pending(irq)) 2280 if (i8259A_irq_pending(irq))
2016 was_pending = 1; 2281 was_pending = 1;
2017 } 2282 }
2018 __unmask_IO_APIC_irq(irq); 2283 cfg = irq_cfg(irq);
2284 __unmask_IO_APIC_irq(cfg);
2019 spin_unlock_irqrestore(&ioapic_lock, flags); 2285 spin_unlock_irqrestore(&ioapic_lock, flags);
2020 2286
2021 return was_pending; 2287 return was_pending;
@@ -2029,7 +2295,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2029 unsigned long flags; 2295 unsigned long flags;
2030 2296
2031 spin_lock_irqsave(&vector_lock, flags); 2297 spin_lock_irqsave(&vector_lock, flags);
2032 send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); 2298 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2033 spin_unlock_irqrestore(&vector_lock, flags); 2299 spin_unlock_irqrestore(&vector_lock, flags);
2034 2300
2035 return 1; 2301 return 1;
@@ -2078,35 +2344,35 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2078 * as simple as edge triggered migration and we can do the irq migration 2344 * as simple as edge triggered migration and we can do the irq migration
2079 * with a simple atomic update to IO-APIC RTE. 2345 * with a simple atomic update to IO-APIC RTE.
2080 */ 2346 */
2081static void migrate_ioapic_irq(int irq, cpumask_t mask) 2347static void
2348migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2082{ 2349{
2083 struct irq_cfg *cfg; 2350 struct irq_cfg *cfg;
2084 struct irq_desc *desc;
2085 cpumask_t tmp, cleanup_mask;
2086 struct irte irte; 2351 struct irte irte;
2087 int modify_ioapic_rte; 2352 int modify_ioapic_rte;
2088 unsigned int dest; 2353 unsigned int dest;
2089 unsigned long flags; 2354 unsigned long flags;
2355 unsigned int irq;
2090 2356
2091 cpus_and(tmp, mask, cpu_online_map); 2357 if (!cpumask_intersects(mask, cpu_online_mask))
2092 if (cpus_empty(tmp))
2093 return; 2358 return;
2094 2359
2360 irq = desc->irq;
2095 if (get_irte(irq, &irte)) 2361 if (get_irte(irq, &irte))
2096 return; 2362 return;
2097 2363
2098 if (assign_irq_vector(irq, mask)) 2364 cfg = desc->chip_data;
2365 if (assign_irq_vector(irq, cfg, mask))
2099 return; 2366 return;
2100 2367
2101 cfg = irq_cfg(irq); 2368 set_extra_move_desc(desc, mask);
2102 cpus_and(tmp, cfg->domain, mask); 2369
2103 dest = cpu_mask_to_apicid(tmp); 2370 dest = cpu_mask_to_apicid_and(cfg->domain, mask);
2104 2371
2105 desc = irq_to_desc(irq);
2106 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2372 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2107 if (modify_ioapic_rte) { 2373 if (modify_ioapic_rte) {
2108 spin_lock_irqsave(&ioapic_lock, flags); 2374 spin_lock_irqsave(&ioapic_lock, flags);
2109 __target_IO_APIC_irq(irq, dest, cfg->vector); 2375 __target_IO_APIC_irq(irq, dest, cfg);
2110 spin_unlock_irqrestore(&ioapic_lock, flags); 2376 spin_unlock_irqrestore(&ioapic_lock, flags);
2111 } 2377 }
2112 2378
@@ -2118,24 +2384,20 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
2118 */ 2384 */
2119 modify_irte(irq, &irte); 2385 modify_irte(irq, &irte);
2120 2386
2121 if (cfg->move_in_progress) { 2387 if (cfg->move_in_progress)
2122 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2388 send_cleanup_vector(cfg);
2123 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2124 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2125 cfg->move_in_progress = 0;
2126 }
2127 2389
2128 desc->affinity = mask; 2390 cpumask_copy(&desc->affinity, mask);
2129} 2391}
2130 2392
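
The cpus_and()/cpu_mask_to_apicid() pair that migrate_ioapic_irq_desc() used to open-code needed a temporary cpumask_t on the stack; cpu_mask_to_apicid_and() folds the AND into the lookup. A sketch of the equivalence (hypothetical wrapper, pointer arguments assumed):

    static unsigned int dest_apicid(const struct cpumask *domain,
                                    const struct cpumask *mask)
    {
            /* old, with an NR_CPUS-sized temporary:
             *      cpumask_t tmp;
             *      cpus_and(tmp, domain, mask);
             *      return cpu_mask_to_apicid(tmp);
             * new: the intersection happens inside the helper
             */
            return cpu_mask_to_apicid_and(domain, mask);
    }
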
2131static int migrate_irq_remapped_level(int irq) 2393static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2132{ 2394{
2133 int ret = -1; 2395 int ret = -1;
2134 struct irq_desc *desc = irq_to_desc(irq); 2396 struct irq_cfg *cfg = desc->chip_data;
2135 2397
2136 mask_IO_APIC_irq(irq); 2398 mask_IO_APIC_irq_desc(desc);
2137 2399
2138 if (io_apic_level_ack_pending(irq)) { 2400 if (io_apic_level_ack_pending(cfg)) {
2139 /* 2401 /*
2140 * Interrupt in progress. Migrating irq now will change the 2402 * Interrupt in progress. Migrating irq now will change the
2141 * vector information in the IO-APIC RTE and that will confuse 2403 * vector information in the IO-APIC RTE and that will confuse
@@ -2147,14 +2409,15 @@ static int migrate_irq_remapped_level(int irq)
2147 } 2409 }
2148 2410
2149 /* everything is clear. we have right of way */ 2411 /* everything is clear. we have right of way */
2150 migrate_ioapic_irq(irq, desc->pending_mask); 2412 migrate_ioapic_irq_desc(desc, &desc->pending_mask);
2151 2413
2152 ret = 0; 2414 ret = 0;
2153 desc->status &= ~IRQ_MOVE_PENDING; 2415 desc->status &= ~IRQ_MOVE_PENDING;
2154 cpus_clear(desc->pending_mask); 2416 cpumask_clear(&desc->pending_mask);
2155 2417
2156unmask: 2418unmask:
2157 unmask_IO_APIC_irq(irq); 2419 unmask_IO_APIC_irq_desc(desc);
2420
2158 return ret; 2421 return ret;
2159} 2422}
2160 2423
@@ -2164,6 +2427,9 @@ static void ir_irq_migration(struct work_struct *work)
2164 struct irq_desc *desc; 2427 struct irq_desc *desc;
2165 2428
2166 for_each_irq_desc(irq, desc) { 2429 for_each_irq_desc(irq, desc) {
2430 if (!desc)
2431 continue;
2432
2167 if (desc->status & IRQ_MOVE_PENDING) { 2433 if (desc->status & IRQ_MOVE_PENDING) {
2168 unsigned long flags; 2434 unsigned long flags;
2169 2435
@@ -2175,7 +2441,7 @@ static void ir_irq_migration(struct work_struct *work)
2175 continue; 2441 continue;
2176 } 2442 }
2177 2443
2178 desc->chip->set_affinity(irq, desc->pending_mask); 2444 desc->chip->set_affinity(irq, &desc->pending_mask);
2179 spin_unlock_irqrestore(&desc->lock, flags); 2445 spin_unlock_irqrestore(&desc->lock, flags);
2180 } 2446 }
2181 } 2447 }
@@ -2184,18 +2450,24 @@ static void ir_irq_migration(struct work_struct *work)
2184/* 2450/*
2185 * Migrates the IRQ destination in the process context. 2451 * Migrates the IRQ destination in the process context.
2186 */ 2452 */
2187static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2453static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2454 const struct cpumask *mask)
2188{ 2455{
2189 struct irq_desc *desc = irq_to_desc(irq);
2190
2191 if (desc->status & IRQ_LEVEL) { 2456 if (desc->status & IRQ_LEVEL) {
2192 desc->status |= IRQ_MOVE_PENDING; 2457 desc->status |= IRQ_MOVE_PENDING;
2193 desc->pending_mask = mask; 2458 cpumask_copy(&desc->pending_mask, mask);
2194 migrate_irq_remapped_level(irq); 2459 migrate_irq_remapped_level_desc(desc);
2195 return; 2460 return;
2196 } 2461 }
2197 2462
2198 migrate_ioapic_irq(irq, mask); 2463 migrate_ioapic_irq_desc(desc, mask);
2464}
2465static void set_ir_ioapic_affinity_irq(unsigned int irq,
2466 const struct cpumask *mask)
2467{
2468 struct irq_desc *desc = irq_to_desc(irq);
2469
2470 set_ir_ioapic_affinity_irq_desc(desc, mask);
2199} 2471}
2200#endif 2472#endif
2201 2473
@@ -2215,6 +2487,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2215 struct irq_cfg *cfg; 2487 struct irq_cfg *cfg;
2216 irq = __get_cpu_var(vector_irq)[vector]; 2488 irq = __get_cpu_var(vector_irq)[vector];
2217 2489
2490 if (irq == -1)
2491 continue;
2492
2218 desc = irq_to_desc(irq); 2493 desc = irq_to_desc(irq);
2219 if (!desc) 2494 if (!desc)
2220 continue; 2495 continue;
@@ -2224,7 +2499,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2224 if (!cfg->move_cleanup_count) 2499 if (!cfg->move_cleanup_count)
2225 goto unlock; 2500 goto unlock;
2226 2501
2227 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) 2502 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2228 goto unlock; 2503 goto unlock;
2229 2504
2230 __get_cpu_var(vector_irq)[vector] = -1; 2505 __get_cpu_var(vector_irq)[vector] = -1;
@@ -2236,28 +2511,44 @@ unlock:
2236 irq_exit(); 2511 irq_exit();
2237} 2512}
2238 2513
2239static void irq_complete_move(unsigned int irq) 2514static void irq_complete_move(struct irq_desc **descp)
2240{ 2515{
2241 struct irq_cfg *cfg = irq_cfg(irq); 2516 struct irq_desc *desc = *descp;
2517 struct irq_cfg *cfg = desc->chip_data;
2242 unsigned vector, me; 2518 unsigned vector, me;
2243 2519
2244 if (likely(!cfg->move_in_progress)) 2520 if (likely(!cfg->move_in_progress)) {
2521#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2522 if (likely(!cfg->move_desc_pending))
2523 return;
2524
2525 /* domain did not change, but affinity did */
2526 me = smp_processor_id();
2527 if (cpu_isset(me, desc->affinity)) {
2528 *descp = desc = move_irq_desc(desc, me);
2529 /* get the new one */
2530 cfg = desc->chip_data;
2531 cfg->move_desc_pending = 0;
2532 }
2533#endif
2245 return; 2534 return;
2535 }
2246 2536
2247 vector = ~get_irq_regs()->orig_ax; 2537 vector = ~get_irq_regs()->orig_ax;
2248 me = smp_processor_id(); 2538 me = smp_processor_id();
2249 if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { 2539#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
2250 cpumask_t cleanup_mask; 2540 *descp = desc = move_irq_desc(desc, me);
2541 /* get the new one */
2542 cfg = desc->chip_data;
2543#endif
2251 2544
2252 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 2545 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2253 cfg->move_cleanup_count = cpus_weight(cleanup_mask); 2546 send_cleanup_vector(cfg);
2254 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2255 cfg->move_in_progress = 0;
2256 }
2257} 2547}
2258#else 2548#else
2259static inline void irq_complete_move(unsigned int irq) {} 2549static inline void irq_complete_move(struct irq_desc **descp) {}
2260#endif 2550#endif
2551
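
irq_complete_move() now takes struct irq_desc ** because, under CONFIG_NUMA_MIGRATE_IRQ_DESC, move_irq_desc() may hand back a descriptor reallocated on the local node; the double pointer lets the caller keep using the fresh one. A minimal sketch of that contract (function name hypothetical):

    static void complete_move_sketch(struct irq_desc **descp)
    {
            struct irq_desc *desc = *descp;

            /* may allocate a replacement descriptor on this node
             * and retire the old one */
            desc = move_irq_desc(desc, smp_processor_id());

            *descp = desc;          /* caller sees the new pointer */
    }
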
2261#ifdef CONFIG_INTR_REMAP 2552#ifdef CONFIG_INTR_REMAP
2262static void ack_x2apic_level(unsigned int irq) 2553static void ack_x2apic_level(unsigned int irq)
2263{ 2554{
@@ -2268,11 +2559,14 @@ static void ack_x2apic_edge(unsigned int irq)
2268{ 2559{
2269 ack_x2APIC_irq(); 2560 ack_x2APIC_irq();
2270} 2561}
2562
2271#endif 2563#endif
2272 2564
2273static void ack_apic_edge(unsigned int irq) 2565static void ack_apic_edge(unsigned int irq)
2274{ 2566{
2275 irq_complete_move(irq); 2567 struct irq_desc *desc = irq_to_desc(irq);
2568
2569 irq_complete_move(&desc);
2276 move_native_irq(irq); 2570 move_native_irq(irq);
2277 ack_APIC_irq(); 2571 ack_APIC_irq();
2278} 2572}
@@ -2281,18 +2575,21 @@ atomic_t irq_mis_count;
2281 2575
2282static void ack_apic_level(unsigned int irq) 2576static void ack_apic_level(unsigned int irq)
2283{ 2577{
2578 struct irq_desc *desc = irq_to_desc(irq);
2579
2284#ifdef CONFIG_X86_32 2580#ifdef CONFIG_X86_32
2285 unsigned long v; 2581 unsigned long v;
2286 int i; 2582 int i;
2287#endif 2583#endif
2584 struct irq_cfg *cfg;
2288 int do_unmask_irq = 0; 2585 int do_unmask_irq = 0;
2289 2586
2290 irq_complete_move(irq); 2587 irq_complete_move(&desc);
2291#ifdef CONFIG_GENERIC_PENDING_IRQ 2588#ifdef CONFIG_GENERIC_PENDING_IRQ
2292 /* If we are moving the irq we need to mask it */ 2589 /* If we are moving the irq we need to mask it */
2293 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { 2590 if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
2294 do_unmask_irq = 1; 2591 do_unmask_irq = 1;
2295 mask_IO_APIC_irq(irq); 2592 mask_IO_APIC_irq_desc(desc);
2296 } 2593 }
2297#endif 2594#endif
2298 2595
@@ -2316,7 +2613,8 @@ static void ack_apic_level(unsigned int irq)
2316 * operation to prevent an edge-triggered interrupt escaping meanwhile. 2613 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2317 * The idea is from Manfred Spraul. --macro 2614 * The idea is from Manfred Spraul. --macro
2318 */ 2615 */
2319 i = irq_cfg(irq)->vector; 2616 cfg = desc->chip_data;
2617 i = cfg->vector;
2320 2618
2321 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2619 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2322#endif 2620#endif
@@ -2355,17 +2653,18 @@ static void ack_apic_level(unsigned int irq)
2355 * accurate and is causing problems then it is a hardware bug 2653 * accurate and is causing problems then it is a hardware bug
2356 * and you can go talk to the chipset vendor about it. 2654 * and you can go talk to the chipset vendor about it.
2357 */ 2655 */
2358 if (!io_apic_level_ack_pending(irq)) 2656 cfg = desc->chip_data;
2657 if (!io_apic_level_ack_pending(cfg))
2359 move_masked_irq(irq); 2658 move_masked_irq(irq);
2360 unmask_IO_APIC_irq(irq); 2659 unmask_IO_APIC_irq_desc(desc);
2361 } 2660 }
2362 2661
2363#ifdef CONFIG_X86_32 2662#ifdef CONFIG_X86_32
2364 if (!(v & (1 << (i & 0x1f)))) { 2663 if (!(v & (1 << (i & 0x1f)))) {
2365 atomic_inc(&irq_mis_count); 2664 atomic_inc(&irq_mis_count);
2366 spin_lock(&ioapic_lock); 2665 spin_lock(&ioapic_lock);
2367 __mask_and_edge_IO_APIC_irq(irq); 2666 __mask_and_edge_IO_APIC_irq(cfg);
2368 __unmask_and_level_IO_APIC_irq(irq); 2667 __unmask_and_level_IO_APIC_irq(cfg);
2369 spin_unlock(&ioapic_lock); 2668 spin_unlock(&ioapic_lock);
2370 } 2669 }
2371#endif 2670#endif
@@ -2416,20 +2715,22 @@ static inline void init_IO_APIC_traps(void)
2416 * Also, we've got to be careful not to trash gate 2715 * Also, we've got to be careful not to trash gate
2417 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2716 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2418 */ 2717 */
2419 for_each_irq_cfg(irq, cfg) { 2718 for_each_irq_desc(irq, desc) {
2420 if (IO_APIC_IRQ(irq) && !cfg->vector) { 2719 if (!desc)
2720 continue;
2721
2722 cfg = desc->chip_data;
2723 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2421 /* 2724 /*
2422 * Hmm.. We don't have an entry for this, 2725 * Hmm.. We don't have an entry for this,
2423 * so default to an old-fashioned 8259 2726 * so default to an old-fashioned 8259
2424 * interrupt if we can.. 2727 * interrupt if we can..
2425 */ 2728 */
2426 if (irq < 16) 2729 if (irq < NR_IRQS_LEGACY)
2427 make_8259A_irq(irq); 2730 make_8259A_irq(irq);
2428 else { 2731 else
2429 desc = irq_to_desc(irq);
2430 /* Strange. Oh, well.. */ 2732 /* Strange. Oh, well.. */
2431 desc->chip = &no_irq_chip; 2733 desc->chip = &no_irq_chip;
2432 }
2433 } 2734 }
2434 } 2735 }
2435} 2736}
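
With sparse irq descriptors, walking the irq space now means tolerating holes and fetching the per-irq vector state through desc->chip_data, as init_IO_APIC_traps() does above. A sketch of the walk (hypothetical counter function):

    static unsigned int count_vectored_irqs(void)
    {
            struct irq_desc *desc;
            struct irq_cfg *cfg;
            unsigned int irq, n = 0;

            for_each_irq_desc(irq, desc) {
                    if (!desc)              /* hole in a sparse irq space */
                            continue;
                    cfg = desc->chip_data;  /* per-irq vector/domain state */
                    if (cfg && cfg->vector)
                            n++;
            }
            return n;
    }
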
@@ -2454,7 +2755,7 @@ static void unmask_lapic_irq(unsigned int irq)
2454 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2755 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2455} 2756}
2456 2757
2457static void ack_lapic_irq (unsigned int irq) 2758static void ack_lapic_irq(unsigned int irq)
2458{ 2759{
2459 ack_APIC_irq(); 2760 ack_APIC_irq();
2460} 2761}
@@ -2466,11 +2767,8 @@ static struct irq_chip lapic_chip __read_mostly = {
2466 .ack = ack_lapic_irq, 2767 .ack = ack_lapic_irq,
2467}; 2768};
2468 2769
2469static void lapic_register_intr(int irq) 2770static void lapic_register_intr(int irq, struct irq_desc *desc)
2470{ 2771{
2471 struct irq_desc *desc;
2472
2473 desc = irq_to_desc(irq);
2474 desc->status &= ~IRQ_LEVEL; 2772 desc->status &= ~IRQ_LEVEL;
2475 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2773 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2476 "edge"); 2774 "edge");
@@ -2574,7 +2872,9 @@ int timer_through_8259 __initdata;
2574 */ 2872 */
2575static inline void __init check_timer(void) 2873static inline void __init check_timer(void)
2576{ 2874{
2577 struct irq_cfg *cfg = irq_cfg(0); 2875 struct irq_desc *desc = irq_to_desc(0);
2876 struct irq_cfg *cfg = desc->chip_data;
2877 int cpu = boot_cpu_id;
2578 int apic1, pin1, apic2, pin2; 2878 int apic1, pin1, apic2, pin2;
2579 unsigned long flags; 2879 unsigned long flags;
2580 unsigned int ver; 2880 unsigned int ver;
@@ -2589,7 +2889,7 @@ static inline void __init check_timer(void)
2589 * get/set the timer IRQ vector: 2889 * get/set the timer IRQ vector:
2590 */ 2890 */
2591 disable_8259A_irq(0); 2891 disable_8259A_irq(0);
2592 assign_irq_vector(0, TARGET_CPUS); 2892 assign_irq_vector(0, cfg, TARGET_CPUS);
2593 2893
2594 /* 2894 /*
2595 * As IRQ0 is to be enabled in the 8259A, the virtual 2895 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2640,10 +2940,10 @@ static inline void __init check_timer(void)
2640 * Ok, does IRQ0 through the IOAPIC work? 2940 * Ok, does IRQ0 through the IOAPIC work?
2641 */ 2941 */
2642 if (no_pin1) { 2942 if (no_pin1) {
2643 add_pin_to_irq(0, apic1, pin1); 2943 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
2644 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2944 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2645 } 2945 }
2646 unmask_IO_APIC_irq(0); 2946 unmask_IO_APIC_irq_desc(desc);
2647 if (timer_irq_works()) { 2947 if (timer_irq_works()) {
2648 if (nmi_watchdog == NMI_IO_APIC) { 2948 if (nmi_watchdog == NMI_IO_APIC) {
2649 setup_nmi(); 2949 setup_nmi();
@@ -2669,9 +2969,9 @@ static inline void __init check_timer(void)
2669 /* 2969 /*
2670 * legacy devices should be connected to IO APIC #0 2970 * legacy devices should be connected to IO APIC #0
2671 */ 2971 */
2672 replace_pin_at_irq(0, apic1, pin1, apic2, pin2); 2972 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
2673 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2973 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2674 unmask_IO_APIC_irq(0); 2974 unmask_IO_APIC_irq_desc(desc);
2675 enable_8259A_irq(0); 2975 enable_8259A_irq(0);
2676 if (timer_irq_works()) { 2976 if (timer_irq_works()) {
2677 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2977 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2703,7 +3003,7 @@ static inline void __init check_timer(void)
2703 apic_printk(APIC_QUIET, KERN_INFO 3003 apic_printk(APIC_QUIET, KERN_INFO
2704 "...trying to set up timer as Virtual Wire IRQ...\n"); 3004 "...trying to set up timer as Virtual Wire IRQ...\n");
2705 3005
2706 lapic_register_intr(0); 3006 lapic_register_intr(0, desc);
2707 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 3007 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
2708 enable_8259A_irq(0); 3008 enable_8259A_irq(0);
2709 3009
@@ -2888,22 +3188,26 @@ unsigned int create_irq_nr(unsigned int irq_want)
2888 unsigned int irq; 3188 unsigned int irq;
2889 unsigned int new; 3189 unsigned int new;
2890 unsigned long flags; 3190 unsigned long flags;
2891 struct irq_cfg *cfg_new; 3191 struct irq_cfg *cfg_new = NULL;
2892 3192 int cpu = boot_cpu_id;
2893 irq_want = nr_irqs - 1; 3193 struct irq_desc *desc_new = NULL;
2894 3194
2895 irq = 0; 3195 irq = 0;
2896 spin_lock_irqsave(&vector_lock, flags); 3196 spin_lock_irqsave(&vector_lock, flags);
2897 for (new = irq_want; new > 0; new--) { 3197 for (new = irq_want; new < NR_IRQS; new++) {
2898 if (platform_legacy_irq(new)) 3198 if (platform_legacy_irq(new))
2899 continue; 3199 continue;
2900 cfg_new = irq_cfg(new); 3200
2901 if (cfg_new && cfg_new->vector != 0) 3201 desc_new = irq_to_desc_alloc_cpu(new, cpu);
3202 if (!desc_new) {
3203 printk(KERN_INFO "cannot get irq_desc for %d\n", new);
3204 continue;
3205 }
3206 cfg_new = desc_new->chip_data;
3207
3208 if (cfg_new->vector != 0)
2902 continue; 3209 continue;
2903 /* check if need to create one */ 3210 if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
2904 if (!cfg_new)
2905 cfg_new = irq_cfg_alloc(new);
2906 if (__assign_irq_vector(new, TARGET_CPUS) == 0)
2907 irq = new; 3211 irq = new;
2908 break; 3212 break;
2909 } 3213 }
@@ -2911,15 +3215,21 @@ unsigned int create_irq_nr(unsigned int irq_want)
2911 3215
2912 if (irq > 0) { 3216 if (irq > 0) {
2913 dynamic_irq_init(irq); 3217 dynamic_irq_init(irq);
3218 /* restore it, in case dynamic_irq_init clears it */
3219 if (desc_new)
3220 desc_new->chip_data = cfg_new;
2914 } 3221 }
2915 return irq; 3222 return irq;
2916} 3223}
2917 3224
3225static int nr_irqs_gsi = NR_IRQS_LEGACY;
2918int create_irq(void) 3226int create_irq(void)
2919{ 3227{
3228 unsigned int irq_want;
2920 int irq; 3229 int irq;
2921 3230
2922 irq = create_irq_nr(nr_irqs - 1); 3231 irq_want = nr_irqs_gsi;
3232 irq = create_irq_nr(irq_want);
2923 3233
2924 if (irq == 0) 3234 if (irq == 0)
2925 irq = -1; 3235 irq = -1;
@@ -2930,14 +3240,22 @@ int create_irq(void)
2930void destroy_irq(unsigned int irq) 3240void destroy_irq(unsigned int irq)
2931{ 3241{
2932 unsigned long flags; 3242 unsigned long flags;
3243 struct irq_cfg *cfg;
3244 struct irq_desc *desc;
2933 3245
3246 /* store it, in case dynamic_irq_cleanup clears it */
3247 desc = irq_to_desc(irq);
3248 cfg = desc->chip_data;
2934 dynamic_irq_cleanup(irq); 3249 dynamic_irq_cleanup(irq);
3250 /* connect back irq_cfg */
3251 if (desc)
3252 desc->chip_data = cfg;
2935 3253
2936#ifdef CONFIG_INTR_REMAP 3254#ifdef CONFIG_INTR_REMAP
2937 free_irte(irq); 3255 free_irte(irq);
2938#endif 3256#endif
2939 spin_lock_irqsave(&vector_lock, flags); 3257 spin_lock_irqsave(&vector_lock, flags);
2940 __clear_irq_vector(irq); 3258 __clear_irq_vector(irq, cfg);
2941 spin_unlock_irqrestore(&vector_lock, flags); 3259 spin_unlock_irqrestore(&vector_lock, flags);
2942} 3260}
2943 3261
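
destroy_irq() has to save and restore chip_data around dynamic_irq_cleanup() because the generic helper resets the descriptor and would otherwise drop the irq_cfg that sparse irq now hangs off it; create_irq_nr() does the same dance around dynamic_irq_init(). The pattern, reduced to a sketch (name hypothetical):

    static void recycle_irq_sketch(unsigned int irq)
    {
            struct irq_desc *desc = irq_to_desc(irq);
            struct irq_cfg *cfg = desc->chip_data;  /* save */

            dynamic_irq_cleanup(irq);       /* resets desc, chip_data too */
            desc->chip_data = cfg;          /* reconnect the cfg */
    }
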
@@ -2950,16 +3268,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2950 struct irq_cfg *cfg; 3268 struct irq_cfg *cfg;
2951 int err; 3269 int err;
2952 unsigned dest; 3270 unsigned dest;
2953 cpumask_t tmp;
2954 3271
2955 tmp = TARGET_CPUS; 3272 cfg = irq_cfg(irq);
2956 err = assign_irq_vector(irq, tmp); 3273 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
2957 if (err) 3274 if (err)
2958 return err; 3275 return err;
2959 3276
2960 cfg = irq_cfg(irq); 3277 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
2961 cpus_and(tmp, cfg->domain, tmp);
2962 dest = cpu_mask_to_apicid(tmp);
2963 3278
2964#ifdef CONFIG_INTR_REMAP 3279#ifdef CONFIG_INTR_REMAP
2965 if (irq_remapped(irq)) { 3280 if (irq_remapped(irq)) {
@@ -3013,64 +3328,48 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3013} 3328}
3014 3329
3015#ifdef CONFIG_SMP 3330#ifdef CONFIG_SMP
3016static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3331static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3017{ 3332{
3333 struct irq_desc *desc = irq_to_desc(irq);
3018 struct irq_cfg *cfg; 3334 struct irq_cfg *cfg;
3019 struct msi_msg msg; 3335 struct msi_msg msg;
3020 unsigned int dest; 3336 unsigned int dest;
3021 cpumask_t tmp;
3022 struct irq_desc *desc;
3023 3337
3024 cpus_and(tmp, mask, cpu_online_map); 3338 dest = set_desc_affinity(desc, mask);
3025 if (cpus_empty(tmp)) 3339 if (dest == BAD_APICID)
3026 return; 3340 return;
3027 3341
3028 if (assign_irq_vector(irq, mask)) 3342 cfg = desc->chip_data;
3029 return;
3030
3031 cfg = irq_cfg(irq);
3032 cpus_and(tmp, cfg->domain, mask);
3033 dest = cpu_mask_to_apicid(tmp);
3034 3343
3035 read_msi_msg(irq, &msg); 3344 read_msi_msg_desc(desc, &msg);
3036 3345
3037 msg.data &= ~MSI_DATA_VECTOR_MASK; 3346 msg.data &= ~MSI_DATA_VECTOR_MASK;
3038 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3347 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3039 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3348 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3040 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3349 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3041 3350
3042 write_msi_msg(irq, &msg); 3351 write_msi_msg_desc(desc, &msg);
3043 desc = irq_to_desc(irq);
3044 desc->affinity = mask;
3045} 3352}
3046
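
All of the affinity setters (MSI above, plus the DMAR/HPET/HT variants below) now share one prologue: set_desc_affinity() validates the mask, reassigns the vector, records the affinity, and returns the destination APIC id, with BAD_APICID signalling failure. A sketch of what that helper is assumed to do internally, pieced together from the call sites:

    static unsigned int set_desc_affinity_sketch(struct irq_desc *desc,
                                                 const struct cpumask *mask)
    {
            struct irq_cfg *cfg = desc->chip_data;

            if (!cpumask_intersects(mask, cpu_online_mask))
                    return BAD_APICID;      /* nothing online in mask */

            if (assign_irq_vector(desc->irq, cfg, mask))
                    return BAD_APICID;      /* no vector available */

            cpumask_copy(&desc->affinity, mask);

            return cpu_mask_to_apicid_and(cfg->domain, mask);
    }
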
3047#ifdef CONFIG_INTR_REMAP 3353#ifdef CONFIG_INTR_REMAP
3048/* 3354/*
3049 * Migrate the MSI irq to another cpumask. This migration is 3355 * Migrate the MSI irq to another cpumask. This migration is
3050 * done in the process context using interrupt-remapping hardware. 3356 * done in the process context using interrupt-remapping hardware.
3051 */ 3357 */
3052static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3358static void
3359ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3053{ 3360{
3361 struct irq_desc *desc = irq_to_desc(irq);
3054 struct irq_cfg *cfg; 3362 struct irq_cfg *cfg;
3055 unsigned int dest; 3363 unsigned int dest;
3056 cpumask_t tmp, cleanup_mask;
3057 struct irte irte; 3364 struct irte irte;
3058 struct irq_desc *desc;
3059
3060 cpus_and(tmp, mask, cpu_online_map);
3061 if (cpus_empty(tmp))
3062 return;
3063 3365
3064 if (get_irte(irq, &irte)) 3366 if (get_irte(irq, &irte))
3065 return; 3367 return;
3066 3368
3067 if (assign_irq_vector(irq, mask)) 3369 dest = set_desc_affinity(desc, mask);
3370 if (dest == BAD_APICID)
3068 return; 3371 return;
3069 3372
3070 cfg = irq_cfg(irq);
3071 cpus_and(tmp, cfg->domain, mask);
3072 dest = cpu_mask_to_apicid(tmp);
3073
3074 irte.vector = cfg->vector; 3373 irte.vector = cfg->vector;
3075 irte.dest_id = IRTE_DEST(dest); 3374 irte.dest_id = IRTE_DEST(dest);
3076 3375
@@ -3084,16 +3383,10 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3084 * at the new destination. So, time to cleanup the previous 3383 * at the new destination. So, time to cleanup the previous
3085 * vector allocation. 3384 * vector allocation.
3086 */ 3385 */
3087 if (cfg->move_in_progress) { 3386 if (cfg->move_in_progress)
3088 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); 3387 send_cleanup_vector(cfg);
3089 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
3090 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
3091 cfg->move_in_progress = 0;
3092 }
3093
3094 desc = irq_to_desc(irq);
3095 desc->affinity = mask;
3096} 3388}
3389
3097#endif 3390#endif
3098#endif /* CONFIG_SMP */ 3391#endif /* CONFIG_SMP */
3099 3392
@@ -3152,7 +3445,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3152} 3445}
3153#endif 3446#endif
3154 3447
3155static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) 3448static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3156{ 3449{
3157 int ret; 3450 int ret;
3158 struct msi_msg msg; 3451 struct msi_msg msg;
@@ -3161,7 +3454,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3161 if (ret < 0) 3454 if (ret < 0)
3162 return ret; 3455 return ret;
3163 3456
3164 set_irq_msi(irq, desc); 3457 set_irq_msi(irq, msidesc);
3165 write_msi_msg(irq, &msg); 3458 write_msi_msg(irq, &msg);
3166 3459
3167#ifdef CONFIG_INTR_REMAP 3460#ifdef CONFIG_INTR_REMAP
@@ -3181,26 +3474,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3181 return 0; 3474 return 0;
3182} 3475}
3183 3476
3184static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) 3477int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3185{
3186 unsigned int irq;
3187
3188 irq = dev->bus->number;
3189 irq <<= 8;
3190 irq |= dev->devfn;
3191 irq <<= 12;
3192
3193 return irq;
3194}
3195
3196int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3197{ 3478{
3198 unsigned int irq; 3479 unsigned int irq;
3199 int ret; 3480 int ret;
3200 unsigned int irq_want; 3481 unsigned int irq_want;
3201 3482
3202 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3483 irq_want = nr_irqs_gsi;
3203
3204 irq = create_irq_nr(irq_want); 3484 irq = create_irq_nr(irq_want);
3205 if (irq == 0) 3485 if (irq == 0)
3206 return -1; 3486 return -1;
@@ -3214,7 +3494,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3214 goto error; 3494 goto error;
3215no_ir: 3495no_ir:
3216#endif 3496#endif
3217 ret = setup_msi_irq(dev, desc, irq); 3497 ret = setup_msi_irq(dev, msidesc, irq);
3218 if (ret < 0) { 3498 if (ret < 0) {
3219 destroy_irq(irq); 3499 destroy_irq(irq);
3220 return ret; 3500 return ret;
@@ -3232,7 +3512,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3232{ 3512{
3233 unsigned int irq; 3513 unsigned int irq;
3234 int ret, sub_handle; 3514 int ret, sub_handle;
3235 struct msi_desc *desc; 3515 struct msi_desc *msidesc;
3236 unsigned int irq_want; 3516 unsigned int irq_want;
3237 3517
3238#ifdef CONFIG_INTR_REMAP 3518#ifdef CONFIG_INTR_REMAP
@@ -3240,10 +3520,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3240 int index = 0; 3520 int index = 0;
3241#endif 3521#endif
3242 3522
3243 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3523 irq_want = nr_irqs_gsi;
3244 sub_handle = 0; 3524 sub_handle = 0;
3245 list_for_each_entry(desc, &dev->msi_list, list) { 3525 list_for_each_entry(msidesc, &dev->msi_list, list) {
3246 irq = create_irq_nr(irq_want--); 3526 irq = create_irq_nr(irq_want);
3527 irq_want++;
3247 if (irq == 0) 3528 if (irq == 0)
3248 return -1; 3529 return -1;
3249#ifdef CONFIG_INTR_REMAP 3530#ifdef CONFIG_INTR_REMAP
@@ -3275,7 +3556,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3275 } 3556 }
3276no_ir: 3557no_ir:
3277#endif 3558#endif
3278 ret = setup_msi_irq(dev, desc, irq); 3559 ret = setup_msi_irq(dev, msidesc, irq);
3279 if (ret < 0) 3560 if (ret < 0)
3280 goto error; 3561 goto error;
3281 sub_handle++; 3562 sub_handle++;
@@ -3294,24 +3575,18 @@ void arch_teardown_msi_irq(unsigned int irq)
3294 3575
3295#ifdef CONFIG_DMAR 3576#ifdef CONFIG_DMAR
3296#ifdef CONFIG_SMP 3577#ifdef CONFIG_SMP
3297static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3578static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3298{ 3579{
3580 struct irq_desc *desc = irq_to_desc(irq);
3299 struct irq_cfg *cfg; 3581 struct irq_cfg *cfg;
3300 struct msi_msg msg; 3582 struct msi_msg msg;
3301 unsigned int dest; 3583 unsigned int dest;
3302 cpumask_t tmp;
3303 struct irq_desc *desc;
3304 3584
3305 cpus_and(tmp, mask, cpu_online_map); 3585 dest = set_desc_affinity(desc, mask);
3306 if (cpus_empty(tmp)) 3586 if (dest == BAD_APICID)
3307 return; 3587 return;
3308 3588
3309 if (assign_irq_vector(irq, mask)) 3589 cfg = desc->chip_data;
3310 return;
3311
3312 cfg = irq_cfg(irq);
3313 cpus_and(tmp, cfg->domain, mask);
3314 dest = cpu_mask_to_apicid(tmp);
3315 3590
3316 dmar_msi_read(irq, &msg); 3591 dmar_msi_read(irq, &msg);
3317 3592
@@ -3321,9 +3596,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
3321 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3596 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3322 3597
3323 dmar_msi_write(irq, &msg); 3598 dmar_msi_write(irq, &msg);
3324 desc = irq_to_desc(irq);
3325 desc->affinity = mask;
3326} 3599}
3600
3327#endif /* CONFIG_SMP */ 3601#endif /* CONFIG_SMP */
3328 3602
3329struct irq_chip dmar_msi_type = { 3603struct irq_chip dmar_msi_type = {
@@ -3355,24 +3629,18 @@ int arch_setup_dmar_msi(unsigned int irq)
3355#ifdef CONFIG_HPET_TIMER 3629#ifdef CONFIG_HPET_TIMER
3356 3630
3357#ifdef CONFIG_SMP 3631#ifdef CONFIG_SMP
3358static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) 3632static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3359{ 3633{
3634 struct irq_desc *desc = irq_to_desc(irq);
3360 struct irq_cfg *cfg; 3635 struct irq_cfg *cfg;
3361 struct irq_desc *desc;
3362 struct msi_msg msg; 3636 struct msi_msg msg;
3363 unsigned int dest; 3637 unsigned int dest;
3364 cpumask_t tmp;
3365 3638
3366 cpus_and(tmp, mask, cpu_online_map); 3639 dest = set_desc_affinity(desc, mask);
3367 if (cpus_empty(tmp)) 3640 if (dest == BAD_APICID)
3368 return; 3641 return;
3369 3642
3370 if (assign_irq_vector(irq, mask)) 3643 cfg = desc->chip_data;
3371 return;
3372
3373 cfg = irq_cfg(irq);
3374 cpus_and(tmp, cfg->domain, mask);
3375 dest = cpu_mask_to_apicid(tmp);
3376 3644
3377 hpet_msi_read(irq, &msg); 3645 hpet_msi_read(irq, &msg);
3378 3646
@@ -3382,9 +3650,8 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3382 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3650 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3383 3651
3384 hpet_msi_write(irq, &msg); 3652 hpet_msi_write(irq, &msg);
3385 desc = irq_to_desc(irq);
3386 desc->affinity = mask;
3387} 3653}
3654
3388#endif /* CONFIG_SMP */ 3655#endif /* CONFIG_SMP */
3389 3656
3390struct irq_chip hpet_msi_type = { 3657struct irq_chip hpet_msi_type = {
@@ -3437,28 +3704,21 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3437 write_ht_irq_msg(irq, &msg); 3704 write_ht_irq_msg(irq, &msg);
3438} 3705}
3439 3706
3440static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3707static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
3441{ 3708{
3709 struct irq_desc *desc = irq_to_desc(irq);
3442 struct irq_cfg *cfg; 3710 struct irq_cfg *cfg;
3443 unsigned int dest; 3711 unsigned int dest;
3444 cpumask_t tmp;
3445 struct irq_desc *desc;
3446
3447 cpus_and(tmp, mask, cpu_online_map);
3448 if (cpus_empty(tmp))
3449 return;
3450 3712
3451 if (assign_irq_vector(irq, mask)) 3713 dest = set_desc_affinity(desc, mask);
3714 if (dest == BAD_APICID)
3452 return; 3715 return;
3453 3716
3454 cfg = irq_cfg(irq); 3717 cfg = desc->chip_data;
3455 cpus_and(tmp, cfg->domain, mask);
3456 dest = cpu_mask_to_apicid(tmp);
3457 3718
3458 target_ht_irq(irq, dest, cfg->vector); 3719 target_ht_irq(irq, dest, cfg->vector);
3459 desc = irq_to_desc(irq);
3460 desc->affinity = mask;
3461} 3720}
3721
3462#endif 3722#endif
3463 3723
3464static struct irq_chip ht_irq_chip = { 3724static struct irq_chip ht_irq_chip = {
@@ -3476,17 +3736,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3476{ 3736{
3477 struct irq_cfg *cfg; 3737 struct irq_cfg *cfg;
3478 int err; 3738 int err;
3479 cpumask_t tmp;
3480 3739
3481 tmp = TARGET_CPUS; 3740 cfg = irq_cfg(irq);
3482 err = assign_irq_vector(irq, tmp); 3741 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3483 if (!err) { 3742 if (!err) {
3484 struct ht_irq_msg msg; 3743 struct ht_irq_msg msg;
3485 unsigned dest; 3744 unsigned dest;
3486 3745
3487 cfg = irq_cfg(irq); 3746 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
3488 cpus_and(tmp, cfg->domain, tmp);
3489 dest = cpu_mask_to_apicid(tmp);
3490 3747
3491 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3748 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3492 3749
@@ -3522,7 +3779,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3522int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, 3779int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3523 unsigned long mmr_offset) 3780 unsigned long mmr_offset)
3524{ 3781{
3525 const cpumask_t *eligible_cpu = get_cpu_mask(cpu); 3782 const struct cpumask *eligible_cpu = cpumask_of(cpu);
3526 struct irq_cfg *cfg; 3783 struct irq_cfg *cfg;
3527 int mmr_pnode; 3784 int mmr_pnode;
3528 unsigned long mmr_value; 3785 unsigned long mmr_value;
@@ -3530,7 +3787,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3530 unsigned long flags; 3787 unsigned long flags;
3531 int err; 3788 int err;
3532 3789
3533 err = assign_irq_vector(irq, *eligible_cpu); 3790 cfg = irq_cfg(irq);
3791
3792 err = assign_irq_vector(irq, cfg, eligible_cpu);
3534 if (err != 0) 3793 if (err != 0)
3535 return err; 3794 return err;
3536 3795
@@ -3539,8 +3798,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3539 irq_name); 3798 irq_name);
3540 spin_unlock_irqrestore(&vector_lock, flags); 3799 spin_unlock_irqrestore(&vector_lock, flags);
3541 3800
3542 cfg = irq_cfg(irq);
3543
3544 mmr_value = 0; 3801 mmr_value = 0;
3545 entry = (struct uv_IO_APIC_route_entry *)&mmr_value; 3802 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3546 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3803 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3551,7 +3808,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3551 entry->polarity = 0; 3808 entry->polarity = 0;
3552 entry->trigger = 0; 3809 entry->trigger = 0;
3553 entry->mask = 0; 3810 entry->mask = 0;
3554 entry->dest = cpu_mask_to_apicid(*eligible_cpu); 3811 entry->dest = cpu_mask_to_apicid(eligible_cpu);
3555 3812
3556 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3813 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3557 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3814 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3592,29 +3849,16 @@ int __init io_apic_get_redir_entries (int ioapic)
3592 return reg_01.bits.entries; 3849 return reg_01.bits.entries;
3593} 3850}
3594 3851
3595int __init probe_nr_irqs(void) 3852void __init probe_nr_irqs_gsi(void)
3596{ 3853{
3597 int idx; 3854 int idx;
3598 int nr = 0; 3855 int nr = 0;
3599#ifndef CONFIG_XEN
3600 int nr_min = 32;
3601#else
3602 int nr_min = NR_IRQS;
3603#endif
3604 3856
3605 for (idx = 0; idx < nr_ioapics; idx++) 3857 for (idx = 0; idx < nr_ioapics; idx++)
3606 nr += io_apic_get_redir_entries(idx) + 1; 3858 nr += io_apic_get_redir_entries(idx) + 1;
3607 3859
3608 /* double it for hotplug and msi and nmi */ 3860 if (nr > nr_irqs_gsi)
3609 nr <<= 1; 3861 nr_irqs_gsi = nr;
3610
3611 /* something wrong ? */
3612 if (nr < nr_min)
3613 nr = nr_min;
3614 if (WARN_ON(nr > NR_IRQS))
3615 nr = NR_IRQS;
3616
3617 return nr;
3618} 3862}
3619 3863
3620/* -------------------------------------------------------------------------- 3864/* --------------------------------------------------------------------------
@@ -3713,19 +3957,31 @@ int __init io_apic_get_version(int ioapic)
3713 3957
3714int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3958int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
3715{ 3959{
3960 struct irq_desc *desc;
3961 struct irq_cfg *cfg;
3962 int cpu = boot_cpu_id;
3963
3716 if (!IO_APIC_IRQ(irq)) { 3964 if (!IO_APIC_IRQ(irq)) {
3717 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3965 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3718 ioapic); 3966 ioapic);
3719 return -EINVAL; 3967 return -EINVAL;
3720 } 3968 }
3721 3969
3970 desc = irq_to_desc_alloc_cpu(irq, cpu);
3971 if (!desc) {
3972 printk(KERN_INFO "cannot get irq_desc %d\n", irq);
3973 return 0;
3974 }
3975
3722 /* 3976 /*
3723 * IRQs < 16 are already in the irq_2_pin[] map 3977 * IRQs < 16 are already in the irq_2_pin[] map
3724 */ 3978 */
3725 if (irq >= 16) 3979 if (irq >= NR_IRQS_LEGACY) {
3726 add_pin_to_irq(irq, ioapic, pin); 3980 cfg = desc->chip_data;
3981 add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
3982 }
3727 3983
3728 setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); 3984 setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
3729 3985
3730 return 0; 3986 return 0;
3731} 3987}
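
irq_to_desc_alloc_cpu() is the sparse-irq entry point used above: if the descriptor does not exist yet it is allocated (on the given cpu's node), and the irq_cfg comes into existence with it. A sketch of the lookup-or-allocate pattern (wrapper name hypothetical):

    static struct irq_cfg *cfg_for_irq(unsigned int irq, int cpu)
    {
            struct irq_desc *desc = irq_to_desc_alloc_cpu(irq, cpu);

            if (!desc)
                    return NULL;            /* allocation failed */

            return desc->chip_data;         /* cfg allocated alongside */
    }
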
@@ -3761,7 +4017,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3761void __init setup_ioapic_dest(void) 4017void __init setup_ioapic_dest(void)
3762{ 4018{
3763 int pin, ioapic, irq, irq_entry; 4019 int pin, ioapic, irq, irq_entry;
4020 struct irq_desc *desc;
3764 struct irq_cfg *cfg; 4021 struct irq_cfg *cfg;
4022 const struct cpumask *mask;
3765 4023
3766 if (skip_ioapic_setup == 1) 4024 if (skip_ioapic_setup == 1)
3767 return; 4025 return;
@@ -3777,17 +4035,31 @@ void __init setup_ioapic_dest(void)
3777 * when you have too many devices, because at that time only boot 4035 * when you have too many devices, because at that time only boot
3778 * cpu is online. 4036 * cpu is online.
3779 */ 4037 */
3780 cfg = irq_cfg(irq); 4038 desc = irq_to_desc(irq);
3781 if (!cfg->vector) 4039 cfg = desc->chip_data;
3782 setup_IO_APIC_irq(ioapic, pin, irq, 4040 if (!cfg->vector) {
4041 setup_IO_APIC_irq(ioapic, pin, irq, desc,
3783 irq_trigger(irq_entry), 4042 irq_trigger(irq_entry),
3784 irq_polarity(irq_entry)); 4043 irq_polarity(irq_entry));
4044 continue;
4045
4046 }
4047
4048 /*
4049 * Honour affinities which have been set in early boot
4050 */
4051 if (desc->status &
4052 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4053 mask = &desc->affinity;
4054 else
4055 mask = TARGET_CPUS;
4056
3785#ifdef CONFIG_INTR_REMAP 4057#ifdef CONFIG_INTR_REMAP
3786 else if (intr_remapping_enabled) 4058 if (intr_remapping_enabled)
3787 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); 4059 set_ir_ioapic_affinity_irq_desc(desc, mask);
3788#endif
3789 else 4060 else
3790 set_ioapic_affinity_irq(irq, TARGET_CPUS); 4061#endif
4062 set_ioapic_affinity_irq_desc(desc, mask);
3791 } 4063 }
3792 4064
3793 } 4065 }
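
The new logic in setup_ioapic_dest() boils down to one choice: keep an affinity that was pinned during early boot, otherwise fall back to the default target set. As a sketch (hypothetical helper):

    static const struct cpumask *dest_mask_sketch(struct irq_desc *desc)
    {
            if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
                    return &desc->affinity;         /* honour the pin */

            return TARGET_CPUS;                     /* default spread */
    }
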
@@ -3836,7 +4108,6 @@ void __init ioapic_init_mappings(void)
3836 struct resource *ioapic_res; 4108 struct resource *ioapic_res;
3837 int i; 4109 int i;
3838 4110
3839 irq_2_pin_init();
3840 ioapic_res = ioapic_setup_resources(); 4111 ioapic_res = ioapic_setup_resources();
3841 for (i = 0; i < nr_ioapics; i++) { 4112 for (i = 0; i < nr_ioapics; i++) {
3842 if (smp_found_config) { 4113 if (smp_found_config) {
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f35..285bbf8831fa 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
116/* 116/*
117 * This is only used on smaller machines. 117 * This is only used on smaller machines.
118 */ 118 */
119void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) 119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
120{ 120{
121 unsigned long mask = cpus_addr(cpumask)[0]; 121 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags; 122 unsigned long flags;
123 123
124 local_irq_save(flags); 124 local_irq_save(flags);
125 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); 125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector); 126 __send_IPI_dest_field(mask, vector);
127 local_irq_restore(flags); 127 local_irq_restore(flags);
128} 128}
129 129
130void send_IPI_mask_sequence(cpumask_t mask, int vector) 130void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
131{ 131{
132 unsigned long flags; 132 unsigned long flags;
133 unsigned int query_cpu; 133 unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
139 */ 139 */
140 140
141 local_irq_save(flags); 141 local_irq_save(flags);
142 for_each_possible_cpu(query_cpu) { 142 for_each_cpu(query_cpu, mask)
143 if (cpu_isset(query_cpu, mask)) { 143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145}
146
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
148{
149 unsigned long flags;
150 unsigned int query_cpu;
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154
155 local_irq_save(flags);
156 for_each_cpu(query_cpu, mask)
157 if (query_cpu != this_cpu)
144 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), 158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
145 vector); 159 vector);
146 }
147 }
148 local_irq_restore(flags); 160 local_irq_restore(flags);
149} 161}
150 162
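
The loop rewrite in send_IPI_mask_sequence() shows the general iterator change: for_each_cpu() walks only the set bits of the mask, so the explicit for_each_possible_cpu()/cpu_isset() membership test disappears. A sketch of the before/after (wrapper hypothetical; as in this file, the caller is assumed to have irqs disabled):

    static void send_each_sketch(const struct cpumask *mask, int vector)
    {
            unsigned int cpu;

            /* old:
             *      for_each_possible_cpu(cpu)
             *              if (cpu_isset(cpu, mask))
             *                      ...
             * new: the iterator itself skips clear bits
             */
            for_each_cpu(cpu, mask)
                    __send_IPI_dest_field(cpu_to_logical_apicid(cpu),
                                          vector);
    }
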
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..3f1d9d18df67 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
118 } 118 }
119 119
120 desc = irq_to_desc(i); 120 desc = irq_to_desc(i);
121 if (!desc)
122 return 0;
123
121 spin_lock_irqsave(&desc->lock, flags); 124 spin_lock_irqsave(&desc->lock, flags);
122#ifndef CONFIG_SMP 125#ifndef CONFIG_SMP
123 any_count = kstat_irqs(i); 126 any_count = kstat_irqs(i);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..9dc5588f336a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,25 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
233#ifdef CONFIG_HOTPLUG_CPU 233#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h> 234#include <mach_apic.h>
235 235
236void fixup_irqs(cpumask_t map) 236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void)
237{ 238{
238 unsigned int irq; 239 unsigned int irq;
239 static int warned; 240 static int warned;
240 struct irq_desc *desc; 241 struct irq_desc *desc;
241 242
242 for_each_irq_desc(irq, desc) { 243 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 244 const struct cpumask *affinity;
244 245
246 if (!desc)
247 continue;
245 if (irq == 2) 248 if (irq == 2)
246 continue; 249 continue;
247 250
248 cpus_and(mask, desc->affinity, map); 251 affinity = &desc->affinity;
249 if (any_online_cpu(mask) == NR_CPUS) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
250 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
251 mask = map; 254 affinity = cpu_all_mask;
252 } 255 }
253 if (desc->chip->set_affinity) 256 if (desc->chip->set_affinity)
254 desc->chip->set_affinity(irq, mask); 257 desc->chip->set_affinity(irq, affinity);
255 else if (desc->action && !(warned++)) 258 else if (desc->action && !(warned++))
256 printk("Cannot set affinity for irq %i\n", irq); 259 printk("Cannot set affinity for irq %i\n", irq);
257 } 260 }
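
The fixup test above relies on cpumask_any_and() returning >= nr_cpu_ids when the intersection is empty: if no cpu in the irq's affinity mask is still online, the affinity is broken and reset to all cpus. Reduced to a sketch (helper name hypothetical):

    static const struct cpumask *surviving_mask(struct irq_desc *desc)
    {
            const struct cpumask *affinity = &desc->affinity;

            if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids)
                    affinity = cpu_all_mask;        /* break affinity */

            return affinity;
    }
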
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..fca2991443f5 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,40 +83,43 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
83} 83}
84 84
85#ifdef CONFIG_HOTPLUG_CPU 85#ifdef CONFIG_HOTPLUG_CPU
86void fixup_irqs(cpumask_t map) 86/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
87void fixup_irqs(void)
87{ 88{
88 unsigned int irq; 89 unsigned int irq;
89 static int warned; 90 static int warned;
90 struct irq_desc *desc; 91 struct irq_desc *desc;
91 92
92 for_each_irq_desc(irq, desc) { 93 for_each_irq_desc(irq, desc) {
93 cpumask_t mask;
94 int break_affinity = 0; 94 int break_affinity = 0;
95 int set_affinity = 1; 95 int set_affinity = 1;
96 const struct cpumask *affinity;
96 97
98 if (!desc)
99 continue;
97 if (irq == 2) 100 if (irq == 2)
98 continue; 101 continue;
99 102
100 /* interrupts are disabled at this point */ 103 /* interrupts are disabled at this point */
101 spin_lock(&desc->lock); 104 spin_lock(&desc->lock);
102 105
106 affinity = &desc->affinity;
103 if (!irq_has_action(irq) || 107 if (!irq_has_action(irq) ||
104 cpus_equal(desc->affinity, map)) { 108 cpumask_equal(affinity, cpu_online_mask)) {
105 spin_unlock(&desc->lock); 109 spin_unlock(&desc->lock);
106 continue; 110 continue;
107 } 111 }
108 112
109 cpus_and(mask, desc->affinity, map); 113 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
110 if (cpus_empty(mask)) {
111 break_affinity = 1; 114 break_affinity = 1;
112 mask = map; 115 affinity = cpu_all_mask;
113 } 116 }
114 117
115 if (desc->chip->mask) 118 if (desc->chip->mask)
116 desc->chip->mask(irq); 119 desc->chip->mask(irq);
117 120
118 if (desc->chip->set_affinity) 121 if (desc->chip->set_affinity)
119 desc->chip->set_affinity(irq, mask); 122 desc->chip->set_affinity(irq, affinity);
120 else if (!(warned++)) 123 else if (!(warned++))
121 set_affinity = 0; 124 set_affinity = 0;
122 125
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..6a92f47c52e7 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -68,8 +68,7 @@ void __init init_ISA_irqs (void)
68 /* 68 /*
69 * 16 old-style INTA-cycle interrupts: 69 * 16 old-style INTA-cycle interrupts:
70 */ 70 */
71 for (i = 0; i < 16; i++) { 71 for (i = 0; i < NR_IRQS_LEGACY; i++) {
72 /* first time call this irq_desc */
73 struct irq_desc *desc = irq_to_desc(i); 72 struct irq_desc *desc = irq_to_desc(i);
74 73
75 desc->status = IRQ_DISABLED; 74 desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..40c1e62ec785 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,8 +142,7 @@ void __init init_ISA_irqs(void)
142 init_bsp_APIC(); 142 init_bsp_APIC();
143 init_8259A(0); 143 init_8259A(0);
144 144
145 for (i = 0; i < 16; i++) { 145 for (i = 0; i < NR_IRQS_LEGACY; i++) {
146 /* first time call this irq_desc */
147 struct irq_desc *desc = irq_to_desc(i); 146 struct irq_desc *desc = irq_to_desc(i);
148 147
149 desc->status = IRQ_DISABLED; 148 desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 774ac4991568..e169ae9b6a62 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
128} 128}
129 129
130#ifdef CONFIG_X86_LOCAL_APIC 130#ifdef CONFIG_X86_LOCAL_APIC
131static void kvm_setup_secondary_clock(void) 131static void __cpuinit kvm_setup_secondary_clock(void)
132{ 132{
133 /* 133 /*
134 * Now that the first cpu already had this clocksource initialized, 134 * Now that the first cpu already had this clocksource initialized,
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c322..c12314c9e86f 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
287 .set_mode = mfgpt_set_mode, 287 .set_mode = mfgpt_set_mode,
288 .set_next_event = mfgpt_next_event, 288 .set_next_event = mfgpt_next_event,
289 .rating = 250, 289 .rating = 250,
290 .cpumask = CPU_MASK_ALL, 290 .cpumask = cpu_all_mask,
291 .shift = 32 291 .shift = 32
292}; 292};
293 293
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f98f4e1dba09..45e3b69808ba 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -586,23 +586,23 @@ static void __init __get_smp_config(unsigned int early)
586{ 586{
587 struct intel_mp_floating *mpf = mpf_found; 587 struct intel_mp_floating *mpf = mpf_found;
588 588
589 if (x86_quirks->mach_get_smp_config) { 589 if (!mpf)
590 if (x86_quirks->mach_get_smp_config(early)) 590 return;
591 return; 591
592 }
593 if (acpi_lapic && early) 592 if (acpi_lapic && early)
594 return; 593 return;
594
595 /* 595 /*
596 * ACPI supports both logical (e.g. Hyper-Threading) and physical 596 * MPS doesn't support hyperthreading; only
597 * processors, where MPS only supports physical. 597 * thread 0's apic id appears in the MPS table
598 */ 598 */
599 if (acpi_lapic && acpi_ioapic) { 599 if (acpi_lapic && acpi_ioapic)
600 printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
601 "information\n");
602 return; 600 return;
603 } else if (acpi_lapic) 601
604 printk(KERN_INFO "Using ACPI for processor (LAPIC) " 602 if (x86_quirks->mach_get_smp_config) {
605 "configuration information\n"); 603 if (x86_quirks->mach_get_smp_config(early))
604 return;
605 }
606 606
607 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 607 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
608 mpf->mpf_specification); 608 mpf->mpf_specification);
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 4caff39078e0..0deea37a53cf 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,7 +31,7 @@
31#include <asm/numaq.h> 31#include <asm/numaq.h>
32#include <asm/topology.h> 32#include <asm/topology.h>
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/mpspec.h> 34#include <asm/genapic.h>
35#include <asm/e820.h> 35#include <asm/e820.h>
36#include <asm/setup.h> 36#include <asm/setup.h>
37 37
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
235 return 1; 235 return 1;
236} 236}
237 237
238static int __init numaq_update_genapic(void)
239{
240 genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
241
242 return 0;
243}
244
238static struct x86_quirks numaq_x86_quirks __initdata = { 245static struct x86_quirks numaq_x86_quirks __initdata = {
239 .arch_pre_time_init = numaq_pre_time_init, 246 .arch_pre_time_init = numaq_pre_time_init,
240 .arch_time_init = NULL, 247 .arch_time_init = NULL,
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
250 .mpc_oem_pci_bus = mpc_oem_pci_bus, 257 .mpc_oem_pci_bus = mpc_oem_pci_bus,
251 .smp_read_mpc_oem = smp_read_mpc_oem, 258 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids, 259 .setup_ioapic_ids = numaq_setup_ioapic_ids,
260 .update_genapic = numaq_update_genapic,
253}; 261};
254 262
255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 263void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
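
update_genapic is a new x86_quirks hook that lets a platform patch the chosen genapic after the fact; NumaQ uses it to switch secondary-cpu wakeup to the NMI method. A sketch of how a platform would wire it up (quirk struct name hypothetical, fields taken from the hunk above):

    static int __init my_update_genapic(void)
    {
            /* INIT/SIPI does not work here; wake secondaries via NMI */
            genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
            return 0;
    }

    static struct x86_quirks my_quirks __initdata = {
            .update_genapic = my_update_genapic,
    };
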
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 0e9f1982b1dd..95777b0faa73 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,7 +7,8 @@
7 7
8#include <asm/paravirt.h> 8#include <asm/paravirt.h>
9 9
10static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) 10static inline void
11default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
11{ 12{
12 __raw_spin_lock(lock); 13 __raw_spin_lock(lock);
13} 14}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d78f38..d28bbdc35e4e 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
1567 ++p; 1567 ++p;
1568 if (*p == '\0') 1568 if (*p == '\0')
1569 break; 1569 break;
1570 bridge = simple_strtol(p, &endp, 0); 1570 bridge = simple_strtoul(p, &endp, 0);
1571 if (p == endp) 1571 if (p == endp)
1572 break; 1572 break;
1573 1573
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a42b02b4df68..ba7ad83e20a8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size)
123 123
124 spin_lock_irqsave(&iommu_bitmap_lock, flags); 124 spin_lock_irqsave(&iommu_bitmap_lock, flags);
125 iommu_area_free(iommu_gart_bitmap, offset, size); 125 iommu_area_free(iommu_gart_bitmap, offset, size);
126 if (offset >= next_bit)
127 next_bit = offset + size;
126 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 128 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
127} 129}
128 130
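
The free_iommu() change keeps the GART's next-fit scan pointer ahead of a just-freed range, so the space is not handed out again before the IOTLB flush that happens when the scan wraps (the alloc-side behaviour is assumed here; it is not shown in the hunk). Annotated:

    static void free_iommu_sketch(unsigned long offset, int size)
    {
            unsigned long flags;

            spin_lock_irqsave(&iommu_bitmap_lock, flags);
            iommu_area_free(iommu_gart_bitmap, offset, size);
            /* push the scan pointer past the freed range */
            if (offset >= next_bit)
                    next_bit = offset + size;
            spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
    }
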
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..95d811a9594f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,7 +7,9 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <linux/ftrace.h>
10#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/apic.h>
11 13
12unsigned long idle_halt; 14unsigned long idle_halt;
13EXPORT_SYMBOL(idle_halt); 15EXPORT_SYMBOL(idle_halt);
@@ -100,6 +102,9 @@ static inline int hlt_use_halt(void)
100void default_idle(void) 102void default_idle(void)
101{ 103{
102 if (hlt_use_halt()) { 104 if (hlt_use_halt()) {
105 struct power_trace it;
106
107 trace_power_start(&it, POWER_CSTATE, 1);
103 current_thread_info()->status &= ~TS_POLLING; 108 current_thread_info()->status &= ~TS_POLLING;
104 /* 109 /*
105 * TS_POLLING-cleared state must be visible before we 110 * TS_POLLING-cleared state must be visible before we
@@ -112,6 +117,7 @@ void default_idle(void)
112 else 117 else
113 local_irq_enable(); 118 local_irq_enable();
114 current_thread_info()->status |= TS_POLLING; 119 current_thread_info()->status |= TS_POLLING;
120 trace_power_end(&it);
115 } else { 121 } else {
116 local_irq_enable(); 122 local_irq_enable();
117 /* loop is done by the caller */ 123 /* loop is done by the caller */
@@ -122,6 +128,21 @@ void default_idle(void)
122EXPORT_SYMBOL(default_idle); 128EXPORT_SYMBOL(default_idle);
123#endif 129#endif
124 130
131void stop_this_cpu(void *dummy)
132{
133 local_irq_disable();
134 /*
135 * Remove this CPU:
136 */
137 cpu_clear(smp_processor_id(), cpu_online_map);
138 disable_local_APIC();
139
140 for (;;) {
141 if (hlt_works(smp_processor_id()))
142 halt();
143 }
144}
145
125static void do_nothing(void *unused) 146static void do_nothing(void *unused)
126{ 147{
127} 148}
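
stop_this_cpu() gives the reboot/shutdown paths one common "take this cpu offline and halt" helper. A sketch of the intended use from the initiating cpu (the call site is an assumption, not part of this hunk):

    static void halt_other_cpus_sketch(void)
    {
            /* run stop_this_cpu() on every other cpu, don't wait */
            smp_call_function(stop_this_cpu, NULL, 0);
            local_irq_disable();
    }
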
@@ -154,24 +175,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
154 */ 175 */
155void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 176void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
156{ 177{
178 struct power_trace it;
179
180 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
157 if (!need_resched()) { 181 if (!need_resched()) {
158 __monitor((void *)&current_thread_info()->flags, 0, 0); 182 __monitor((void *)&current_thread_info()->flags, 0, 0);
159 smp_mb(); 183 smp_mb();
160 if (!need_resched()) 184 if (!need_resched())
161 __mwait(ax, cx); 185 __mwait(ax, cx);
162 } 186 }
187 trace_power_end(&it);
163} 188}
164 189
165/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 190/* Default MONITOR/MWAIT with no hints, used for default C1 state */
166static void mwait_idle(void) 191static void mwait_idle(void)
167{ 192{
193 struct power_trace it;
168 if (!need_resched()) { 194 if (!need_resched()) {
195 trace_power_start(&it, POWER_CSTATE, 1);
169 __monitor((void *)&current_thread_info()->flags, 0, 0); 196 __monitor((void *)&current_thread_info()->flags, 0, 0);
170 smp_mb(); 197 smp_mb();
171 if (!need_resched()) 198 if (!need_resched())
172 __sti_mwait(0, 0); 199 __sti_mwait(0, 0);
173 else 200 else
174 local_irq_enable(); 201 local_irq_enable();
202 trace_power_end(&it);
175 } else 203 } else
176 local_irq_enable(); 204 local_irq_enable();
177} 205}
@@ -183,9 +211,13 @@ static void mwait_idle(void)
183 */ 211 */
184static void poll_idle(void) 212static void poll_idle(void)
185{ 213{
214 struct power_trace it;
215
216 trace_power_start(&it, POWER_CSTATE, 0);
186 local_irq_enable(); 217 local_irq_enable();
187 while (!need_resched()) 218 while (!need_resched())
188 cpu_relax(); 219 cpu_relax();
220 trace_power_end(&it);
189} 221}
190 222
191/* 223/*
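
Each idle routine is now bracketed by a power_trace record: trace_power_start() notes which C-state is being entered (0 for polling, 1 for C1, deeper values for mwait hints) and trace_power_end() closes the interval on wakeup. A minimal sketch of the pattern (function name hypothetical):

    static void traced_halt_sketch(void)
    {
            struct power_trace it;

            trace_power_start(&it, POWER_CSTATE, 1);        /* enter C1 */
            safe_halt();                                    /* sti; hlt */
            trace_power_end(&it);                           /* woke up */
    }
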
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0a1302fe6d45..24c2276aa453 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/dmi.h> 40#include <linux/dmi.h>
41#include <linux/ftrace.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
@@ -548,7 +549,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
548 * the task-switch, and shows up in ret_from_fork in entry.S, 549 * the task-switch, and shows up in ret_from_fork in entry.S,
549 * for example. 550 * for example.
550 */ 551 */
551struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) 552__notrace_funcgraph struct task_struct *
553__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
552{ 554{
553 struct thread_struct *prev = &prev_p->thread, 555 struct thread_struct *prev = &prev_p->thread,
554 *next = &next_p->thread; 556 *next = &next_p->thread;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c958120fb1b6..fbb321d53d34 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -39,6 +39,7 @@
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/uaccess.h> 40#include <linux/uaccess.h>
41#include <linux/io.h> 41#include <linux/io.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
44#include <asm/system.h> 45#include <asm/system.h>
@@ -551,8 +552,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
551 * - could test fs/gs bitsliced 552 * - could test fs/gs bitsliced
552 * 553 *
553 * Kprobes not supported here. Set the probe on schedule instead. 554 * Kprobes not supported here. Set the probe on schedule instead.
555 * Function graph tracer not supported too.
554 */ 556 */
555struct task_struct * 557__notrace_funcgraph struct task_struct *
556__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 558__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
557{ 559{
558 struct thread_struct *prev = &prev_p->thread; 560 struct thread_struct *prev = &prev_p->thread;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..2c8ec1ba75e6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
668 size_t bts_index, bts_end; 668 size_t bts_index, bts_end;
669 int error; 669 int error;
670 670
671 error = ds_get_bts_end(child, &bts_end); 671 error = ds_get_bts_end(child->bts, &bts_end);
672 if (error < 0) 672 if (error < 0)
673 return error; 673 return error;
674 674
675 if (bts_end <= index) 675 if (bts_end <= index)
676 return -EINVAL; 676 return -EINVAL;
677 677
678 error = ds_get_bts_index(child, &bts_index); 678 error = ds_get_bts_index(child->bts, &bts_index);
679 if (error < 0) 679 if (error < 0)
680 return error; 680 return error;
681 681
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
684 if (bts_end <= bts_index) 684 if (bts_end <= bts_index)
685 bts_index -= bts_end; 685 bts_index -= bts_end;
686 686
687 error = ds_access_bts(child, bts_index, &bts_record); 687 error = ds_access_bts(child->bts, bts_index, &bts_record);
688 if (error < 0) 688 if (error < 0)
689 return error; 689 return error;
690 690
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
705 size_t end, i; 705 size_t end, i;
706 int error; 706 int error;
707 707
708 error = ds_get_bts_index(child, &end); 708 error = ds_get_bts_index(child->bts, &end);
709 if (error < 0) 709 if (error < 0)
710 return error; 710 return error;
711 711
712 if (size < (end * sizeof(struct bts_struct))) 712 if (size < (end * sizeof(struct bts_struct)))
713 return -EIO; 713 return -EIO;
714 714
715 error = ds_access_bts(child, 0, (const void **)&raw); 715 error = ds_access_bts(child->bts, 0, (const void **)&raw);
716 if (error < 0) 716 if (error < 0)
717 return error; 717 return error;
718 718
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
723 return -EFAULT; 723 return -EFAULT;
724 } 724 }
725 725
726 error = ds_clear_bts(child); 726 error = ds_clear_bts(child->bts);
727 if (error < 0) 727 if (error < 0)
728 return error; 728 return error;
729 729
730 return end; 730 return end;
731} 731}
732 732
733static void ptrace_bts_ovfl(struct task_struct *child)
734{
735 send_sig(child->thread.bts_ovfl_signal, child, 0);
736}
737
738static int ptrace_bts_config(struct task_struct *child, 733static int ptrace_bts_config(struct task_struct *child,
739 long cfg_size, 734 long cfg_size,
740 const struct ptrace_bts_config __user *ucfg) 735 const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,45 @@ static int ptrace_bts_config(struct task_struct *child,
760 goto errout; 755 goto errout;
761 756
762 if (cfg.flags & PTRACE_BTS_O_ALLOC) { 757 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
763 ds_ovfl_callback_t ovfl = NULL; 758 bts_ovfl_callback_t ovfl = NULL;
764 unsigned int sig = 0; 759 unsigned int sig = 0;
765 760
766 /* we ignore the error in case we were not tracing child */ 761 error = -EINVAL;
767 (void)ds_release_bts(child); 762 if (cfg.size < (10 * bts_cfg.sizeof_bts))
763 goto errout;
768 764
769 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 765 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
770 if (!cfg.signal) 766 if (!cfg.signal)
771 goto errout; 767 goto errout;
772 768
769 error = -EOPNOTSUPP;
770 goto errout;
771
773 sig = cfg.signal; 772 sig = cfg.signal;
774 ovfl = ptrace_bts_ovfl;
775 } 773 }
776 774
777 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); 775 if (child->bts) {
778 if (error < 0) 776 (void)ds_release_bts(child->bts);
777 kfree(child->bts_buffer);
778
779 child->bts = NULL;
780 child->bts_buffer = NULL;
781 }
782
783 error = -ENOMEM;
784 child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
785 if (!child->bts_buffer)
786 goto errout;
787
788 child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
789 ovfl, /* th = */ (size_t)-1);
790 if (IS_ERR(child->bts)) {
791 error = PTR_ERR(child->bts);
792 kfree(child->bts_buffer);
793 child->bts = NULL;
794 child->bts_buffer = NULL;
779 goto errout; 795 goto errout;
796 }
780 797
781 child->thread.bts_ovfl_signal = sig; 798 child->thread.bts_ovfl_signal = sig;
782 } 799 }
@@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child,
823 if (cfg_size < sizeof(cfg)) 840 if (cfg_size < sizeof(cfg))
824 return -EIO; 841 return -EIO;
825 842
826 error = ds_get_bts_end(child, &end); 843 error = ds_get_bts_end(child->bts, &end);
827 if (error < 0) 844 if (error < 0)
828 return error; 845 return error;
829 846
830 error = ds_access_bts(child, /* index = */ 0, &base); 847 error = ds_access_bts(child->bts, /* index = */ 0, &base);
831 if (error < 0) 848 if (error < 0)
832 return error; 849 return error;
833 850
834 error = ds_access_bts(child, /* index = */ end, &max); 851 error = ds_access_bts(child->bts, /* index = */ end, &max);
835 if (error < 0) 852 if (error < 0)
836 return error; 853 return error;
837 854
@@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
884 return -EINVAL; 901 return -EINVAL;
885 } 902 }
886 903
887 /* The writing task will be the switched-to task on a context 904 return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
888 * switch. It needs to write into the switched-from task's BTS
889 * buffer. */
890 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
891} 905}
892 906
893void ptrace_bts_take_timestamp(struct task_struct *tsk, 907void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
929 switch (c->x86) { 943 switch (c->x86) {
930 case 0x6: 944 case 0x6:
931 switch (c->x86_model) { 945 switch (c->x86_model) {
946 case 0 ... 0xC:
947 /* sorry, don't know about them */
948 break;
932 case 0xD: 949 case 0xD:
933 case 0xE: /* Pentium M */ 950 case 0xE: /* Pentium M */
934 bts_configure(&bts_cfg_pentium_m); 951 bts_configure(&bts_cfg_pentium_m);
935 break; 952 break;
936 case 0xF: /* Core2 */ 953 default: /* Core2, Atom, ... */
937 case 0x1C: /* Atom */
938 bts_configure(&bts_cfg_core2); 954 bts_configure(&bts_cfg_core2);
939 break; 955 break;
940 default:
941 /* sorry, don't know about them */
942 break;
943 } 956 }
944 break; 957 break;
945 case 0xF: 958 case 0xF:
@@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child)
973 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 986 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
974#endif 987#endif
975#ifdef CONFIG_X86_PTRACE_BTS 988#ifdef CONFIG_X86_PTRACE_BTS
976 (void)ds_release_bts(child); 989 if (child->bts) {
990 (void)ds_release_bts(child->bts);
991 kfree(child->bts_buffer);
992 child->bts_buffer = NULL;
977 993
978 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; 994 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
979 if (!child->thread.debugctlmsr) 995 if (!child->thread.debugctlmsr)
980 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 996 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
981 997
982 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 998 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
999 }
983#endif /* CONFIG_X86_PTRACE_BTS */ 1000#endif /* CONFIG_X86_PTRACE_BTS */
984} 1001}
985 1002
@@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1111 (child, data, (struct ptrace_bts_config __user *)addr); 1128 (child, data, (struct ptrace_bts_config __user *)addr);
1112 break; 1129 break;
1113 1130
1114 case PTRACE_BTS_SIZE: 1131 case PTRACE_BTS_SIZE: {
1115 ret = ds_get_bts_index(child, /* pos = */ NULL); 1132 size_t size;
1133
1134 ret = ds_get_bts_index(child->bts, &size);
1135 if (ret == 0) {
1136 BUG_ON(size != (int) size);
1137 ret = (int) size;
1138 }
1116 break; 1139 break;
1140 }
1117 1141
1118 case PTRACE_BTS_GET: 1142 case PTRACE_BTS_GET:
1119 ret = ptrace_bts_read_record 1143 ret = ptrace_bts_read_record
@@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1121 break; 1145 break;
1122 1146
1123 case PTRACE_BTS_CLEAR: 1147 case PTRACE_BTS_CLEAR:
1124 ret = ds_clear_bts(child); 1148 ret = ds_clear_bts(child->bts);
1125 break; 1149 break;
1126 1150
1127 case PTRACE_BTS_DRAIN: 1151 case PTRACE_BTS_DRAIN:
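
The reworked BTS code has ds_request_bts() return a tracer handle that is either a valid pointer or an encoded errno, tested with IS_ERR()/PTR_ERR(). A self-contained userspace rendering of that error-pointer idiom; the ERR_PTR helpers are reimplemented here, and request_tracer() is a made-up stand-in for ds_request_bts():

```c
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>

/* Userspace re-implementation of the kernel's ERR_PTR()/IS_ERR()
 * idiom, for illustration only. */
#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct tracer { size_t size; };

static struct tracer *request_tracer(size_t size)
{
	struct tracer *t;

	if (size == 0)
		return ERR_PTR(-EINVAL);	/* errno travels in the pointer */
	t = malloc(sizeof(*t));
	if (!t)
		return ERR_PTR(-ENOMEM);
	t->size = size;
	return t;
}

int main(void)
{
	struct tracer *t = request_tracer(0);

	if (IS_ERR(t)) {			/* one return value, two meanings */
		printf("request failed: %ld\n", PTR_ERR(t));
		return 1;
	}
	free(t);
	return 0;
}
```
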
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c3cd512484e5..ba7b9a0e6063 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -39,7 +39,10 @@ int reboot_force;
39static int reboot_cpu = -1; 39static int reboot_cpu = -1;
40#endif 40#endif
41 41
42/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] 42/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
43bool port_cf9_safe = false;
44
45/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
43 warm Don't set the cold reboot flag 46 warm Don't set the cold reboot flag
44 cold Set the cold reboot flag 47 cold Set the cold reboot flag
45 bios Reboot by jumping through the BIOS (only for X86_32) 48 bios Reboot by jumping through the BIOS (only for X86_32)
@@ -48,6 +51,7 @@ static int reboot_cpu = -1;
48 kbd Use the keyboard controller. cold reset (default) 51 kbd Use the keyboard controller. cold reset (default)
49 acpi Use the RESET_REG in the FADT 52 acpi Use the RESET_REG in the FADT
50 efi Use efi reset_system runtime service 53 efi Use efi reset_system runtime service
54 pci Use the so-called "PCI reset register", CF9
51 force Avoid anything that could hang. 55 force Avoid anything that could hang.
52 */ 56 */
53static int __init reboot_setup(char *str) 57static int __init reboot_setup(char *str)
@@ -82,6 +86,7 @@ static int __init reboot_setup(char *str)
82 case 'k': 86 case 'k':
83 case 't': 87 case 't':
84 case 'e': 88 case 'e':
89 case 'p':
85 reboot_type = *str; 90 reboot_type = *str;
86 break; 91 break;
87 92
@@ -172,6 +177,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
172 DMI_MATCH(DMI_BOARD_NAME, "0KW626"), 177 DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
173 }, 178 },
174 }, 179 },
180 { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
181 .callback = set_bios_reboot,
182 .ident = "Dell OptiPlex 330",
183 .matches = {
184 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
185 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"),
186 DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
187 },
188 },
175 { /* Handle problems with rebooting on Dell 2400's */ 189 { /* Handle problems with rebooting on Dell 2400's */
176 .callback = set_bios_reboot, 190 .callback = set_bios_reboot,
177 .ident = "Dell PowerEdge 2400", 191 .ident = "Dell PowerEdge 2400",
@@ -398,12 +412,27 @@ static void native_machine_emergency_restart(void)
398 reboot_type = BOOT_KBD; 412 reboot_type = BOOT_KBD;
399 break; 413 break;
400 414
401
402 case BOOT_EFI: 415 case BOOT_EFI:
403 if (efi_enabled) 416 if (efi_enabled)
404 efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, 417 efi.reset_system(reboot_mode ?
418 EFI_RESET_WARM :
419 EFI_RESET_COLD,
405 EFI_SUCCESS, 0, NULL); 420 EFI_SUCCESS, 0, NULL);
421 reboot_type = BOOT_KBD;
422 break;
406 423
424 case BOOT_CF9:
425 port_cf9_safe = true;
426 /* fall through */
427
428 case BOOT_CF9_COND:
429 if (port_cf9_safe) {
430 u8 cf9 = inb(0xcf9) & ~6;
431 outb(cf9|2, 0xcf9); /* Request hard reset */
432 udelay(50);
433 outb(cf9|6, 0xcf9); /* Actually do the reset */
434 udelay(50);
435 }
407 reboot_type = BOOT_KBD; 436 reboot_type = BOOT_KBD;
408 break; 437 break;
409 } 438 }
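
The new BOOT_CF9 path resets the machine through I/O port 0xCF9: clear the two reset bits, write bit 1 to request a hard reset, then write bit 2 to trigger it, with a short delay between the writes. A userspace sketch of the same sequence; it needs root plus ioperm(), and actually running it would reset the box, so it is guarded and meant as documentation rather than a tool:

```c
#include <sys/io.h>
#include <unistd.h>

/* Sketch of the CF9 sequence from the hunk above. usleep(50) stands
 * in for the kernel's udelay(50). */
static void cf9_hard_reset(void)
{
	unsigned char cf9;

	if (ioperm(0xcf9, 1, 1))	/* gain access to the port */
		return;

	cf9 = inb(0xcf9) & ~6;		/* clear the two reset bits */
	outb(cf9 | 2, 0xcf9);		/* bit 1: request a hard reset */
	usleep(50);
	outb(cf9 | 6, 0xcf9);		/* bit 2: actually do the reset */
	usleep(50);
}

int main(int argc, char **argv)
{
	(void)argv;
	if (argc > 1)			/* deliberately hard to run by accident */
		cf9_hard_reset();
	return 0;
}
```
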
@@ -464,6 +493,11 @@ static void native_machine_restart(char *__unused)
464 493
465static void native_machine_halt(void) 494static void native_machine_halt(void)
466{ 495{
496 /* stop other cpus and apics */
497 machine_shutdown();
498
499 /* stop this cpu */
500 stop_this_cpu(NULL);
467} 501}
468 502
469static void native_machine_power_off(void) 503static void native_machine_power_off(void)
@@ -558,10 +592,7 @@ static int crash_nmi_callback(struct notifier_block *self,
558 592
559static void smp_send_nmi_allbutself(void) 593static void smp_send_nmi_allbutself(void)
560{ 594{
561 cpumask_t mask = cpu_online_map; 595 send_IPI_allbutself(NMI_VECTOR);
562 cpu_clear(safe_smp_processor_id(), mask);
563 if (!cpus_empty(mask))
564 send_IPI_mask(mask, NMI_VECTOR);
565} 596}
566 597
567static struct notifier_block crash_nmi_nb = { 598static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd3..32fe42f26e79 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -583,7 +583,20 @@ static int __init setup_elfcorehdr(char *arg)
583early_param("elfcorehdr", setup_elfcorehdr); 583early_param("elfcorehdr", setup_elfcorehdr);
584#endif 584#endif
585 585
586static struct x86_quirks default_x86_quirks __initdata; 586static int __init default_update_genapic(void)
587{
588#ifdef CONFIG_X86_SMP
589# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
590 genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
591# endif
592#endif
593
594 return 0;
595}
596
597static struct x86_quirks default_x86_quirks __initdata = {
598 .update_genapic = default_update_genapic,
599};
587 600
588struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; 601struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
589 602
@@ -764,7 +777,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
764 .callback = dmi_low_memory_corruption, 777 .callback = dmi_low_memory_corruption,
765 .ident = "Phoenix BIOS", 778 .ident = "Phoenix BIOS",
766 .matches = { 779 .matches = {
767 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), 780 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
768 }, 781 },
769 }, 782 },
770#endif 783#endif
@@ -794,6 +807,9 @@ void __init setup_arch(char **cmdline_p)
794 printk(KERN_INFO "Command line: %s\n", boot_command_line); 807 printk(KERN_INFO "Command line: %s\n", boot_command_line);
795#endif 808#endif
796 809
810 /* VMI may relocate the fixmap; do this before touching ioremap area */
811 vmi_init();
812
797 early_cpu_init(); 813 early_cpu_init();
798 early_ioremap_init(); 814 early_ioremap_init();
799 815
@@ -880,13 +896,8 @@ void __init setup_arch(char **cmdline_p)
880 check_efer(); 896 check_efer();
881#endif 897#endif
882 898
883#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) 899 /* Must be before kernel pagetables are setup */
884 /* 900 vmi_activate();
885 * Must be before kernel pagetables are setup
886 * or fixmap area is touched.
887 */
888 vmi_init();
889#endif
890 901
891 /* after early param, so could get panic from serial */ 902 /* after early param, so could get panic from serial */
892 reserve_early_setup_data(); 903 reserve_early_setup_data();
@@ -1082,7 +1093,7 @@ void __init setup_arch(char **cmdline_p)
1082 ioapic_init_mappings(); 1093 ioapic_init_mappings();
1083 1094
1084 /* need to wait for io_apic is mapped */ 1095 /* need to wait for io_apic is mapped */
1085 nr_irqs = probe_nr_irqs(); 1096 probe_nr_irqs_gsi();
1086 1097
1087 kvm_guest_init(); 1098 kvm_guest_init();
1088 1099
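
default_x86_quirks now supplies an update_genapic() hook that the APIC probe code invokes (see the mach-generic/probe.c hunks later in this diff), letting a platform override the CPU wakeup method after a genapic is chosen. The quirk-table pattern in miniature, with illustrative names:

```c
#include <stdio.h>

/* Sketch of the quirk-callback pattern: a default ops table whose
 * hooks platform code may replace before the call sites run. */
struct platform_quirks {
	int (*update_genapic)(void);
};

static int default_update_genapic(void)
{
	puts("using INIT/SIPI wakeup");
	return 0;
}

static struct platform_quirks default_quirks = {
	.update_genapic = default_update_genapic,
};

static struct platform_quirks *quirks = &default_quirks;

int main(void)
{
	if (quirks->update_genapic)	/* call sites stay hook-agnostic */
		quirks->update_genapic();
	return 0;
}
```
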
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..0b63b08e7530 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void)
152 old_size = PERCPU_ENOUGH_ROOM; 152 old_size = PERCPU_ENOUGH_ROOM;
153 align = max_t(unsigned long, PAGE_SIZE, align); 153 align = max_t(unsigned long, PAGE_SIZE, align);
154 size = roundup(old_size, align); 154 size = roundup(old_size, align);
155
156 printk(KERN_INFO
157 "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
158 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
159
155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 160 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
156 size); 161 size);
157 162
@@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void)
168 "cpu %d has no node %d or node-local memory\n", 173 "cpu %d has no node %d or node-local memory\n",
169 cpu, node); 174 cpu, node);
170 if (ptr) 175 if (ptr)
171 printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", 176 printk(KERN_DEBUG
177 "per cpu data for cpu%d at %016lx\n",
172 cpu, __pa(ptr)); 178 cpu, __pa(ptr));
173 } 179 }
174 else { 180 else {
175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 181 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
176 __pa(MAX_DMA_ADDRESS)); 182 __pa(MAX_DMA_ADDRESS));
177 if (ptr) 183 if (ptr)
178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 184 printk(KERN_DEBUG
179 cpu, node, __pa(ptr)); 185 "per cpu data for cpu%d on node%d "
186 "at %016lx\n",
187 cpu, node, __pa(ptr));
180 } 188 }
181#endif 189#endif
182 per_cpu_offset(cpu) = ptr - __per_cpu_start; 190 per_cpu_offset(cpu) = ptr - __per_cpu_start;
183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 191 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
184 } 192 }
185 193
186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
187 NR_CPUS, nr_cpu_ids, nr_node_ids);
188
189 /* Setup percpu data maps */ 194 /* Setup percpu data maps */
190 setup_per_cpu_maps(); 195 setup_per_cpu_maps();
191 196
@@ -282,7 +287,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
282 else 287 else
283 cpu_clear(cpu, *mask); 288 cpu_clear(cpu, *mask);
284 289
285 cpulist_scnprintf(buf, sizeof(buf), *mask); 290 cpulist_scnprintf(buf, sizeof(buf), mask);
286 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 291 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
287 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); 292 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
288 } 293 }
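
setup_per_cpu_areas() prefers node-local bootmem and falls back to a generic allocation, logging which path it took; the NR_CPUS/nr_cpu_ids summary printk moves to a single line up front. The allocate-local-else-fallback shape, as a userspace analogue with made-up allocator names:

```c
#include <stdio.h>
#include <stdlib.h>

/* Analogue of the per-cpu setup above: try node-local memory first,
 * fall back to a generic allocator, and log which path was taken. */
static void *alloc_node_local(int node, size_t size)
{
	(void)node; (void)size;
	return NULL;		/* pretend the node has no local memory */
}

static void *alloc_percpu_area(int cpu, int node, size_t size)
{
	void *ptr = alloc_node_local(node, size);

	if (!ptr) {
		printf("cpu %d has no node %d or node-local memory\n",
		       cpu, node);
		ptr = malloc(size);	/* generic fallback */
	}
	if (ptr)
		printf("per cpu data for cpu%d at %p\n", cpu, ptr);
	return ptr;
}

int main(void)
{
	void *p = alloc_percpu_area(0, 0, 4096);

	free(p);
	return 0;
}
```
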
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18f9b19f5f8f..49ed667b06f3 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,41 +118,28 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 118 WARN_ON(1);
119 return; 119 return;
120 } 120 }
121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 121 send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 122}
123 123
124void native_send_call_func_single_ipi(int cpu) 124void native_send_call_func_single_ipi(int cpu)
125{ 125{
126 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); 126 send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
127} 127}
128 128
129void native_send_call_func_ipi(cpumask_t mask) 129void native_send_call_func_ipi(const struct cpumask *mask)
130{ 130{
131 cpumask_t allbutself; 131 cpumask_t allbutself;
132 132
133 allbutself = cpu_online_map; 133 allbutself = cpu_online_map;
134 cpu_clear(smp_processor_id(), allbutself); 134 cpu_clear(smp_processor_id(), allbutself);
135 135
136 if (cpus_equal(mask, allbutself) && 136 if (cpus_equal(*mask, allbutself) &&
137 cpus_equal(cpu_online_map, cpu_callout_map)) 137 cpus_equal(cpu_online_map, cpu_callout_map))
138 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 138 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
139 else 139 else
140 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 140 send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
141} 141}
142 142
143static void stop_this_cpu(void *dummy)
144{
145 local_irq_disable();
146 /*
147 * Remove this CPU:
148 */
149 cpu_clear(smp_processor_id(), cpu_online_map);
150 disable_local_APIC();
151 if (hlt_works(smp_processor_id()))
152 for (;;) halt();
153 for (;;);
154}
155
156/* 143/*
157 * this function calls the 'stop' function on all other CPUs in the system. 144 * this function calls the 'stop' function on all other CPUs in the system.
158 */ 145 */
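
smp.c migrates from passing cpumask_t by value (cpumask_of_cpu()) to passing const struct cpumask * (cpumask_of()), which stops copying a potentially large bitmap through every call. A small model of why the by-pointer convention matters once NR_CPUS grows:

```c
#include <stdio.h>
#include <string.h>

/* Model of the cpumask-by-pointer convention the hunk migrates to.
 * A real cpumask is a bitmap sized by NR_CPUS; at 4096 CPUs that is
 * 512 bytes, far too much to pass by value on every IPI. */
#define NR_CPUS 4096

struct cpumask { unsigned long bits[NR_CPUS / (8 * sizeof(long))]; };

static void send_ipi_mask(const struct cpumask *mask, int vector)
{
	/* only a pointer crosses the call, not the whole bitmap */
	printf("IPI vector %#x to mask at %p\n", vector, (const void *)mask);
}

int main(void)
{
	static struct cpumask mask;	/* static: too big for the stack */

	memset(&mask, 0, sizeof(mask));
	mask.bits[0] = 1UL << 3;	/* cpu 3, like cpumask_of(3) */
	send_ipi_mask(&mask, 0xfd);
	return 0;
}
```
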
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b1093397319..be9466788043 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
62#include <asm/mtrr.h> 62#include <asm/mtrr.h>
63#include <asm/vmi.h> 63#include <asm/vmi.h>
64#include <asm/genapic.h> 64#include <asm/genapic.h>
65#include <asm/setup.h>
65#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
66 67
67#include <mach_apic.h> 68#include <mach_apic.h>
@@ -101,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings);
101/* Last level cache ID of each logical CPU */ 102/* Last level cache ID of each logical CPU */
102DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; 103DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
103 104
104/* bitmap of online cpus */
105cpumask_t cpu_online_map __read_mostly;
106EXPORT_SYMBOL(cpu_online_map);
107
108cpumask_t cpu_callin_map; 105cpumask_t cpu_callin_map;
109cpumask_t cpu_callout_map; 106cpumask_t cpu_callout_map;
110cpumask_t cpu_possible_map;
111EXPORT_SYMBOL(cpu_possible_map);
112 107
113/* representing HT siblings of each logical CPU */ 108/* representing HT siblings of each logical CPU */
114DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); 109DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -294,9 +289,7 @@ static void __cpuinit start_secondary(void *unused)
294 * fragile that we want to limit the things done here to the 289 * fragile that we want to limit the things done here to the
295 * most necessary things. 290 * most necessary things.
296 */ 291 */
297#ifdef CONFIG_VMI
298 vmi_bringup(); 292 vmi_bringup();
299#endif
300 cpu_init(); 293 cpu_init();
301 preempt_disable(); 294 preempt_disable();
302 smp_callin(); 295 smp_callin();
@@ -536,7 +529,7 @@ static void impress_friends(void)
536 pr_debug("Before bogocount - setting activated=1.\n"); 529 pr_debug("Before bogocount - setting activated=1.\n");
537} 530}
538 531
539static inline void __inquire_remote_apic(int apicid) 532void __inquire_remote_apic(int apicid)
540{ 533{
541 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 534 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
542 char *names[] = { "ID", "VERSION", "SPIV" }; 535 char *names[] = { "ID", "VERSION", "SPIV" };
@@ -575,14 +568,13 @@ static inline void __inquire_remote_apic(int apicid)
575 } 568 }
576} 569}
577 570
578#ifdef WAKE_SECONDARY_VIA_NMI
579/* 571/*
580 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal 572 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
581 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 573 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
582 * won't ... remember to clear down the APIC, etc later. 574 * won't ... remember to clear down the APIC, etc later.
583 */ 575 */
584static int __devinit 576int __devinit
585wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) 577wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
586{ 578{
587 unsigned long send_status, accept_status = 0; 579 unsigned long send_status, accept_status = 0;
588 int maxlvt; 580 int maxlvt;
@@ -599,7 +591,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
599 * Give the other CPU some time to accept the IPI. 591 * Give the other CPU some time to accept the IPI.
600 */ 592 */
601 udelay(200); 593 udelay(200);
602 if (APIC_INTEGRATED(apic_version[phys_apicid])) { 594 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
603 maxlvt = lapic_get_maxlvt(); 595 maxlvt = lapic_get_maxlvt();
604 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 596 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
605 apic_write(APIC_ESR, 0); 597 apic_write(APIC_ESR, 0);
@@ -614,11 +606,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
614 606
615 return (send_status | accept_status); 607 return (send_status | accept_status);
616} 608}
617#endif /* WAKE_SECONDARY_VIA_NMI */
618 609
619#ifdef WAKE_SECONDARY_VIA_INIT 610int __devinit
620static int __devinit 611wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
621wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
622{ 612{
623 unsigned long send_status, accept_status = 0; 613 unsigned long send_status, accept_status = 0;
624 int maxlvt, num_starts, j; 614 int maxlvt, num_starts, j;
@@ -737,7 +727,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
737 727
738 return (send_status | accept_status); 728 return (send_status | accept_status);
739} 729}
740#endif /* WAKE_SECONDARY_VIA_INIT */
741 730
742struct create_idle { 731struct create_idle {
743 struct work_struct work; 732 struct work_struct work;
@@ -1355,7 +1344,7 @@ void cpu_disable_common(void)
1355 lock_vector_lock(); 1344 lock_vector_lock();
1356 remove_cpu_from_maps(cpu); 1345 remove_cpu_from_maps(cpu);
1357 unlock_vector_lock(); 1346 unlock_vector_lock();
1358 fixup_irqs(cpu_online_map); 1347 fixup_irqs();
1359} 1348}
1360 1349
1361int native_cpu_disable(void) 1350int native_cpu_disable(void)
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/uaccess.h>
9#include <asm/stacktrace.h> 10#include <asm/stacktrace.h>
10 11
11static void save_stack_warning(void *data, char *msg) 12static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
83 trace->entries[trace->nr_entries++] = ULONG_MAX; 84 trace->entries[trace->nr_entries++] = ULONG_MAX;
84} 85}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk); 86EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
87
88/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
89
90struct stack_frame {
91 const void __user *next_fp;
92 unsigned long ret_addr;
93};
94
95static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
96{
97 int ret;
98
99 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
100 return 0;
101
102 ret = 1;
103 pagefault_disable();
104 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
105 ret = 0;
106 pagefault_enable();
107
108 return ret;
109}
110
111static inline void __save_stack_trace_user(struct stack_trace *trace)
112{
113 const struct pt_regs *regs = task_pt_regs(current);
114 const void __user *fp = (const void __user *)regs->bp;
115
116 if (trace->nr_entries < trace->max_entries)
117 trace->entries[trace->nr_entries++] = regs->ip;
118
119 while (trace->nr_entries < trace->max_entries) {
120 struct stack_frame frame;
121
122 frame.next_fp = NULL;
123 frame.ret_addr = 0;
124 if (!copy_stack_frame(fp, &frame))
125 break;
126 if ((unsigned long)fp < regs->sp)
127 break;
128 if (frame.ret_addr) {
129 trace->entries[trace->nr_entries++] =
130 frame.ret_addr;
131 }
132 if (fp == frame.next_fp)
133 break;
134 fp = frame.next_fp;
135 }
136}
137
138void save_stack_trace_user(struct stack_trace *trace)
139{
140 /*
141 * Trace user stack if we are not a kernel thread
142 */
143 if (current->mm) {
144 __save_stack_trace_user(trace);
145 }
146 if (trace->nr_entries < trace->max_entries)
147 trace->entries[trace->nr_entries++] = ULONG_MAX;
148}
149
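
__save_stack_trace_user() walks the user frame-pointer chain: each struct stack_frame is copied with page faults disabled, ret_addr is recorded, and the walk stops when the chain leaves the stack or repeats a frame. A userspace analogue that walks its own frames; it is only meaningful when built with -fno-omit-frame-pointer, and it has no equivalent of the kernel's fault-safe copy:

```c
#include <stdio.h>

/* x86 frame layout: the saved frame pointer sits at [fp], the return
 * address just above it -- hence this struct's field order. */
struct stack_frame {
	const struct stack_frame *next_fp;
	unsigned long ret_addr;
};

static void walk_stack(void)
{
	const struct stack_frame *fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp && depth < 16) {
		if (!fp->ret_addr)
			break;
		printf("#%d  %#lx\n", depth++, fp->ret_addr);
		if (fp->next_fp <= fp)	/* chain must move up the stack */
			break;
		fp = fp->next_fp;
	}
}

int main(void)
{
	walk_stack();
	return 0;
}
```
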
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3513b6..174ea90d1cbd 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -164,7 +164,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
164 * We have to send the IPI only to 164 * We have to send the IPI only to
165 * CPUs affected. 165 * CPUs affected.
166 */ 166 */
167 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); 167 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
168 168
169 while (!cpus_empty(flush_cpumask)) 169 while (!cpus_empty(flush_cpumask))
170 /* nothing. lockup detection does not belong here */ 170 /* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 8f919ca69494..de6f1bda0c50 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 191 * We have to send the IPI only to
192 * CPUs affected. 192 * CPUs affected.
193 */ 193 */
194 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); 194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
195 195
196 while (!cpus_empty(f->flush_cpumask)) 196 while (!cpus_empty(f->flush_cpumask))
197 cpu_relax(); 197 cpu_relax();
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9ffb01c31c40..1c0dfbca87c1 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void)
46 cycles_t start, now, prev, end; 46 cycles_t start, now, prev, end;
47 int i; 47 int i;
48 48
49 rdtsc_barrier();
49 start = get_cycles(); 50 start = get_cycles();
51 rdtsc_barrier();
50 /* 52 /*
51 * The measurement runs for 20 msecs: 53 * The measurement runs for 20 msecs:
52 */ 54 */
@@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void)
61 */ 63 */
62 __raw_spin_lock(&sync_lock); 64 __raw_spin_lock(&sync_lock);
63 prev = last_tsc; 65 prev = last_tsc;
66 rdtsc_barrier();
64 now = get_cycles(); 67 now = get_cycles();
68 rdtsc_barrier();
65 last_tsc = now; 69 last_tsc = now;
66 __raw_spin_unlock(&sync_lock); 70 __raw_spin_unlock(&sync_lock);
67 71
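
check_tsc_warp() now fences each get_cycles() with rdtsc_barrier(), since RDTSC is not a serializing instruction and can be reordered around neighbouring operations, skewing the warp measurement. A sketch of a serialized TSC read; using lfence on both sides is one common choice, whereas the kernel picks mfence or lfence per CPU vendor, so treat this as an approximation:

```c
#include <stdint.h>
#include <stdio.h>

/* Approximation of the rdtsc_barrier(); get_cycles(); rdtsc_barrier()
 * sequence above: fence the timestamp read on both sides. */
static inline uint64_t rdtsc_ordered(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__("lfence" ::: "memory");
	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
	__asm__ __volatile__("lfence" ::: "memory");
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint64_t a = rdtsc_ordered();
	uint64_t b = rdtsc_ordered();

	printf("delta: %llu cycles\n", (unsigned long long)(b - a));
	return 0;
}
```
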
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8b6c393ab9fd..22fd6577156a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -960,8 +960,6 @@ static inline int __init activate_vmi(void)
960 960
961void __init vmi_init(void) 961void __init vmi_init(void)
962{ 962{
963 unsigned long flags;
964
965 if (!vmi_rom) 963 if (!vmi_rom)
966 probe_vmi_rom(); 964 probe_vmi_rom();
967 else 965 else
@@ -973,13 +971,21 @@ void __init vmi_init(void)
973 971
974 reserve_top_address(-vmi_rom->virtual_top); 972 reserve_top_address(-vmi_rom->virtual_top);
975 973
976 local_irq_save(flags);
977 activate_vmi();
978
979#ifdef CONFIG_X86_IO_APIC 974#ifdef CONFIG_X86_IO_APIC
980 /* This is virtual hardware; timer routing is wired correctly */ 975 /* This is virtual hardware; timer routing is wired correctly */
981 no_timer_check = 1; 976 no_timer_check = 1;
982#endif 977#endif
978}
979
980void vmi_activate(void)
981{
982 unsigned long flags;
983
984 if (!vmi_rom)
985 return;
986
987 local_irq_save(flags);
988 activate_vmi();
983 local_irq_restore(flags & X86_EFLAGS_IF); 989 local_irq_restore(flags & X86_EFLAGS_IF);
984} 990}
985 991
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f8635..c4c1f9e09402 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
226 /* Upper bound is clockevent's use of ulong for cycle deltas. */ 226 /* Upper bound is clockevent's use of ulong for cycle deltas. */
227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); 227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
228 evt->min_delta_ns = clockevent_delta2ns(1, evt); 228 evt->min_delta_ns = clockevent_delta2ns(1, evt);
229 evt->cpumask = cpumask_of_cpu(cpu); 229 evt->cpumask = cpumask_of(cpu);
230 230
231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", 231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
232 evt->name, evt->mult, evt->shift); 232 evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..6f3d3d4cd973 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
17 * want per guest time just set the kernel.vsyscall64 sysctl to 0. 17 * want per guest time just set the kernel.vsyscall64 sysctl to 0.
18 */ 18 */
19 19
20/* Disable profiling for userspace code: */
21#define DISABLE_BRANCH_PROFILING
22
20#include <linux/time.h> 23#include <linux/time.h>
21#include <linux/init.h> 24#include <linux/init.h>
22#include <linux/kernel.h> 25#include <linux/kernel.h>
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb75e822..15c3e6999182 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
310/* 310/*
311 * Enable and initialize the xsave feature. 311 * Enable and initialize the xsave feature.
312 */ 312 */
313void __init xsave_cntxt_init(void) 313void __ref xsave_cntxt_init(void)
314{ 314{
315 unsigned int eax, ebx, ecx, edx; 315 unsigned int eax, ebx, ecx, edx;
316 316
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ce3251ce5504..b81125f0bdee 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,8 @@ if VIRTUALIZATION
20config KVM 20config KVM
21 tristate "Kernel-based Virtual Machine (KVM) support" 21 tristate "Kernel-based Virtual Machine (KVM) support"
22 depends on HAVE_KVM 22 depends on HAVE_KVM
23 # for device assignment:
24 depends on PCI
23 select PREEMPT_NOTIFIERS 25 select PREEMPT_NOTIFIERS
24 select MMU_NOTIFIER 26 select MMU_NOTIFIER
25 select ANON_INODES 27 select ANON_INODES
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 8772dc946823..59ebd37ad79e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -548,8 +548,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
548 mutex_lock(&kvm->lock); 548 mutex_lock(&kvm->lock);
549 pit->irq_source_id = kvm_request_irq_source_id(kvm); 549 pit->irq_source_id = kvm_request_irq_source_id(kvm);
550 mutex_unlock(&kvm->lock); 550 mutex_unlock(&kvm->lock);
551 if (pit->irq_source_id < 0) 551 if (pit->irq_source_id < 0) {
552 kfree(pit);
552 return NULL; 553 return NULL;
554 }
553 555
554 mutex_init(&pit->pit_state.lock); 556 mutex_init(&pit->pit_state.lock);
555 mutex_lock(&pit->pit_state.lock); 557 mutex_lock(&pit->pit_state.lock);
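
The i8254 fix frees the just-allocated pit before the early return, closing a leak on the kvm_request_irq_source_id() failure path. The general shape of that cleanup-on-error rule, with stand-in names:

```c
#include <stdlib.h>

/* Shape of the leak fix above: every early return after an allocation
 * must release what was already acquired. */
struct pit { int irq_source_id; };

static int request_irq_source_id(void)
{
	return -1;			/* simulate the failure path */
}

static struct pit *create_pit(void)
{
	struct pit *pit = calloc(1, sizeof(*pit));

	if (!pit)
		return NULL;

	pit->irq_source_id = request_irq_source_id();
	if (pit->irq_source_id < 0) {
		free(pit);		/* the fix: don't leak pit */
		return NULL;
	}
	return pit;
}

int main(void)
{
	return create_pit() ? 0 : 1;
}
```
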
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2a5e64881d9b..410ddbc1aa2e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
314 if (r) 314 if (r)
315 goto out; 315 goto out;
316 r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, 316 r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
317 rmap_desc_cache, 1); 317 rmap_desc_cache, 4);
318 if (r) 318 if (r)
319 goto out; 319 goto out;
320 r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); 320 r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1038 } 1038 }
1039 1039
1040 rmap_write_protect(vcpu->kvm, sp->gfn); 1040 rmap_write_protect(vcpu->kvm, sp->gfn);
1041 kvm_unlink_unsync_page(vcpu->kvm, sp);
1041 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1042 if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
1042 kvm_mmu_zap_page(vcpu->kvm, sp); 1043 kvm_mmu_zap_page(vcpu->kvm, sp);
1043 return 1; 1044 return 1;
1044 } 1045 }
1045 1046
1046 kvm_mmu_flush_tlb(vcpu); 1047 kvm_mmu_flush_tlb(vcpu);
1047 kvm_unlink_unsync_page(vcpu->kvm, sp);
1048 return 0; 1048 return 0;
1049} 1049}
1050 1050
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 613ec9aa674a..84eee43bbe74 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
331 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], 331 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
332 &curr_pte, sizeof(curr_pte)); 332 &curr_pte, sizeof(curr_pte));
333 if (r || curr_pte != gw->ptes[level - 2]) { 333 if (r || curr_pte != gw->ptes[level - 2]) {
334 kvm_mmu_put_page(shadow_page, sptep);
334 kvm_release_pfn_clean(sw->pfn); 335 kvm_release_pfn_clean(sw->pfn);
335 sw->sptep = NULL; 336 sw->sptep = NULL;
336 return 1; 337 return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2643b430d83a..a4018b01e1f9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3149 3149
3150 if (cpu_has_virtual_nmis()) { 3150 if (cpu_has_virtual_nmis()) {
3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3152 if (vmx_nmi_enabled(vcpu)) { 3152 if (vcpu->arch.interrupt.pending) {
3153 enable_nmi_window(vcpu);
3154 } else if (vmx_nmi_enabled(vcpu)) {
3153 vcpu->arch.nmi_pending = false; 3155 vcpu->arch.nmi_pending = false;
3154 vcpu->arch.nmi_injected = true; 3156 vcpu->arch.nmi_injected = true;
3155 } else { 3157 } else {
@@ -3564,7 +3566,8 @@ static int __init vmx_init(void)
3564 bypass_guest_pf = 0; 3566 bypass_guest_pf = 0;
3565 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3567 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3566 VMX_EPT_WRITABLE_MASK | 3568 VMX_EPT_WRITABLE_MASK |
3567 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); 3569 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
3570 VMX_EPT_IGMT_BIT);
3568 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3571 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3569 VMX_EPT_EXECUTABLE_MASK); 3572 VMX_EPT_EXECUTABLE_MASK);
3570 kvm_enable_tdp(); 3573 kvm_enable_tdp();
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 3e010d21fdd7..ec5edc339da6 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
352#define VMX_EPT_READABLE_MASK 0x1ull 352#define VMX_EPT_READABLE_MASK 0x1ull
353#define VMX_EPT_WRITABLE_MASK 0x2ull 353#define VMX_EPT_WRITABLE_MASK 0x2ull
354#define VMX_EPT_EXECUTABLE_MASK 0x4ull 354#define VMX_EPT_EXECUTABLE_MASK 0x4ull
355#define VMX_EPT_IGMT_BIT (1ull << 6)
355 356
356#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 357#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
357 358
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..104c8220a383 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -737,7 +737,7 @@ static void lguest_time_init(void)
737 737
738 /* We can't set cpumask in the initializer: damn C limitations! Set it 738 /* We can't set cpumask in the initializer: damn C limitations! Set it
739 * here and register our timer device. */ 739 * here and register our timer device. */
740 lguest_clockevent.cpumask = cpumask_of_cpu(0); 740 lguest_clockevent.cpumask = cpumask_of(0);
741 clockevents_register_device(&lguest_clockevent); 741 clockevents_register_device(&lguest_clockevent);
742 742
743 /* Finally, we unblock the timer interrupt. */ 743 /* Finally, we unblock the timer interrupt. */
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9e68075544f6..4a20b2f9a381 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
39#define __do_strncpy_from_user(dst, src, count, res) \ 39#define __do_strncpy_from_user(dst, src, count, res) \
40do { \ 40do { \
41 int __d0, __d1, __d2; \ 41 int __d0, __d1, __d2; \
42 might_sleep(); \ 42 might_fault(); \
43 __asm__ __volatile__( \ 43 __asm__ __volatile__( \
44 " testl %1,%1\n" \ 44 " testl %1,%1\n" \
45 " jz 2f\n" \ 45 " jz 2f\n" \
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
126#define __do_clear_user(addr,size) \ 126#define __do_clear_user(addr,size) \
127do { \ 127do { \
128 int __d0; \ 128 int __d0; \
129 might_sleep(); \ 129 might_fault(); \
130 __asm__ __volatile__( \ 130 __asm__ __volatile__( \
131 "0: rep; stosl\n" \ 131 "0: rep; stosl\n" \
132 " movl %2,%0\n" \ 132 " movl %2,%0\n" \
@@ -155,7 +155,7 @@ do { \
155unsigned long 155unsigned long
156clear_user(void __user *to, unsigned long n) 156clear_user(void __user *to, unsigned long n)
157{ 157{
158 might_sleep(); 158 might_fault();
159 if (access_ok(VERIFY_WRITE, to, n)) 159 if (access_ok(VERIFY_WRITE, to, n))
160 __do_clear_user(to, n); 160 __do_clear_user(to, n);
161 return n; 161 return n;
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
197 unsigned long mask = -__addr_ok(s); 197 unsigned long mask = -__addr_ok(s);
198 unsigned long res, tmp; 198 unsigned long res, tmp;
199 199
200 might_sleep(); 200 might_fault();
201 201
202 __asm__ __volatile__( 202 __asm__ __volatile__(
203 " testl %0, %0\n" 203 " testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718b..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,7 +15,7 @@
15#define __do_strncpy_from_user(dst,src,count,res) \ 15#define __do_strncpy_from_user(dst,src,count,res) \
16do { \ 16do { \
17 long __d0, __d1, __d2; \ 17 long __d0, __d1, __d2; \
18 might_sleep(); \ 18 might_fault(); \
19 __asm__ __volatile__( \ 19 __asm__ __volatile__( \
20 " testq %1,%1\n" \ 20 " testq %1,%1\n" \
21 " jz 2f\n" \ 21 " jz 2f\n" \
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
64unsigned long __clear_user(void __user *addr, unsigned long size) 64unsigned long __clear_user(void __user *addr, unsigned long size)
65{ 65{
66 long __d0; 66 long __d0;
67 might_sleep(); 67 might_fault();
68 /* no memory constraint because it doesn't change any memory gcc knows 68 /* no memory constraint because it doesn't change any memory gcc knows
69 about */ 69 about */
70 asm volatile( 70 asm volatile(
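
The usercopy helpers switch from might_sleep() to might_fault(): these paths may sleep only because a user access can fault, so the more specific annotation lets debug builds verify that faulting is legal at that point. A rough conceptual model, much simpler than the real implementation:

```c
#include <assert.h>

/* Conceptual model of might_fault(): a debug-time check that a user
 * access runs where a page fault (and hence sleeping) is permitted.
 * The counter below is a stub; the kernel tracks this per task. */
static int atomic_depth;		/* 0 = faults allowed */

static void might_fault(void)
{
	assert(atomic_depth == 0);
}

static void copy_to_user_stub(void)
{
	might_fault();			/* would catch misuse in atomic context */
	/* ... the actual copy with fault handling ... */
}

int main(void)
{
	copy_to_user_stub();
	return 0;
}
```
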
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3c3b471ea496..bc4c7840b2a8 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -17,6 +17,7 @@
17#include <asm/bigsmp/apic.h> 17#include <asm/bigsmp/apic.h>
18#include <asm/bigsmp/ipi.h> 18#include <asm/bigsmp/ipi.h>
19#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
20#include <asm/mach-default/mach_wakecpu.h>
20 21
21static int dmi_bigsmp; /* can be set by dmi scanners */ 22static int dmi_bigsmp; /* can be set by dmi scanners */
22 23
@@ -41,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
41 { } 42 { }
42}; 43};
43 44
44static cpumask_t vector_allocation_domain(int cpu) 45static void vector_allocation_domain(int cpu, cpumask_t *retmask)
45{ 46{
46 return cpumask_of_cpu(cpu); 47 cpus_clear(*retmask);
48 cpu_set(cpu, *retmask);
47} 49}
48 50
49static int probe_bigsmp(void) 51static int probe_bigsmp(void)
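
bigsmp's vector_allocation_domain() changes from returning a cpumask_t by value to filling a caller-supplied mask, and the es7000, numaq, and summit hunks below make the identical change. Returning a struct that can span hundreds of bytes copies it through the stack on every call, so the out-parameter is the idiomatic fix, as in this sketch:

```c
#include <stdio.h>
#include <string.h>

/* The out-parameter pattern adopted here and in the sub-arch hunks
 * below: fill a caller-owned mask instead of returning a large struct.
 * Before: struct cpumask f(int cpu) -- a 128-byte copy per call here. */
#define NR_CPUS 1024

struct cpumask { unsigned long bits[NR_CPUS / (8 * sizeof(long))]; };

static void vector_allocation_domain(int cpu, struct cpumask *retmask)
{
	memset(retmask, 0, sizeof(*retmask));
	retmask->bits[cpu / 64] |= 1UL << (cpu % 64);
}

int main(void)
{
	struct cpumask mask;

	vector_allocation_domain(3, &mask);
	printf("word0 = %#lx\n", mask.bits[0]);
	return 0;
}
```
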
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
index 9e835a11a13a..e63a4a76d8cd 100644
--- a/arch/x86/mach-generic/default.c
+++ b/arch/x86/mach-generic/default.c
@@ -16,6 +16,7 @@
16#include <asm/mach-default/mach_apic.h> 16#include <asm/mach-default/mach_apic.h>
17#include <asm/mach-default/mach_ipi.h> 17#include <asm/mach-default/mach_ipi.h>
18#include <asm/mach-default/mach_mpparse.h> 18#include <asm/mach-default/mach_mpparse.h>
19#include <asm/mach-default/mach_wakecpu.h>
19 20
20/* should be called last. */ 21/* should be called last. */
21static int probe_default(void) 22static int probe_default(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 28459cab3ddb..4ba5ccaa1584 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -16,7 +16,19 @@
16#include <asm/es7000/apic.h> 16#include <asm/es7000/apic.h>
17#include <asm/es7000/ipi.h> 17#include <asm/es7000/ipi.h>
18#include <asm/es7000/mpparse.h> 18#include <asm/es7000/mpparse.h>
19#include <asm/es7000/wakecpu.h> 19#include <asm/mach-default/mach_wakecpu.h>
20
21void __init es7000_update_genapic_to_cluster(void)
22{
23 genapic->target_cpus = target_cpus_cluster;
24 genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
25 genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
26 genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;
27
28 genapic->init_apic_ldr = init_apic_ldr_cluster;
29
30 genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
31}
20 32
21static int probe_es7000(void) 33static int probe_es7000(void)
22{ 34{
@@ -75,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
75} 87}
76#endif 88#endif
77 89
78static cpumask_t vector_allocation_domain(int cpu) 90static void vector_allocation_domain(int cpu, cpumask_t *retmask)
79{ 91{
80 /* Careful. Some cpus do not strictly honor the set of cpus 92 /* Careful. Some cpus do not strictly honor the set of cpus
81 * specified in the interrupt destination when using lowest 93 * specified in the interrupt destination when using lowest
@@ -85,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu)
85 * deliver interrupts to the wrong hyperthread when only one 97 * deliver interrupts to the wrong hyperthread when only one
 86 * hyperthread was specified in the interrupt destination. 98 * hyperthread was specified in the interrupt destination.
87 */ 99 */
88 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 100 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
89 return domain;
90} 101}
91 102
92struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); 103struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 71a309b122e6..511d7941364f 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
38 return 0; 38 return 0;
39} 39}
40 40
41static cpumask_t vector_allocation_domain(int cpu) 41static void vector_allocation_domain(int cpu, cpumask_t *retmask)
42{ 42{
43 /* Careful. Some cpus do not strictly honor the set of cpus 43 /* Careful. Some cpus do not strictly honor the set of cpus
44 * specified in the interrupt destination when using lowest 44 * specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu)
48 * deliver interrupts to the wrong hyperthread when only one 48 * deliver interrupts to the wrong hyperthread when only one
49 * hyperthread was specified in the interrupt desitination. 49 * hyperthread was specified in the interrupt desitination.
50 */ 50 */
51 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 51 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
52 return domain;
53} 52}
54 53
55struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); 54struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index 5a7e4619e1c4..c346d9d0226f 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -15,6 +15,7 @@
15#include <asm/mpspec.h> 15#include <asm/mpspec.h>
16#include <asm/apicdef.h> 16#include <asm/apicdef.h>
17#include <asm/genapic.h> 17#include <asm/genapic.h>
18#include <asm/setup.h>
18 19
19extern struct genapic apic_numaq; 20extern struct genapic apic_numaq;
20extern struct genapic apic_summit; 21extern struct genapic apic_summit;
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg)
57 } 58 }
58 } 59 }
59 60
61 if (x86_quirks->update_genapic)
62 x86_quirks->update_genapic();
63
60 /* Parsed again by __setup for debug/verbose */ 64 /* Parsed again by __setup for debug/verbose */
61 return 0; 65 return 0;
62} 66}
@@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void)
72 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support 76 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
73 */ 77 */
74 78
75 if (!cmdline_apic && genapic == &apic_default) 79 if (!cmdline_apic && genapic == &apic_default) {
76 if (apic_bigsmp.probe()) { 80 if (apic_bigsmp.probe()) {
77 genapic = &apic_bigsmp; 81 genapic = &apic_bigsmp;
82 if (x86_quirks->update_genapic)
83 x86_quirks->update_genapic();
78 printk(KERN_INFO "Overriding APIC driver with %s\n", 84 printk(KERN_INFO "Overriding APIC driver with %s\n",
79 genapic->name); 85 genapic->name);
80 } 86 }
87 }
81#endif 88#endif
82} 89}
83 90
@@ -94,6 +101,9 @@ void __init generic_apic_probe(void)
94 /* Not visible without early console */ 101 /* Not visible without early console */
95 if (!apic_probe[i]) 102 if (!apic_probe[i])
96 panic("Didn't find an APIC driver"); 103 panic("Didn't find an APIC driver");
104
105 if (x86_quirks->update_genapic)
106 x86_quirks->update_genapic();
97 } 107 }
98 printk(KERN_INFO "Using APIC driver %s\n", genapic->name); 108 printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
99} 109}
@@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
108 if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { 118 if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
109 if (!cmdline_apic) { 119 if (!cmdline_apic) {
110 genapic = apic_probe[i]; 120 genapic = apic_probe[i];
121 if (x86_quirks->update_genapic)
122 x86_quirks->update_genapic();
111 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 123 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
112 genapic->name); 124 genapic->name);
113 } 125 }
@@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
124 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { 136 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
125 if (!cmdline_apic) { 137 if (!cmdline_apic) {
126 genapic = apic_probe[i]; 138 genapic = apic_probe[i];
139 if (x86_quirks->update_genapic)
140 x86_quirks->update_genapic();
127 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 141 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
128 genapic->name); 142 genapic->name);
129 } 143 }
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6272b5e69da6..2821ffc188b5 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -16,6 +16,7 @@
16#include <asm/summit/apic.h> 16#include <asm/summit/apic.h>
17#include <asm/summit/ipi.h> 17#include <asm/summit/ipi.h>
18#include <asm/summit/mpparse.h> 18#include <asm/summit/mpparse.h>
19#include <asm/mach-default/mach_wakecpu.h>
19 20
20static int probe_summit(void) 21static int probe_summit(void)
21{ 22{
@@ -23,7 +24,7 @@ static int probe_summit(void)
23 return 0; 24 return 0;
24} 25}
25 26
26static cpumask_t vector_allocation_domain(int cpu) 27static void vector_allocation_domain(int cpu, cpumask_t *retmask)
27{ 28{
28 /* Careful. Some cpus do not strictly honor the set of cpus 29 /* Careful. Some cpus do not strictly honor the set of cpus
29 * specified in the interrupt destination when using lowest 30 * specified in the interrupt destination when using lowest
@@ -33,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu)
33 * deliver interrupts to the wrong hyperthread when only one 34 * deliver interrupts to the wrong hyperthread when only one
 34 * hyperthread was specified in the interrupt destination. 35 * hyperthread was specified in the interrupt destination.
35 */ 36 */
36 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; 37 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
37 return domain;
38} 38}
39 39
40struct genapic apic_summit = APIC_INIT("summit", probe_summit); 40struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 0e331652681e..a5bc05492b1e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -7,6 +7,7 @@
7 * This file provides all the same external entries as smp.c but uses 7 * This file provides all the same external entries as smp.c but uses
8 * the voyager hal to provide the functionality 8 * the voyager hal to provide the functionality
9 */ 9 */
10#include <linux/cpu.h>
10#include <linux/module.h> 11#include <linux/module.h>
11#include <linux/mm.h> 12#include <linux/mm.h>
12#include <linux/kernel_stat.h> 13#include <linux/kernel_stat.h>
@@ -62,11 +63,6 @@ static int voyager_extended_cpus = 1;
62/* Used for the invalidate map that's also checked in the spinlock */ 63/* Used for the invalidate map that's also checked in the spinlock */
63static volatile unsigned long smp_invalidate_needed; 64static volatile unsigned long smp_invalidate_needed;
64 65
65/* Bitmask of currently online CPUs - used by setup.c for
66 /proc/cpuinfo, visible externally but still physical */
67cpumask_t cpu_online_map = CPU_MASK_NONE;
68EXPORT_SYMBOL(cpu_online_map);
69
70/* Bitmask of CPUs present in the system - exported by i386_syms.c, used 66/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
71 * by scheduler but indexed physically */ 67 * by scheduler but indexed physically */
72cpumask_t phys_cpu_present_map = CPU_MASK_NONE; 68cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
@@ -217,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
217/* This is for the new dynamic CPU boot code */ 213/* This is for the new dynamic CPU boot code */
218cpumask_t cpu_callin_map = CPU_MASK_NONE; 214cpumask_t cpu_callin_map = CPU_MASK_NONE;
219cpumask_t cpu_callout_map = CPU_MASK_NONE; 215cpumask_t cpu_callout_map = CPU_MASK_NONE;
220cpumask_t cpu_possible_map = CPU_MASK_NONE;
221EXPORT_SYMBOL(cpu_possible_map);
222 216
223/* The per processor IRQ masks (these are usually kept in sync) */ 217/* The per processor IRQ masks (these are usually kept in sync) */
224static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; 218static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
@@ -678,7 +672,7 @@ void __init smp_boot_cpus(void)
678 672
679 /* loop over all the extended VIC CPUs and boot them. The 673 /* loop over all the extended VIC CPUs and boot them. The
680 * Quad CPUs must be bootstrapped by their extended VIC cpu */ 674 * Quad CPUs must be bootstrapped by their extended VIC cpu */
681 for (i = 0; i < NR_CPUS; i++) { 675 for (i = 0; i < nr_cpu_ids; i++) {
682 if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) 676 if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
683 continue; 677 continue;
684 do_boot_cpu(i); 678 do_boot_cpu(i);
@@ -1790,6 +1784,17 @@ void __init smp_setup_processor_id(void)
1790 x86_write_percpu(cpu_number, hard_smp_processor_id()); 1784 x86_write_percpu(cpu_number, hard_smp_processor_id());
1791} 1785}
1792 1786
1787static void voyager_send_call_func(cpumask_t callmask)
1788{
1789 __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id());
1790 send_CPI(mask, VIC_CALL_FUNCTION_CPI);
1791}
1792
1793static void voyager_send_call_func_single(int cpu)
1794{
1795 send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI);
1796}
1797
1793struct smp_ops smp_ops = { 1798struct smp_ops smp_ops = {
1794 .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, 1799 .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
1795 .smp_prepare_cpus = voyager_smp_prepare_cpus, 1800 .smp_prepare_cpus = voyager_smp_prepare_cpus,
@@ -1799,6 +1804,6 @@ struct smp_ops smp_ops = {
1799 .smp_send_stop = voyager_smp_send_stop, 1804 .smp_send_stop = voyager_smp_send_stop,
1800 .smp_send_reschedule = voyager_smp_send_reschedule, 1805 .smp_send_reschedule = voyager_smp_send_reschedule,
1801 1806
1802 .send_call_func_ipi = native_send_call_func_ipi, 1807 .send_call_func_ipi = voyager_send_call_func,
1803 .send_call_func_single_ipi = native_send_call_func_single_ipi, 1808 .send_call_func_single_ipi = voyager_send_call_func_single,
1804}; 1809};
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o 11obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o 12mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 13obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15 14
16obj-$(CONFIG_NUMA) += numa_$(BITS).o 15obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..21e996a70d68 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
53 53
54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
55{ 55{
56#ifdef CONFIG_MMIOTRACE_HOOKS 56#ifdef CONFIG_MMIOTRACE
57 if (unlikely(is_kmmio_active())) 57 if (unlikely(is_kmmio_active()))
58 if (kmmio_handler(regs, addr) == 1) 58 if (kmmio_handler(regs, addr) == 1)
59 return -1; 59 return -1;
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
413 unsigned long error_code) 413 unsigned long error_code)
414{ 414{
415 unsigned long flags = oops_begin(); 415 unsigned long flags = oops_begin();
416 int sig = SIGKILL;
416 struct task_struct *tsk; 417 struct task_struct *tsk;
417 418
418 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
423 tsk->thread.trap_no = 14; 424 tsk->thread.trap_no = 14;
424 tsk->thread.error_code = error_code; 425 tsk->thread.error_code = error_code;
425 if (__die("Bad pagetable", regs, error_code)) 426 if (__die("Bad pagetable", regs, error_code))
426 regs = NULL; 427 sig = 0;
427 oops_end(flags, regs, SIGKILL); 428 oops_end(flags, regs, sig);
428} 429}
429#endif 430#endif
430 431
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
590 int fault; 591 int fault;
591#ifdef CONFIG_X86_64 592#ifdef CONFIG_X86_64
592 unsigned long flags; 593 unsigned long flags;
594 int sig;
593#endif 595#endif
594 596
595 tsk = current; 597 tsk = current;
@@ -849,11 +851,12 @@ no_context:
849 bust_spinlocks(0); 851 bust_spinlocks(0);
850 do_exit(SIGKILL); 852 do_exit(SIGKILL);
851#else 853#else
854 sig = SIGKILL;
852 if (__die("Oops", regs, error_code)) 855 if (__die("Oops", regs, error_code))
853 regs = NULL; 856 sig = 0;
854 /* Executive summary in case the body of the oops scrolled away */ 857 /* Executive summary in case the body of the oops scrolled away */
855 printk(KERN_EMERG "CR2: %016lx\n", address); 858 printk(KERN_EMERG "CR2: %016lx\n", address);
856 oops_end(flags, regs, SIGKILL); 859 oops_end(flags, regs, sig);
857#endif 860#endif
858 861
859/* 862/*
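The fault.c hunks above replace the old trick of NULLing regs with an explicit signal value: __die() returning nonzero means the oops was already handled, so the caller downgrades sig to 0 while keeping regs intact for the dump. A sketch of the consumer side, assuming oops_end() in the new dumpstack code follows the common pattern:

	void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
	{
		/* ... drop the die lock, restore irq flags, oops_exit() ... */
		if (!signr)
			return;		/* __die() handled it: do not kill the task */
		if (in_interrupt())
			panic("Fatal exception in interrupt");
		do_exit(signr);		/* normally SIGKILL from the callers above */
	}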
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 847c164725f4..8518c678d83f 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
222 } 222 }
223} 223}
224 224
225#ifdef CONFIG_HIBERNATION
226/**
227 * resume_map_numa_kva - add KVA mapping to the temporary page tables created
228 * during resume from hibernation
229 * @pgd_base: temporary resume page directory
230 */
231void resume_map_numa_kva(pgd_t *pgd_base)
232{
233 int node;
234
235 for_each_online_node(node) {
236 unsigned long start_va, start_pfn, size, pfn;
237
238 start_va = (unsigned long)node_remap_start_vaddr[node];
239 start_pfn = node_remap_start_pfn[node];
240 size = node_remap_size[node];
241
242 printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
243
244 for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
245 unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
246 pgd_t *pgd = pgd_base + pgd_index(vaddr);
247 pud_t *pud = pud_offset(pgd, vaddr);
248 pmd_t *pmd = pmd_offset(pud, vaddr);
249
250 set_pmd(pmd, pfn_pmd(start_pfn + pfn,
251 PAGE_KERNEL_LARGE_EXEC));
252
253 printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
254 __FUNCTION__, vaddr, start_pfn + pfn);
255 }
256 }
257}
258#endif
259
225static unsigned long calculate_numa_remap_pages(void) 260static unsigned long calculate_numa_remap_pages(void)
226{ 261{
227 int nid; 262 int nid;
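One worked number for the pfn loop in resume_map_numa_kva() (illustration only; assumes 32-bit non-PAE, where PAGE_SHIFT is 12 and PTRS_PER_PTE is 1024):

	static inline unsigned long kva_bytes_per_pmd(void)
	{
		/* each set_pmd(pfn_pmd(..., PAGE_KERNEL_LARGE_EXEC)) covers
		 * PTRS_PER_PTE small pages: 1024 << 12 = 4 MiB per iteration */
		return PTRS_PER_PTE << PAGE_SHIFT;
	}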
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf152d46..71a14f89f89e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
278 int rr, i; 278 int rr, i;
279 279
280 rr = first_node(node_online_map); 280 rr = first_node(node_online_map);
281 for (i = 0; i < NR_CPUS; i++) { 281 for (i = 0; i < nr_cpu_ids; i++) {
282 if (early_cpu_to_node(i) != NUMA_NO_NODE) 282 if (early_cpu_to_node(i) != NUMA_NO_NODE)
283 continue; 283 continue;
284 numa_set_node(i, rr); 284 numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
549 memnodemap[0] = 0; 549 memnodemap[0] = 0;
550 node_set_online(0); 550 node_set_online(0);
551 node_set(0, node_possible_map); 551 node_set(0, node_possible_map);
552 for (i = 0; i < NR_CPUS; i++) 552 for (i = 0; i < nr_cpu_ids; i++)
553 numa_set_node(i, 0); 553 numa_set_node(i, 0);
554 e820_register_active_regions(0, start_pfn, last_pfn); 554 e820_register_active_regions(0, start_pfn, last_pfn);
555 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); 555 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
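The NR_CPUS to nr_cpu_ids conversions here and in srat_64.c below all apply one idiom: bound CPU-id loops by the runtime count of possible ids rather than the compile-time maximum. A minimal sketch:

	static void __init example_percpu_setup(void)
	{
		int i;

		/* nr_cpu_ids <= NR_CPUS; ids at or above it can never exist */
		for (i = 0; i < nr_cpu_ids; i++) {	/* was: i < NR_CPUS */
			/* ... per-cpu-id initialization ... */
		}
	}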
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2fc14fe..09737c8af074 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
382 if (!node_online(i)) 382 if (!node_online(i))
383 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 383 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
384 384
385 for (i = 0; i < NR_CPUS; i++) { 385 for (i = 0; i < nr_cpu_ids; i++) {
386 int node = early_cpu_to_node(i); 386 int node = early_cpu_to_node(i);
387 387
388 if (node == NUMA_NO_NODE) 388 if (node == NUMA_NO_NODE)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 022cd41ea9b4..202864ad49a7 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type)
401 *cpu_type = "i386/pii"; 401 *cpu_type = "i386/pii";
402 break; 402 break;
403 case 6 ... 8: 403 case 6 ... 8:
404 case 10 ... 11:
404 *cpu_type = "i386/piii"; 405 *cpu_type = "i386/piii";
405 break; 406 break;
406 case 9: 407 case 9:
408 case 13:
407 *cpu_type = "i386/p6_mobile"; 409 *cpu_type = "i386/p6_mobile";
408 break; 410 break;
409 case 10 ... 13:
410 *cpu_type = "i386/p6";
411 break;
412 case 14: 411 case 14:
413 *cpu_type = "i386/core"; 412 *cpu_type = "i386/core";
414 break; 413 break;
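Pieced together from the hunk above, the resulting P6-family switch reads as follows (sketch; cpu_model stands for the boot CPU's model number):

	switch (cpu_model) {
	case 6 ... 8:
	case 10 ... 11:			/* formerly misreported as "i386/p6" */
		*cpu_type = "i386/piii";
		break;
	case 9:
	case 13:			/* Pentium M: also formerly "i386/p6" */
		*cpu_type = "i386/p6_mobile";
		break;
	case 14:
		*cpu_type = "i386/core";
		break;
	}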
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3f1b81a83e2e..e9f80c744cf3 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
69 int i; 69 int i;
70 70
71 if (!reset_value) { 71 if (!reset_value) {
72 reset_value = kmalloc(sizeof(unsigned) * num_counters, 72 reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
73 GFP_ATOMIC); 73 GFP_ATOMIC);
74 if (!reset_value) 74 if (!reset_value)
75 return; 75 return;
@@ -156,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs)
156 unsigned int low, high; 156 unsigned int low, high;
157 int i; 157 int i;
158 158
159 if (!reset_value)
160 return;
159 for (i = 0; i < num_counters; ++i) { 161 for (i = 0; i < num_counters; ++i) {
160 if (reset_value[i]) { 162 if (reset_value[i]) {
161 CTRL_READ(low, high, msrs, i); 163 CTRL_READ(low, high, msrs, i);
@@ -171,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs)
171 unsigned int low, high; 173 unsigned int low, high;
172 int i; 174 int i;
173 175
176 if (!reset_value)
177 return;
174 for (i = 0; i < num_counters; ++i) { 178 for (i = 0; i < num_counters; ++i) {
175 if (!reset_value[i]) 179 if (!reset_value[i])
176 continue; 180 continue;
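The two early returns added above close a NULL-dereference window: ppro_setup_ctrs() can fail its GFP_ATOMIC allocation and return silently, so every later entry point has to re-check before touching reset_value. Sketch of the guard:

	static void ppro_start(struct op_msrs const * const msrs)
	{
		if (!reset_value)	/* setup's kmalloc failed: counters never armed */
			return;
		/* ... program and enable the event-select MSRs as before ... */
	}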
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9915293500fb..9a5af6c8fbe9 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,
173 173
174#undef PCI_CONF2_ADDRESS 174#undef PCI_CONF2_ADDRESS
175 175
176static struct pci_raw_ops pci_direct_conf2 = { 176struct pci_raw_ops pci_direct_conf2 = {
177 .read = pci_conf2_read, 177 .read = pci_conf2_read,
178 .write = pci_conf2_write, 178 .write = pci_conf2_write,
179}; 179};
@@ -289,6 +289,7 @@ int __init pci_direct_probe(void)
289 289
290 if (pci_check_type1()) { 290 if (pci_check_type1()) {
291 raw_pci_ops = &pci_direct_conf1; 291 raw_pci_ops = &pci_direct_conf1;
292 port_cf9_safe = true;
292 return 1; 293 return 1;
293 } 294 }
294 release_resource(region); 295 release_resource(region);
@@ -305,6 +306,7 @@ int __init pci_direct_probe(void)
305 306
306 if (pci_check_type2()) { 307 if (pci_check_type2()) {
307 raw_pci_ops = &pci_direct_conf2; 308 raw_pci_ops = &pci_direct_conf2;
309 port_cf9_safe = true;
308 return 2; 310 return 2;
309 } 311 }
310 312
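Why port_cf9_safe gets set here: successfully probing config type 1/2 is taken as evidence that the chipset decodes the 0xCF9 reset register, so the reboot path may use it. A hypothetical consumer, modeled on the BOOT_CF9-style emergency-restart path (port_cf9_safe is the flag declared in pci.h below):

	#include <linux/delay.h>
	#include <asm/io.h>

	static void cf9_cond_reset(void)
	{
		u8 cf9;

		if (!port_cf9_safe)
			return;			/* probe never confirmed the port */
		cf9 = inb(0xcf9) & ~6;
		outb(cf9 | 2, 0xcf9);		/* request hard reset */
		udelay(50);
		outb(cf9 | 6, 0xcf9);		/* perform full reset */
		udelay(50);
	}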
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 3c27a809393b..2051dc96b8e9 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
496 pci_siemens_interrupt_controller); 496 pci_siemens_interrupt_controller);
497 497
498/* 498/*
499 * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config 499 * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
500 * have 4096 bytes. Even if the device is capable, that doesn't mean we can 500 * 4096 bytes of configuration space for each function of their
501 * access it. Maybe we don't have a way to generate extended config space 501 * integrated northbridge device.
502 * accesses. So check it
503 */ 502 */
504static void fam10h_pci_cfg_space_size(struct pci_dev *dev) 503static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
505{ 504{
506 dev->cfg_size = pci_cfg_space_size_ext(dev); 505 dev->cfg_size = pci_cfg_space_size_ext(dev);
507} 506}
508 507DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
509DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); 508DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
510DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); 509DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
511DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); 510DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
512DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); 511DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
513DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); 512DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
513DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
514DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
515DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
516DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
514 517
515/* 518/*
516 * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from 519 * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 15b9cf6be729..1959018aac02 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops;
96extern struct pci_raw_ops *raw_pci_ext_ops; 96extern struct pci_raw_ops *raw_pci_ext_ops;
97 97
98extern struct pci_raw_ops pci_direct_conf1; 98extern struct pci_raw_ops pci_direct_conf1;
99extern bool port_cf9_safe;
99 100
100/* arch_initcall level */ 101/* arch_initcall level */
101extern int pci_direct_probe(void); 102extern int pci_direct_probe(void);
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index f2b6e3f11bfc..81197c62d5b3 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -12,6 +12,7 @@
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/page.h> 13#include <asm/page.h>
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15#include <asm/mmzone.h>
15 16
16/* Defined in hibernate_asm_32.S */ 17/* Defined in hibernate_asm_32.S */
17extern int restore_image(void); 18extern int restore_image(void);
@@ -127,6 +128,9 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
127 } 128 }
128 } 129 }
129 } 130 }
131
132 resume_map_numa_kva(pgd_base);
133
130 return 0; 134 return 0;
131} 135}
132 136
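The new <asm/mmzone.h> include suggests that header is where non-NUMA builds pick up a no-op version of resume_map_numa_kva(), letting this call site stay unconditional. A sketch of the presumed stub:

	#ifndef CONFIG_NUMA
	static inline void resume_map_numa_kva(pgd_t *pgd) {}
	#endif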
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
9 * Also alternative() doesn't work. 9 * Also alternative() doesn't work.
10 */ 10 */
11 11
12/* Disable profiling for userspace code: */
13#define DISABLE_BRANCH_PROFILING
14
12#include <linux/kernel.h> 15#include <linux/kernel.h>
13#include <linux/posix-timers.h> 16#include <linux/posix-timers.h>
14#include <linux/time.h> 17#include <linux/time.h>
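Why the define must precede the includes (sketch; the exact Kconfig symbol is not named here): with branch profiling enabled, likely()/unlikely() expand to instrumented wrappers that record hit counts in kernel-only data, which code mapped into userspace like the vDSO must never touch.

	/* Disable profiling for userspace code: */
	#define DISABLE_BRANCH_PROFILING	/* before any header that defines likely() */
	#include <linux/kernel.h>		/* now pulls in the plain branch macros */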
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 688936044dc9..e59e53b11e2b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
661 * For 64-bit, we must skip the Xen hole in the middle of the address 661 * For 64-bit, we must skip the Xen hole in the middle of the address
662 * space, just after the big x86-64 virtual hole. 662 * space, just after the big x86-64 virtual hole.
663 */ 663 */
664static int xen_pgd_walk(struct mm_struct *mm, 664static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
665 int (*func)(struct mm_struct *mm, struct page *, 665 int (*func)(struct mm_struct *mm, struct page *,
666 enum pt_level), 666 enum pt_level),
667 unsigned long limit) 667 unsigned long limit)
668{ 668{
669 pgd_t *pgd = mm->pgd;
670 int flush = 0; 669 int flush = 0;
671 unsigned hole_low, hole_high; 670 unsigned hole_low, hole_high;
672 unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; 671 unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -753,6 +752,14 @@ out:
753 return flush; 752 return flush;
754} 753}
755 754
755static int xen_pgd_walk(struct mm_struct *mm,
756 int (*func)(struct mm_struct *mm, struct page *,
757 enum pt_level),
758 unsigned long limit)
759{
760 return __xen_pgd_walk(mm, mm->pgd, func, limit);
761}
762
756/* If we're using split pte locks, then take the page's lock and 763/* If we're using split pte locks, then take the page's lock and
757 return a pointer to it. Otherwise return NULL. */ 764 return a pointer to it. Otherwise return NULL. */
758static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) 765static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
854 861
855 xen_mc_batch(); 862 xen_mc_batch();
856 863
857 if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { 864 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
858 /* re-enable interrupts for flushing */ 865 /* re-enable interrupts for flushing */
859 xen_mc_issue(0); 866 xen_mc_issue(0);
860 867
@@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
998 PT_PMD); 1005 PT_PMD);
999#endif 1006#endif
1000 1007
1001 xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT); 1008 __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
1002 1009
1003 xen_mc_issue(0); 1010 xen_mc_issue(0);
1004} 1011}
@@ -1072,7 +1079,7 @@ static void drop_other_mm_ref(void *info)
1072 1079
1073static void xen_drop_mm_ref(struct mm_struct *mm) 1080static void xen_drop_mm_ref(struct mm_struct *mm)
1074{ 1081{
1075 cpumask_t mask; 1082 cpumask_var_t mask;
1076 unsigned cpu; 1083 unsigned cpu;
1077 1084
1078 if (current->active_mm == mm) { 1085 if (current->active_mm == mm) {
@@ -1084,7 +1091,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1084 } 1091 }
1085 1092
1086 /* Get the "official" set of cpus referring to our pagetable. */ 1093 /* Get the "official" set of cpus referring to our pagetable. */
1087 mask = mm->cpu_vm_mask; 1094 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
1095 for_each_online_cpu(cpu) {
1096 if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
1097 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
1098 continue;
1099 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
1100 }
1101 return;
1102 }
1103 cpumask_copy(mask, &mm->cpu_vm_mask);
1088 1104
1089 /* It's possible that a vcpu may have a stale reference to our 1105 /* It's possible that a vcpu may have a stale reference to our
1090 cr3, because its in lazy mode, and it hasn't yet flushed 1106 cr3, because its in lazy mode, and it hasn't yet flushed
@@ -1093,11 +1109,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1093 if needed. */ 1109 if needed. */
1094 for_each_online_cpu(cpu) { 1110 for_each_online_cpu(cpu) {
1095 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) 1111 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
1096 cpu_set(cpu, mask); 1112 cpumask_set_cpu(cpu, mask);
1097 } 1113 }
1098 1114
1099 if (!cpus_empty(mask)) 1115 if (!cpumask_empty(mask))
1100 smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); 1116 smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
1117 free_cpumask_var(mask);
1101} 1118}
1102#else 1119#else
1103static void xen_drop_mm_ref(struct mm_struct *mm) 1120static void xen_drop_mm_ref(struct mm_struct *mm)
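The cpumask_var_t conversion in xen_drop_mm_ref() is the standard off-stack-cpumask pattern: allocate, degrade gracefully when the atomic allocation fails, and free on the way out. A condensed sketch of the shape:

	static void offstack_mask_pattern(void)
	{
		cpumask_var_t mask;

		if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
			/* slow fallback: visit each online cpu individually */
			return;
		}
		cpumask_copy(mask, cpu_online_mask);
		/* ... use mask as scratch space ... */
		free_cpumask_var(mask);
	}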
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d77da613b1d2..c44e2069c7c7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -33,7 +33,7 @@
33#include "xen-ops.h" 33#include "xen-ops.h"
34#include "mmu.h" 34#include "mmu.h"
35 35
36cpumask_t xen_cpu_initialized_map; 36cpumask_var_t xen_cpu_initialized_map;
37 37
38static DEFINE_PER_CPU(int, resched_irq); 38static DEFINE_PER_CPU(int, resched_irq);
39static DEFINE_PER_CPU(int, callfunc_irq); 39static DEFINE_PER_CPU(int, callfunc_irq);
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void)
158{ 158{
159 int i, rc; 159 int i, rc;
160 160
161 for (i = 0; i < NR_CPUS; i++) { 161 for (i = 0; i < nr_cpu_ids; i++) {
162 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); 162 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
163 if (rc >= 0) { 163 if (rc >= 0) {
164 num_processors++; 164 num_processors++;
@@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
192 if (xen_smp_intr_init(0)) 192 if (xen_smp_intr_init(0))
193 BUG(); 193 BUG();
194 194
195 xen_cpu_initialized_map = cpumask_of_cpu(0); 195 if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
196 panic("could not allocate xen_cpu_initialized_map\n");
197
198 cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
196 199
197 /* Restrict the possible_map according to max_cpus. */ 200 /* Restrict the possible_map according to max_cpus. */
198 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { 201 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
199 for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) 202 for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
200 continue; 203 continue;
201 cpu_clear(cpu, cpu_possible_map); 204 cpu_clear(cpu, cpu_possible_map);
202 } 205 }
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
221 struct vcpu_guest_context *ctxt; 224 struct vcpu_guest_context *ctxt;
222 struct desc_struct *gdt; 225 struct desc_struct *gdt;
223 226
224 if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) 227 if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
225 return 0; 228 return 0;
226 229
227 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); 230 ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
@@ -362,7 +365,7 @@ static void xen_cpu_die(unsigned int cpu)
362 alternatives_smp_switch(0); 365 alternatives_smp_switch(0);
363} 366}
364 367
365static void xen_play_dead(void) 368static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */
366{ 369{
367 play_dead_common(); 370 play_dead_common();
368 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); 371 HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
@@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu)
408 xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); 411 xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
409} 412}
410 413
411static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) 414static void xen_send_IPI_mask(const struct cpumask *mask,
415 enum ipi_vector vector)
412{ 416{
413 unsigned cpu; 417 unsigned cpu;
414 418
415 cpus_and(mask, mask, cpu_online_map); 419 for_each_cpu_and(cpu, mask, cpu_online_mask)
416
417 for_each_cpu_mask_nr(cpu, mask)
418 xen_send_IPI_one(cpu, vector); 420 xen_send_IPI_one(cpu, vector);
419} 421}
420 422
421static void xen_smp_send_call_function_ipi(cpumask_t mask) 423static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
422{ 424{
423 int cpu; 425 int cpu;
424 426
425 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); 427 xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
426 428
427 /* Make sure other vcpus get a chance to run if they need to. */ 429 /* Make sure other vcpus get a chance to run if they need to. */
428 for_each_cpu_mask_nr(cpu, mask) { 430 for_each_cpu(cpu, mask) {
429 if (xen_vcpu_stolen(cpu)) { 431 if (xen_vcpu_stolen(cpu)) {
430 HYPERVISOR_sched_op(SCHEDOP_yield, 0); 432 HYPERVISOR_sched_op(SCHEDOP_yield, 0);
431 break; 433 break;
@@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask)
435 437
436static void xen_smp_send_call_function_single_ipi(int cpu) 438static void xen_smp_send_call_function_single_ipi(int cpu)
437{ 439{
438 xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); 440 xen_send_IPI_mask(cpumask_of(cpu),
441 XEN_CALL_FUNCTION_SINGLE_VECTOR);
439} 442}
440 443
441static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) 444static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
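The xen/smp.c hunks track the same API migration: cpumask_t passed by value becomes const struct cpumask * by reference, and the explicit cpus_and() against the online map folds into the iterator itself. Sketch:

	static void send_ipi_online(const struct cpumask *mask, enum ipi_vector vector)
	{
		unsigned cpu;

		/* replaces: cpus_and(mask, mask, cpu_online_map);
		 *           for_each_cpu_mask_nr(cpu, mask) ... */
		for_each_cpu_and(cpu, mask, cpu_online_mask)
			xen_send_IPI_one(cpu, vector);
	}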
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 2a234db5949b..212ffe012b76 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled)
35 pfn_to_mfn(xen_start_info->console.domU.mfn); 35 pfn_to_mfn(xen_start_info->console.domU.mfn);
36 } else { 36 } else {
37#ifdef CONFIG_SMP 37#ifdef CONFIG_SMP
38 xen_cpu_initialized_map = cpu_online_map; 38 BUG_ON(xen_cpu_initialized_map == NULL);
39 cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
39#endif 40#endif
40 xen_vcpu_restore(); 41 xen_vcpu_restore();
41 } 42 }
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed7..65d75a6be0ba 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -437,7 +437,7 @@ void xen_setup_timer(int cpu)
437 evt = &per_cpu(xen_clock_events, cpu); 437 evt = &per_cpu(xen_clock_events, cpu);
438 memcpy(evt, xen_clockevent, sizeof(*evt)); 438 memcpy(evt, xen_clockevent, sizeof(*evt));
439 439
440 evt->cpumask = cpumask_of_cpu(cpu); 440 evt->cpumask = cpumask_of(cpu);
441 evt->irq = irq; 441 evt->irq = irq;
442 442
443 setup_runstate_info(cpu); 443 setup_runstate_info(cpu);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d7422dc2a55c..c1f8faf0a2c5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu);
49 49
50void xen_mark_init_mm_pinned(void); 50void xen_mark_init_mm_pinned(void);
51 51
52void __init xen_setup_vcpu_info_placement(void); 52void xen_setup_vcpu_info_placement(void);
53 53
54#ifdef CONFIG_SMP 54#ifdef CONFIG_SMP
55void xen_smp_init(void); 55void xen_smp_init(void);
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void);
58__cpuinit void xen_init_lock_cpu(int cpu); 58__cpuinit void xen_init_lock_cpu(int cpu);
59void xen_uninit_lock_cpu(int cpu); 59void xen_uninit_lock_cpu(int cpu);
60 60
61extern cpumask_t xen_cpu_initialized_map; 61extern cpumask_var_t xen_cpu_initialized_map;
62#else 62#else
63static inline void xen_smp_init(void) {} 63static inline void xen_smp_init(void) {}
64#endif 64#endif