Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 26
-rw-r--r--  arch/x86/Kconfig.cpu | 1
-rw-r--r--  arch/x86/Kconfig.debug | 4
-rw-r--r--  arch/x86/boot/tty.c | 2
-rw-r--r--  arch/x86/include/asm/acpi.h | 1
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h | 24
-rw-r--r--  arch/x86/include/asm/bigsmp/apic.h | 2
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 6
-rw-r--r--  arch/x86/include/asm/ds.h | 140
-rw-r--r--  arch/x86/include/asm/es7000/apic.h | 79
-rw-r--r--  arch/x86/include/asm/es7000/wakecpu.h | 41
-rw-r--r--  arch/x86/include/asm/ftrace.h | 34
-rw-r--r--  arch/x86/include/asm/genapic_32.h | 19
-rw-r--r--  arch/x86/include/asm/genapic_64.h | 2
-rw-r--r--  arch/x86/include/asm/io_apic.h | 9
-rw-r--r--  arch/x86/include/asm/iomap.h | 30
-rw-r--r--  arch/x86/include/asm/iommu.h | 1
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 11
-rw-r--r--  arch/x86/include/asm/mach-default/mach_apic.h | 2
-rw-r--r--  arch/x86/include/asm/mach-default/mach_wakecpu.h | 24
-rw-r--r--  arch/x86/include/asm/mach-default/smpboot_hooks.h | 8
-rw-r--r--  arch/x86/include/asm/mach-generic/mach_apic.h | 1
-rw-r--r--  arch/x86/include/asm/mach-generic/mach_wakecpu.h | 12
-rw-r--r--  arch/x86/include/asm/mmzone_32.h | 4
-rw-r--r--  arch/x86/include/asm/numaq/wakecpu.h | 24
-rw-r--r--  arch/x86/include/asm/pci_64.h | 14
-rw-r--r--  arch/x86/include/asm/ptrace.h | 2
-rw-r--r--  arch/x86/include/asm/setup.h | 3
-rw-r--r--  arch/x86/include/asm/thread_info.h | 2
-rw-r--r--  arch/x86/include/asm/topology.h | 2
-rw-r--r--  arch/x86/include/asm/uaccess.h | 2
-rw-r--r--  arch/x86/include/asm/uaccess_32.h | 8
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 8
-rw-r--r--  arch/x86/include/asm/unistd_64.h | 4
-rw-r--r--  arch/x86/kernel/Makefile | 5
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 12
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 51
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 6
-rw-r--r--  arch/x86/kernel/apic.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 18
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 17
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 3
-rw-r--r--  arch/x86/kernel/ds.c | 741
-rw-r--r--  arch/x86/kernel/dumpstack.c | 351
-rw-r--r--  arch/x86/kernel/dumpstack.h | 39
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 307
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 289
-rw-r--r--  arch/x86/kernel/early-quirks.c | 18
-rw-r--r--  arch/x86/kernel/entry_32.S | 51
-rw-r--r--  arch/x86/kernel/entry_64.S | 83
-rw-r--r--  arch/x86/kernel/es7000_32.c | 71
-rw-r--r--  arch/x86/kernel/ftrace.c | 390
-rw-r--r--  arch/x86/kernel/genapic_64.c | 4
-rw-r--r--  arch/x86/kernel/hpet.c | 4
-rw-r--r--  arch/x86/kernel/i387.c | 2
-rw-r--r--  arch/x86/kernel/io_apic.c | 667
-rw-r--r--  arch/x86/kernel/irq.c | 3
-rw-r--r--  arch/x86/kernel/irq_32.c | 2
-rw-r--r--  arch/x86/kernel/irq_64.c | 2
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 3
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 3
-rw-r--r--  arch/x86/kernel/kvmclock.c | 2
-rw-r--r--  arch/x86/kernel/mpparse.c | 24
-rw-r--r--  arch/x86/kernel/numaq_32.c | 10
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c | 3
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 2
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 16
-rw-r--r--  arch/x86/kernel/process_32.c | 4
-rw-r--r--  arch/x86/kernel/process_64.c | 4
-rw-r--r--  arch/x86/kernel/ptrace.c | 98
-rw-r--r--  arch/x86/kernel/reboot.c | 9
-rw-r--r--  arch/x86/kernel/setup.c | 19
-rw-r--r--  arch/x86/kernel/smpboot.c | 17
-rw-r--r--  arch/x86/kernel/stacktrace.c | 64
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 4
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 3
-rw-r--r--  arch/x86/kernel/xsave.c | 2
-rw-r--r--  arch/x86/kvm/Kconfig | 2
-rw-r--r--  arch/x86/kvm/i8254.c | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 4
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 1
-rw-r--r--  arch/x86/kvm/vmx.c | 7
-rw-r--r--  arch/x86/kvm/vmx.h | 1
-rw-r--r--  arch/x86/lib/usercopy_32.c | 8
-rw-r--r--  arch/x86/lib/usercopy_64.c | 4
-rw-r--r--  arch/x86/mach-generic/bigsmp.c | 1
-rw-r--r--  arch/x86/mach-generic/default.c | 1
-rw-r--r--  arch/x86/mach-generic/es7000.c | 14
-rw-r--r--  arch/x86/mach-generic/probe.c | 16
-rw-r--r--  arch/x86/mach-generic/summit.c | 1
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c | 16
-rw-r--r--  arch/x86/mm/Makefile | 3
-rw-r--r--  arch/x86/mm/fault.c | 13
-rw-r--r--  arch/x86/mm/numa_32.c | 35
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 5
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c | 6
-rw-r--r--  arch/x86/pci/fixup.c | 25
-rw-r--r--  arch/x86/power/hibernate_32.c | 4
-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 3
-rw-r--r--  arch/x86/xen/mmu.c | 21
-rw-r--r--  arch/x86/xen/smp.c | 2
-rw-r--r--  arch/x86/xen/xen-ops.h | 2
104 files changed, 2502 insertions, 1680 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ebcad15ccf35..d99eeb7915c6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,11 +29,14 @@ config X86
29 select HAVE_FTRACE_MCOUNT_RECORD 29 select HAVE_FTRACE_MCOUNT_RECORD
30 select HAVE_DYNAMIC_FTRACE 30 select HAVE_DYNAMIC_FTRACE
31 select HAVE_FUNCTION_TRACER 31 select HAVE_FUNCTION_TRACER
32 select HAVE_FUNCTION_GRAPH_TRACER
33 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
32 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 34 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
33 select HAVE_ARCH_KGDB if !X86_VOYAGER 35 select HAVE_ARCH_KGDB if !X86_VOYAGER
34 select HAVE_ARCH_TRACEHOOK 36 select HAVE_ARCH_TRACEHOOK
35 select HAVE_GENERIC_DMA_COHERENT if X86_32 37 select HAVE_GENERIC_DMA_COHERENT if X86_32
36 select HAVE_EFFICIENT_UNALIGNED_ACCESS 38 select HAVE_EFFICIENT_UNALIGNED_ACCESS
39 select USER_STACKTRACE_SUPPORT
37 40
38config ARCH_DEFCONFIG 41config ARCH_DEFCONFIG
39 string 42 string
@@ -167,9 +170,12 @@ config GENERIC_PENDING_IRQ
167config X86_SMP 170config X86_SMP
168 bool 171 bool
169 depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) 172 depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
170 select USE_GENERIC_SMP_HELPERS
171 default y 173 default y
172 174
175config USE_GENERIC_SMP_HELPERS
176 def_bool y
177 depends on SMP
178
173config X86_32_SMP 179config X86_32_SMP
174 def_bool y 180 def_bool y
175 depends on X86_32 && SMP 181 depends on X86_32 && SMP
@@ -235,6 +241,16 @@ config X86_HAS_BOOT_CPU_ID
235 def_bool y 241 def_bool y
236 depends on X86_VOYAGER 242 depends on X86_VOYAGER
237 243
244config SPARSE_IRQ
245 bool "Support sparse irq numbering"
246 depends on (PCI_MSI || HT_IRQ) && SMP
247 default y
248 help
249 This enables support for sparse irq, esp for msi/msi-x. You may need
250 if you have lots of cards supports msi-x installed.
251
252 If you don't know what to do here, say Y.
253
238config X86_FIND_SMP_CONFIG 254config X86_FIND_SMP_CONFIG
239 def_bool y 255 def_bool y
240 depends on X86_MPPARSE || X86_VOYAGER 256 depends on X86_MPPARSE || X86_VOYAGER
@@ -364,10 +380,10 @@ config X86_RDC321X
364 as R-8610-(G). 380 as R-8610-(G).
365 If you don't have one of these chips, you should say N here. 381 If you don't have one of these chips, you should say N here.
366 382
367config SCHED_NO_NO_OMIT_FRAME_POINTER 383config SCHED_OMIT_FRAME_POINTER
368 def_bool y 384 def_bool y
369 prompt "Single-depth WCHAN output" 385 prompt "Single-depth WCHAN output"
370 depends on X86_32 386 depends on X86
371 help 387 help
372 Calculate simpler /proc/<PID>/wchan values. If this option 388 Calculate simpler /proc/<PID>/wchan values. If this option
373 is disabled then wchan values will recurse back to the 389 is disabled then wchan values will recurse back to the
@@ -462,10 +478,6 @@ config X86_CYCLONE_TIMER
462 def_bool y 478 def_bool y
463 depends on X86_GENERICARCH 479 depends on X86_GENERICARCH
464 480
465config ES7000_CLUSTERED_APIC
466 def_bool y
467 depends on SMP && X86_ES7000 && MPENTIUMIII
468
469source "arch/x86/Kconfig.cpu" 481source "arch/x86/Kconfig.cpu"
470 482
471config HPET_TIMER 483config HPET_TIMER
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe370..85a78575956c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
515config X86_DS 515config X86_DS
516 def_bool X86_PTRACE_BTS 516 def_bool X86_PTRACE_BTS
517 depends on X86_DEBUGCTLMSR 517 depends on X86_DEBUGCTLMSR
518 select HAVE_HW_BRANCH_TRACER
518 519
519config X86_PTRACE_BTS 520config X86_PTRACE_BTS
520 bool "Branch Trace Store" 521 bool "Branch Trace Store"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..fa013f529b74 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,14 +186,10 @@ config IOMMU_LEAK
186 Add a simple leak tracer to the IOMMU code. This is useful when you 186 Add a simple leak tracer to the IOMMU code. This is useful when you
187 are debugging a buggy device driver that leaks IOMMU mappings. 187 are debugging a buggy device driver that leaks IOMMU mappings.
188 188
189config MMIOTRACE_HOOKS
190 bool
191
192config MMIOTRACE 189config MMIOTRACE
193 bool "Memory mapped IO tracing" 190 bool "Memory mapped IO tracing"
194 depends on DEBUG_KERNEL && PCI 191 depends on DEBUG_KERNEL && PCI
195 select TRACING 192 select TRACING
196 select MMIOTRACE_HOOKS
197 help 193 help
198 Mmiotrace traces Memory Mapped I/O access and is meant for 194 Mmiotrace traces Memory Mapped I/O access and is meant for
199 debugging and reverse engineering. It is called from the ioremap 195 debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b39328a..7e8e8b25f5f6 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
74{ 74{
75 u8 pending; 75 u8 pending;
76 asm volatile("int $0x16; setnz %0" 76 asm volatile("int $0x16; setnz %0"
77 : "=rm" (pending) 77 : "=qm" (pending)
78 : "a" (0x0100)); 78 : "a" (0x0100));
79 return pending; 79 return pending;
80} 80}
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8d676d8ecde9..9830681446ad 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void)
113 acpi_pci_disabled = 1; 113 acpi_pci_disabled = 1;
114 acpi_noirq_set(); 114 acpi_noirq_set();
115} 115}
116extern int acpi_irq_balance_set(char *str);
117 116
118/* routines for saving/restoring kernel state */ 117/* routines for saving/restoring kernel state */
119extern int acpi_save_state_mem(void); 118extern int acpi_save_state_mem(void);
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1a30c0440c6b..ac302a2fa339 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -251,13 +251,6 @@ struct amd_iommu {
251 /* Pointer to PCI device of this IOMMU */ 251 /* Pointer to PCI device of this IOMMU */
252 struct pci_dev *dev; 252 struct pci_dev *dev;
253 253
254 /*
255 * Capability pointer. There could be more than one IOMMU per PCI
256 * device function if there are more than one AMD IOMMU capability
257 * pointers.
258 */
259 u16 cap_ptr;
260
261 /* physical address of MMIO space */ 254 /* physical address of MMIO space */
262 u64 mmio_phys; 255 u64 mmio_phys;
263 /* virtual address of MMIO space */ 256 /* virtual address of MMIO space */
@@ -266,6 +259,13 @@ struct amd_iommu {
266 /* capabilities of that IOMMU read from ACPI */ 259 /* capabilities of that IOMMU read from ACPI */
267 u32 cap; 260 u32 cap;
268 261
262 /*
263 * Capability pointer. There could be more than one IOMMU per PCI
264 * device function if there are more than one AMD IOMMU capability
265 * pointers.
266 */
267 u16 cap_ptr;
268
269 /* pci domain of this IOMMU */ 269 /* pci domain of this IOMMU */
270 u16 pci_seg; 270 u16 pci_seg;
271 271
@@ -284,19 +284,19 @@ struct amd_iommu {
284 /* size of command buffer */ 284 /* size of command buffer */
285 u32 cmd_buf_size; 285 u32 cmd_buf_size;
286 286
287 /* event buffer virtual address */
288 u8 *evt_buf;
289 /* size of event buffer */ 287 /* size of event buffer */
290 u32 evt_buf_size; 288 u32 evt_buf_size;
289 /* event buffer virtual address */
290 u8 *evt_buf;
291 /* MSI number for event interrupt */ 291 /* MSI number for event interrupt */
292 u16 evt_msi_num; 292 u16 evt_msi_num;
293 293
294 /* if one, we need to send a completion wait command */
295 int need_sync;
296
297 /* true if interrupts for this IOMMU are already enabled */ 294 /* true if interrupts for this IOMMU are already enabled */
298 bool int_enabled; 295 bool int_enabled;
299 296
297 /* if one, we need to send a completion wait command */
298 int need_sync;
299
300 /* default dma_ops domain for that IOMMU */ 300 /* default dma_ops domain for that IOMMU */
301 struct dma_ops_domain *default_dom; 301 struct dma_ops_domain *default_dom;
302}; 302};
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 1d9543b9d358..ce547f24a1cd 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void)
24#define INT_DELIVERY_MODE (dest_Fixed) 24#define INT_DELIVERY_MODE (dest_Fixed)
25#define INT_DEST_MODE (0) /* phys delivery to target proc */ 25#define INT_DEST_MODE (0) /* phys delivery to target proc */
26#define NO_BALANCE_IRQ (0) 26#define NO_BALANCE_IRQ (0)
27#define WAKE_SECONDARY_VIA_INIT
28
29 27
30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 28static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
31{ 29{
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 7f225a4b2a26..097794ff6b79 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
71/* Make sure we keep the same behaviour */ 71/* Make sure we keep the same behaviour */
72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
73{ 73{
74#ifdef CONFIG_X86_32 74#ifdef CONFIG_X86_64
75 return 0;
76#else
77 struct dma_mapping_ops *ops = get_dma_ops(dev); 75 struct dma_mapping_ops *ops = get_dma_ops(dev);
78 if (ops->mapping_error) 76 if (ops->mapping_error)
79 return ops->mapping_error(dev, dma_addr); 77 return ops->mapping_error(dev, dma_addr);
80 78
81 return (dma_addr == bad_dma_address);
82#endif 79#endif
80 return (dma_addr == bad_dma_address);
83} 81}
84 82
85#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) 83#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
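
For context, a minimal driver-side sketch of how the helper changed above is typically consumed; it is not part of this patch, and the device, buffer and length parameters are hypothetical. With the reordered #ifdef, the bad_dma_address fallback is now shared by 32-bit and 64-bit x86, while the ops->mapping_error() hook remains 64-bit only.

/*
 * Hypothetical sketch (needs <linux/dma-mapping.h>): map a buffer and
 * check the result with dma_mapping_error() as unified above.
 */
static int example_map_buffer(struct device *dev, void *buf, size_t len,
                              dma_addr_t *handle)
{
        *handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
        if (dma_mapping_error(dev, *handle))
                return -ENOMEM;         /* ops->mapping_error() or bad_dma_address */
        return 0;
}
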
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a190bf48..99b6c39774a4 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -7,13 +7,12 @@
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It assumes:
15 * - get_task_struct on all parameter tasks 14 * - get_task_struct on all traced tasks
16 * - current is allowed to trace parameter tasks 15 * - current is allowed to trace tasks
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -23,13 +22,21 @@
23#ifndef _ASM_X86_DS_H 22#ifndef _ASM_X86_DS_H
24#define _ASM_X86_DS_H 23#define _ASM_X86_DS_H
25 24
26#ifdef CONFIG_X86_DS
27 25
28#include <linux/types.h> 26#include <linux/types.h>
29#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/err.h>
29
30 30
31#ifdef CONFIG_X86_DS
31 32
32struct task_struct; 33struct task_struct;
34struct ds_tracer;
35struct bts_tracer;
36struct pebs_tracer;
37
38typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
39typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
33 40
34/* 41/*
35 * Request BTS or PEBS 42 * Request BTS or PEBS
@@ -37,60 +44,62 @@ struct task_struct;
37 * Due to alignement constraints, the actual buffer may be slightly 44 * Due to alignement constraints, the actual buffer may be slightly
38 * smaller than the requested or provided buffer. 45 * smaller than the requested or provided buffer.
39 * 46 *
40 * Returns 0 on success; -Eerrno otherwise 47 * Returns a pointer to a tracer structure on success, or
48 * ERR_PTR(errcode) on failure.
49 *
50 * The interrupt threshold is independent from the overflow callback
51 * to allow users to use their own overflow interrupt handling mechanism.
41 * 52 *
42 * task: the task to request recording for; 53 * task: the task to request recording for;
43 * NULL for per-cpu recording on the current cpu 54 * NULL for per-cpu recording on the current cpu
44 * base: the base pointer for the (non-pageable) buffer; 55 * base: the base pointer for the (non-pageable) buffer;
45 * NULL if buffer allocation requested 56 * size: the size of the provided buffer in bytes
46 * size: the size of the requested or provided buffer
47 * ovfl: pointer to a function to be called on buffer overflow; 57 * ovfl: pointer to a function to be called on buffer overflow;
48 * NULL if cyclic buffer requested 58 * NULL if cyclic buffer requested
59 * th: the interrupt threshold in records from the end of the buffer;
60 * -1 if no interrupt threshold is requested.
49 */ 61 */
50typedef void (*ds_ovfl_callback_t)(struct task_struct *); 62extern struct bts_tracer *ds_request_bts(struct task_struct *task,
51extern int ds_request_bts(struct task_struct *task, void *base, size_t size, 63 void *base, size_t size,
52 ds_ovfl_callback_t ovfl); 64 bts_ovfl_callback_t ovfl, size_t th);
53extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, 65extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
54 ds_ovfl_callback_t ovfl); 66 void *base, size_t size,
67 pebs_ovfl_callback_t ovfl,
68 size_t th);
55 69
56/* 70/*
57 * Release BTS or PEBS resources 71 * Release BTS or PEBS resources
58 * 72 *
59 * Frees buffers allocated on ds_request.
60 *
61 * Returns 0 on success; -Eerrno otherwise 73 * Returns 0 on success; -Eerrno otherwise
62 * 74 *
63 * task: the task to release resources for; 75 * tracer: the tracer handle returned from ds_request_~()
64 * NULL to release resources for the current cpu
65 */ 76 */
66extern int ds_release_bts(struct task_struct *task); 77extern int ds_release_bts(struct bts_tracer *tracer);
67extern int ds_release_pebs(struct task_struct *task); 78extern int ds_release_pebs(struct pebs_tracer *tracer);
68 79
69/* 80/*
70 * Return the (array) index of the write pointer. 81 * Get the (array) index of the write pointer.
71 * (assuming an array of BTS/PEBS records) 82 * (assuming an array of BTS/PEBS records)
72 * 83 *
73 * Returns -Eerrno on error 84 * Returns 0 on success; -Eerrno on error
74 * 85 *
75 * task: the task to access; 86 * tracer: the tracer handle returned from ds_request_~()
76 * NULL to access the current cpu 87 * pos (out): will hold the result
77 * pos (out): if not NULL, will hold the result
78 */ 88 */
79extern int ds_get_bts_index(struct task_struct *task, size_t *pos); 89extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
80extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); 90extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
81 91
82/* 92/*
83 * Return the (array) index one record beyond the end of the array. 93 * Get the (array) index one record beyond the end of the array.
84 * (assuming an array of BTS/PEBS records) 94 * (assuming an array of BTS/PEBS records)
85 * 95 *
86 * Returns -Eerrno on error 96 * Returns 0 on success; -Eerrno on error
87 * 97 *
88 * task: the task to access; 98 * tracer: the tracer handle returned from ds_request_~()
89 * NULL to access the current cpu 99 * pos (out): will hold the result
90 * pos (out): if not NULL, will hold the result
91 */ 100 */
92extern int ds_get_bts_end(struct task_struct *task, size_t *pos); 101extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
93extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); 102extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
94 103
95/* 104/*
96 * Provide a pointer to the BTS/PEBS record at parameter index. 105 * Provide a pointer to the BTS/PEBS record at parameter index.
@@ -101,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
101 * 110 *
102 * Returns the size of a single record on success; -Eerrno on error 111 * Returns the size of a single record on success; -Eerrno on error
103 * 112 *
104 * task: the task to access; 113 * tracer: the tracer handle returned from ds_request_~()
105 * NULL to access the current cpu
106 * index: the index of the requested record 114 * index: the index of the requested record
107 * record (out): pointer to the requested record 115 * record (out): pointer to the requested record
108 */ 116 */
109extern int ds_access_bts(struct task_struct *task, 117extern int ds_access_bts(struct bts_tracer *tracer,
110 size_t index, const void **record); 118 size_t index, const void **record);
111extern int ds_access_pebs(struct task_struct *task, 119extern int ds_access_pebs(struct pebs_tracer *tracer,
112 size_t index, const void **record); 120 size_t index, const void **record);
113 121
114/* 122/*
@@ -128,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task,
128 * 136 *
129 * Returns the number of bytes written or -Eerrno. 137 * Returns the number of bytes written or -Eerrno.
130 * 138 *
131 * task: the task to access; 139 * tracer: the tracer handle returned from ds_request_~()
132 * NULL to access the current cpu
133 * buffer: the buffer to write 140 * buffer: the buffer to write
134 * size: the size of the buffer 141 * size: the size of the buffer
135 */ 142 */
136extern int ds_write_bts(struct task_struct *task, 143extern int ds_write_bts(struct bts_tracer *tracer,
137 const void *buffer, size_t size); 144 const void *buffer, size_t size);
138extern int ds_write_pebs(struct task_struct *task, 145extern int ds_write_pebs(struct pebs_tracer *tracer,
139 const void *buffer, size_t size); 146 const void *buffer, size_t size);
140 147
141/* 148/*
142 * Same as ds_write_bts/pebs, but omit ownership checks.
143 *
144 * This is needed to have some other task than the owner of the
145 * BTS/PEBS buffer or the parameter task itself write into the
146 * respective buffer.
147 */
148extern int ds_unchecked_write_bts(struct task_struct *task,
149 const void *buffer, size_t size);
150extern int ds_unchecked_write_pebs(struct task_struct *task,
151 const void *buffer, size_t size);
152
153/*
154 * Reset the write pointer of the BTS/PEBS buffer. 149 * Reset the write pointer of the BTS/PEBS buffer.
155 * 150 *
156 * Returns 0 on success; -Eerrno on error 151 * Returns 0 on success; -Eerrno on error
157 * 152 *
158 * task: the task to access; 153 * tracer: the tracer handle returned from ds_request_~()
159 * NULL to access the current cpu
160 */ 154 */
161extern int ds_reset_bts(struct task_struct *task); 155extern int ds_reset_bts(struct bts_tracer *tracer);
162extern int ds_reset_pebs(struct task_struct *task); 156extern int ds_reset_pebs(struct pebs_tracer *tracer);
163 157
164/* 158/*
165 * Clear the BTS/PEBS buffer and reset the write pointer. 159 * Clear the BTS/PEBS buffer and reset the write pointer.
@@ -167,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task);
167 * 161 *
168 * Returns 0 on success; -Eerrno on error 162 * Returns 0 on success; -Eerrno on error
169 * 163 *
170 * task: the task to access; 164 * tracer: the tracer handle returned from ds_request_~()
171 * NULL to access the current cpu
172 */ 165 */
173extern int ds_clear_bts(struct task_struct *task); 166extern int ds_clear_bts(struct bts_tracer *tracer);
174extern int ds_clear_pebs(struct task_struct *task); 167extern int ds_clear_pebs(struct pebs_tracer *tracer);
175 168
176/* 169/*
177 * Provide the PEBS counter reset value. 170 * Provide the PEBS counter reset value.
178 * 171 *
179 * Returns 0 on success; -Eerrno on error 172 * Returns 0 on success; -Eerrno on error
180 * 173 *
181 * task: the task to access; 174 * tracer: the tracer handle returned from ds_request_pebs()
182 * NULL to access the current cpu
183 * value (out): the counter reset value 175 * value (out): the counter reset value
184 */ 176 */
185extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); 177extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
186 178
187/* 179/*
188 * Set the PEBS counter reset value. 180 * Set the PEBS counter reset value.
189 * 181 *
190 * Returns 0 on success; -Eerrno on error 182 * Returns 0 on success; -Eerrno on error
191 * 183 *
192 * task: the task to access; 184 * tracer: the tracer handle returned from ds_request_pebs()
193 * NULL to access the current cpu
194 * value: the new counter reset value 185 * value: the new counter reset value
195 */ 186 */
196extern int ds_set_pebs_reset(struct task_struct *task, u64 value); 187extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
197 188
198/* 189/*
199 * Initialization 190 * Initialization
@@ -206,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
206/* 197/*
207 * The DS context - part of struct thread_struct. 198 * The DS context - part of struct thread_struct.
208 */ 199 */
200#define MAX_SIZEOF_DS (12 * 8)
201
209struct ds_context { 202struct ds_context {
210 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ 203 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
211 unsigned char *ds; 204 unsigned char ds[MAX_SIZEOF_DS];
212 /* the owner of the BTS and PEBS configuration, respectively */ 205 /* the owner of the BTS and PEBS configuration, respectively */
213 struct task_struct *owner[2]; 206 struct ds_tracer *owner[2];
214 /* buffer overflow notification function for BTS and PEBS */
215 ds_ovfl_callback_t callback[2];
216 /* the original buffer address */
217 void *buffer[2];
218 /* the number of allocated pages for on-request allocated buffers */
219 unsigned int pages[2];
220 /* use count */ 207 /* use count */
221 unsigned long count; 208 unsigned long count;
222 /* a pointer to the context location inside the thread_struct 209 /* a pointer to the context location inside the thread_struct
@@ -232,7 +219,8 @@ extern void ds_free(struct ds_context *context);
232 219
233#else /* CONFIG_X86_DS */ 220#else /* CONFIG_X86_DS */
234 221
235#define ds_init_intel(config) do {} while (0) 222struct cpuinfo_x86;
223static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
236 224
237#endif /* CONFIG_X86_DS */ 225#endif /* CONFIG_X86_DS */
238#endif /* _ASM_X86_DS_H */ 226#endif /* _ASM_X86_DS_H */
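
For illustration, a minimal sketch of how a caller might use the reworked tracer-handle interface documented above. It is not taken from this patch; the helper name and buffer handling are hypothetical.

/*
 * Hypothetical usage sketch (needs <asm/ds.h> and <linux/err.h>): request
 * per-task BTS tracing into a caller-provided, non-pageable buffer with no
 * overflow callback and no interrupt threshold, i.e. a cyclic buffer.
 */
static struct bts_tracer *example_enable_bts(struct task_struct *task,
                                             void *buffer, size_t size)
{
        struct bts_tracer *tracer;

        tracer = ds_request_bts(task, buffer, size, NULL, (size_t)-1);
        if (IS_ERR(tracer))
                return tracer;          /* ERR_PTR(errcode) on failure */

        /* ... collect trace data ...; eventually ds_release_bts(tracer). */
        return tracer;
}
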
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 380f0b4f17ed..e24ef876915f 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,31 +9,27 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline cpumask_t target_cpus_cluster(void)
13{ 13{
14#if defined CONFIG_ES7000_CLUSTERED_APIC
15 return CPU_MASK_ALL; 14 return CPU_MASK_ALL;
16#else 15}
16
17static inline cpumask_t target_cpus(void)
18{
17 return cpumask_of_cpu(smp_processor_id()); 19 return cpumask_of_cpu(smp_processor_id());
18#endif
19} 20}
20 21
21#if defined CONFIG_ES7000_CLUSTERED_APIC 22#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
22#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 23#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
23#define INT_DELIVERY_MODE (dest_LowestPrio) 24#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
24#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ 25#define NO_BALANCE_IRQ_CLUSTER (1)
25#define NO_BALANCE_IRQ (1) 26
26#undef WAKE_SECONDARY_VIA_INIT
27#define WAKE_SECONDARY_VIA_MIP
28#else
29#define APIC_DFR_VALUE (APIC_DFR_FLAT) 27#define APIC_DFR_VALUE (APIC_DFR_FLAT)
30#define INT_DELIVERY_MODE (dest_Fixed) 28#define INT_DELIVERY_MODE (dest_Fixed)
31#define INT_DEST_MODE (0) /* phys delivery to target procs */ 29#define INT_DEST_MODE (0) /* phys delivery to target procs */
32#define NO_BALANCE_IRQ (0) 30#define NO_BALANCE_IRQ (0)
33#undef APIC_DEST_LOGICAL 31#undef APIC_DEST_LOGICAL
34#define APIC_DEST_LOGICAL 0x0 32#define APIC_DEST_LOGICAL 0x0
35#define WAKE_SECONDARY_VIA_INIT
36#endif
37 33
38static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 34static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
39{ 35{
@@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu)
60 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 56 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
61 * document number 292116). So here it goes... 57 * document number 292116). So here it goes...
62 */ 58 */
59static inline void init_apic_ldr_cluster(void)
60{
61 unsigned long val;
62 int cpu = smp_processor_id();
63
64 apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
65 val = calculate_ldr(cpu);
66 apic_write(APIC_LDR, val);
67}
68
63static inline void init_apic_ldr(void) 69static inline void init_apic_ldr(void)
64{ 70{
65 unsigned long val; 71 unsigned long val;
@@ -70,10 +76,6 @@ static inline void init_apic_ldr(void)
70 apic_write(APIC_LDR, val); 76 apic_write(APIC_LDR, val);
71} 77}
72 78
73#ifndef CONFIG_X86_GENERICARCH
74extern void enable_apic_mode(void);
75#endif
76
77extern int apic_version [MAX_APICS]; 79extern int apic_version [MAX_APICS];
78static inline void setup_apic_routing(void) 80static inline void setup_apic_routing(void)
79{ 81{
@@ -144,7 +146,7 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
144 return (1); 146 return (1);
145} 147}
146 148
147static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 149static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
148{ 150{
149 int num_bits_set; 151 int num_bits_set;
150 int cpus_found = 0; 152 int cpus_found = 0;
@@ -154,11 +156,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
154 num_bits_set = cpus_weight(cpumask); 156 num_bits_set = cpus_weight(cpumask);
155 /* Return id to all */ 157 /* Return id to all */
156 if (num_bits_set == NR_CPUS) 158 if (num_bits_set == NR_CPUS)
157#if defined CONFIG_ES7000_CLUSTERED_APIC
158 return 0xFF; 159 return 0xFF;
159#else
160 return cpu_to_logical_apicid(0);
161#endif
162 /* 160 /*
163 * The cpus in the mask must all be on the apic cluster. If are not 161 * The cpus in the mask must all be on the apic cluster. If are not
164 * on the same apicid cluster return default value of TARGET_CPUS. 162 * on the same apicid cluster return default value of TARGET_CPUS.
@@ -171,11 +169,40 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
171 if (apicid_cluster(apicid) != 169 if (apicid_cluster(apicid) !=
172 apicid_cluster(new_apicid)){ 170 apicid_cluster(new_apicid)){
173 printk ("%s: Not a valid mask!\n", __func__); 171 printk ("%s: Not a valid mask!\n", __func__);
174#if defined CONFIG_ES7000_CLUSTERED_APIC
175 return 0xFF; 172 return 0xFF;
176#else 173 }
174 apicid = new_apicid;
175 cpus_found++;
176 }
177 cpu++;
178 }
179 return apicid;
180}
181
182static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
183{
184 int num_bits_set;
185 int cpus_found = 0;
186 int cpu;
187 int apicid;
188
189 num_bits_set = cpus_weight(cpumask);
190 /* Return id to all */
191 if (num_bits_set == NR_CPUS)
192 return cpu_to_logical_apicid(0);
193 /*
194 * The cpus in the mask must all be on the apic cluster. If are not
195 * on the same apicid cluster return default value of TARGET_CPUS.
196 */
197 cpu = first_cpu(cpumask);
198 apicid = cpu_to_logical_apicid(cpu);
199 while (cpus_found < num_bits_set) {
200 if (cpu_isset(cpu, cpumask)) {
201 int new_apicid = cpu_to_logical_apicid(cpu);
202 if (apicid_cluster(apicid) !=
203 apicid_cluster(new_apicid)){
204 printk ("%s: Not a valid mask!\n", __func__);
177 return cpu_to_logical_apicid(0); 205 return cpu_to_logical_apicid(0);
178#endif
179 } 206 }
180 apicid = new_apicid; 207 apicid = new_apicid;
181 cpus_found++; 208 cpus_found++;
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 398493461913..78f0daaee436 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -1,36 +1,12 @@
1#ifndef __ASM_ES7000_WAKECPU_H 1#ifndef __ASM_ES7000_WAKECPU_H
2#define __ASM_ES7000_WAKECPU_H 2#define __ASM_ES7000_WAKECPU_H
3 3
4/* 4#define TRAMPOLINE_PHYS_LOW 0x467
5 * This file copes with machines that wakeup secondary CPUs by the 5#define TRAMPOLINE_PHYS_HIGH 0x469
6 * INIT, INIT, STARTUP sequence.
7 */
8
9#ifdef CONFIG_ES7000_CLUSTERED_APIC
10#define WAKE_SECONDARY_VIA_MIP
11#else
12#define WAKE_SECONDARY_VIA_INIT
13#endif
14
15#ifdef WAKE_SECONDARY_VIA_MIP
16extern int es7000_start_cpu(int cpu, unsigned long eip);
17static inline int
18wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
19{
20 int boot_error = 0;
21 boot_error = es7000_start_cpu(phys_apicid, start_eip);
22 return boot_error;
23}
24#endif
25
26#define TRAMPOLINE_LOW phys_to_virt(0x467)
27#define TRAMPOLINE_HIGH phys_to_virt(0x469)
28
29#define boot_cpu_apicid boot_cpu_physical_apicid
30 6
31static inline void wait_for_init_deassert(atomic_t *deassert) 7static inline void wait_for_init_deassert(atomic_t *deassert)
32{ 8{
33#ifdef WAKE_SECONDARY_VIA_INIT 9#ifndef CONFIG_ES7000_CLUSTERED_APIC
34 while (!atomic_read(deassert)) 10 while (!atomic_read(deassert))
35 cpu_relax(); 11 cpu_relax();
36#endif 12#endif
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
50{ 26{
51} 27}
52 28
53#define inquire_remote_apic(apicid) do { \ 29extern void __inquire_remote_apic(int apicid);
54 if (apic_verbosity >= APIC_DEBUG) \ 30
55 __inquire_remote_apic(apicid); \ 31static inline void inquire_remote_apic(int apicid)
56 } while (0) 32{
33 if (apic_verbosity >= APIC_DEBUG)
34 __inquire_remote_apic(apicid);
35}
57 36
58#endif /* __ASM_MACH_WAKECPU_H */ 37#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..7e61b4ceb9a4 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,8 +17,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
17 */ 17 */
18 return addr - 1; 18 return addr - 1;
19} 19}
20#endif
21 20
21#ifdef CONFIG_DYNAMIC_FTRACE
22
23struct dyn_arch_ftrace {
24 /* No extra data needed for x86 */
25};
26
27#endif /* CONFIG_DYNAMIC_FTRACE */
28#endif /* __ASSEMBLY__ */
22#endif /* CONFIG_FUNCTION_TRACER */ 29#endif /* CONFIG_FUNCTION_TRACER */
23 30
31#ifdef CONFIG_FUNCTION_GRAPH_TRACER
32
33#ifndef __ASSEMBLY__
34
35/*
36 * Stack of return addresses for functions
37 * of a thread.
38 * Used in struct thread_info
39 */
40struct ftrace_ret_stack {
41 unsigned long ret;
42 unsigned long func;
43 unsigned long long calltime;
44};
45
46/*
47 * Primary handler of a function return.
48 * It relays on ftrace_return_to_handler.
49 * Defined in entry32.S
50 */
51extern void return_to_handler(void);
52
53#endif /* __ASSEMBLY__ */
54#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
55
24#endif /* _ASM_X86_FTRACE_H */ 56#endif /* _ASM_X86_FTRACE_H */
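
The ftrace_ret_stack entries introduced above record, per thread, the original return address, the traced function and the entry timestamp so that return_to_handler() can later unwind and time the call. A rough, hypothetical sketch of how such a per-thread stack could be maintained follows; the real logic lives in the function graph tracer code, not this header, and the helper name, depth bookkeeping and array size below are assumptions.

/*
 * Hypothetical sketch only, not the kernel's implementation: push one
 * record onto a fixed-size array of return-stack entries before the
 * return address is redirected to return_to_handler().
 */
static int example_push_return_trace(struct ftrace_ret_stack *stack, int *depth,
                                     int max_depth, unsigned long ret,
                                     unsigned long func, unsigned long long now)
{
        if (*depth >= max_depth)
                return -EBUSY;          /* full: leave the real return address alone */

        stack[*depth].ret = ret;
        stack[*depth].func = func;
        stack[*depth].calltime = now;
        (*depth)++;
        return 0;
}
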
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 5cbd4fcc06fd..0ac17d33a8c7 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_GENAPIC_32_H 2#define _ASM_X86_GENAPIC_32_H
3 3
4#include <asm/mpspec.h> 4#include <asm/mpspec.h>
5#include <asm/atomic.h>
5 6
6/* 7/*
7 * Generic APIC driver interface. 8 * Generic APIC driver interface.
@@ -65,6 +66,14 @@ struct genapic {
65 void (*send_IPI_allbutself)(int vector); 66 void (*send_IPI_allbutself)(int vector);
66 void (*send_IPI_all)(int vector); 67 void (*send_IPI_all)(int vector);
67#endif 68#endif
69 int (*wakeup_cpu)(int apicid, unsigned long start_eip);
70 int trampoline_phys_low;
71 int trampoline_phys_high;
72 void (*wait_for_init_deassert)(atomic_t *deassert);
73 void (*smp_callin_clear_local_apic)(void);
74 void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
75 void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
76 void (*inquire_remote_apic)(int apicid);
68}; 77};
69 78
70#define APICFUNC(x) .x = x, 79#define APICFUNC(x) .x = x,
@@ -105,16 +114,24 @@ struct genapic {
105 APICFUNC(get_apic_id) \ 114 APICFUNC(get_apic_id) \
106 .apic_id_mask = APIC_ID_MASK, \ 115 .apic_id_mask = APIC_ID_MASK, \
107 APICFUNC(cpu_mask_to_apicid) \ 116 APICFUNC(cpu_mask_to_apicid) \
108 APICFUNC(vector_allocation_domain) \ 117 APICFUNC(vector_allocation_domain) \
109 APICFUNC(acpi_madt_oem_check) \ 118 APICFUNC(acpi_madt_oem_check) \
110 IPIFUNC(send_IPI_mask) \ 119 IPIFUNC(send_IPI_mask) \
111 IPIFUNC(send_IPI_allbutself) \ 120 IPIFUNC(send_IPI_allbutself) \
112 IPIFUNC(send_IPI_all) \ 121 IPIFUNC(send_IPI_all) \
113 APICFUNC(enable_apic_mode) \ 122 APICFUNC(enable_apic_mode) \
114 APICFUNC(phys_pkg_id) \ 123 APICFUNC(phys_pkg_id) \
124 .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \
125 .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \
126 APICFUNC(wait_for_init_deassert) \
127 APICFUNC(smp_callin_clear_local_apic) \
128 APICFUNC(store_NMI_vector) \
129 APICFUNC(restore_NMI_vector) \
130 APICFUNC(inquire_remote_apic) \
115} 131}
116 132
117extern struct genapic *genapic; 133extern struct genapic *genapic;
134extern void es7000_update_genapic_to_cluster(void);
118 135
119enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 136enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
120#define get_uv_system_type() UV_NONE 137#define get_uv_system_type() UV_NONE
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 13c4e96199ea..2cae011668b7 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -32,6 +32,8 @@ struct genapic {
32 unsigned int (*get_apic_id)(unsigned long x); 32 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id); 33 unsigned long (*set_apic_id)(unsigned int id);
34 unsigned long apic_id_mask; 34 unsigned long apic_id_mask;
35 /* wakeup_secondary_cpu */
36 int (*wakeup_cpu)(int apicid, unsigned long start_eip);
35}; 37};
36 38
37extern struct genapic *genapic; 39extern struct genapic *genapic;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6afd9933a7dd..25d527ca1362 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -188,17 +188,14 @@ extern void restore_IO_APIC_setup(void);
188extern void reinit_intr_remapped_IO_APIC(int); 188extern void reinit_intr_remapped_IO_APIC(int);
189#endif 189#endif
190 190
191extern int probe_nr_irqs(void); 191extern void probe_nr_irqs_gsi(void);
192 192
193#else /* !CONFIG_X86_IO_APIC */ 193#else /* !CONFIG_X86_IO_APIC */
194#define io_apic_assign_pci_irqs 0 194#define io_apic_assign_pci_irqs 0
195static const int timer_through_8259 = 0; 195static const int timer_through_8259 = 0;
196static inline void ioapic_init_mappings(void) { } 196static inline void ioapic_init_mappings(void) { }
197 197
198static inline int probe_nr_irqs(void) 198static inline void probe_nr_irqs_gsi(void) { }
199{
200 return NR_IRQS;
201}
202#endif 199#endif
203 200
204#endif /* _ASM_X86_IO_APIC_H */ 201#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
new file mode 100644
index 000000000000..c1f06289b14b
--- /dev/null
+++ b/arch/x86/include/asm/iomap.h
@@ -0,0 +1,30 @@
1/*
2 * Copyright © 2008 Ingo Molnar
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
17 */
18
19#include <linux/fs.h>
20#include <linux/mm.h>
21#include <linux/uaccess.h>
22#include <asm/cacheflush.h>
23#include <asm/pgtable.h>
24#include <asm/tlbflush.h>
25
26void *
27iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
28
29void
30iounmap_atomic(void *kvaddr, enum km_type type);
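
A short, hypothetical usage sketch of the two helpers declared in the new header; it is not part of this patch, and the pfn, offset, value and the KM_USER0/PAGE_KERNEL choices are only examples.

/*
 * Hypothetical sketch (needs <asm/iomap.h>): map a page frame atomically,
 * poke one 32-bit word, and tear the mapping down again. Real users pick
 * the km_type slot and page protection to match their context.
 */
static void example_write_word(unsigned long pfn, unsigned long offset, u32 val)
{
        void *vaddr = iomap_atomic_prot_pfn(pfn, KM_USER0, PAGE_KERNEL);

        *(volatile u32 *)(vaddr + offset) = val;
        iounmap_atomic(vaddr, KM_USER0);
}
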
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index e4a552d44465..0b500c5b6446 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,7 +6,6 @@ extern void no_iommu_init(void);
6extern struct dma_mapping_ops nommu_dma_ops; 6extern struct dma_mapping_ops nommu_dma_ops;
7extern int force_iommu, no_iommu; 7extern int force_iommu, no_iommu;
8extern int iommu_detected; 8extern int iommu_detected;
9extern int dmar_disabled;
10 9
11extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); 10extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
12 11
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f941..f7ff65032b9d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,12 +101,23 @@
101#define LAST_VM86_IRQ 15 101#define LAST_VM86_IRQ 15
102#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) 102#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
103 103
104#define NR_IRQS_LEGACY 16
105
104#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) 106#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
107
108#ifndef CONFIG_SPARSE_IRQ
105# if NR_CPUS < MAX_IO_APICS 109# if NR_CPUS < MAX_IO_APICS
106# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) 110# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
107# else 111# else
108# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) 112# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
109# endif 113# endif
114#else
115# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
116# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
117# else
118# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
119# endif
120#endif
110 121
111#elif defined(CONFIG_X86_VOYAGER) 122#elif defined(CONFIG_X86_VOYAGER)
112 123
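
As a worked example of the sizing above (assuming NR_VECTORS = 256 and the usual 64-bit MAX_IO_APICS = 128): with CONFIG_SPARSE_IRQ and NR_CPUS = 64, 8 * 64 = 512 is smaller than 32 * 128 = 4096, so NR_IRQS = 256 + 4096 = 4352; with NR_CPUS = 4096, 8 * 4096 = 32768 dominates and NR_IRQS = 256 + 32768 = 33024. NR_IRQS is then only an upper bound on the irq number space, since with sparse irq numbering the descriptors behind it are allocated as needed.
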
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index ff3a6c236c00..6cb3a467e067 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -32,11 +32,13 @@ static inline cpumask_t target_cpus(void)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 32#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) 33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
34#define send_IPI_self (genapic->send_IPI_self) 34#define send_IPI_self (genapic->send_IPI_self)
35#define wakeup_secondary_cpu (genapic->wakeup_cpu)
35extern void setup_apic_routing(void); 36extern void setup_apic_routing(void);
36#else 37#else
37#define INT_DELIVERY_MODE dest_LowestPrio 38#define INT_DELIVERY_MODE dest_LowestPrio
38#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ 39#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
39#define TARGET_CPUS (target_cpus()) 40#define TARGET_CPUS (target_cpus())
41#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
40/* 42/*
41 * Set up the logical destination ID. 43 * Set up the logical destination ID.
42 * 44 *
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index 9d80db91e992..ceb013660146 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -1,17 +1,8 @@
1#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H 1#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
2#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H 2#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
3 3
4/* 4#define TRAMPOLINE_PHYS_LOW (0x467)
5 * This file copes with machines that wakeup secondary CPUs by the 5#define TRAMPOLINE_PHYS_HIGH (0x469)
6 * INIT, INIT, STARTUP sequence.
7 */
8
9#define WAKE_SECONDARY_VIA_INIT
10
11#define TRAMPOLINE_LOW phys_to_virt(0x467)
12#define TRAMPOLINE_HIGH phys_to_virt(0x469)
13
14#define boot_cpu_apicid boot_cpu_physical_apicid
15 6
16static inline void wait_for_init_deassert(atomic_t *deassert) 7static inline void wait_for_init_deassert(atomic_t *deassert)
17{ 8{
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
33{ 24{
34} 25}
35 26
36#define inquire_remote_apic(apicid) do { \ 27extern void __inquire_remote_apic(int apicid);
37 if (apic_verbosity >= APIC_DEBUG) \ 28
38 __inquire_remote_apic(apicid); \ 29static inline void inquire_remote_apic(int apicid)
39 } while (0) 30{
31 if (apic_verbosity >= APIC_DEBUG)
32 __inquire_remote_apic(apicid);
33}
40 34
41#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ 35#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h
index dbab36d64d48..23bf52103b89 100644
--- a/arch/x86/include/asm/mach-default/smpboot_hooks.h
+++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
13 CMOS_WRITE(0xa, 0xf); 13 CMOS_WRITE(0xa, 0xf);
14 local_flush_tlb(); 14 local_flush_tlb();
15 pr_debug("1.\n"); 15 pr_debug("1.\n");
16 *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; 16 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
17 start_eip >> 4;
17 pr_debug("2.\n"); 18 pr_debug("2.\n");
18 *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; 19 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
20 start_eip & 0xf;
19 pr_debug("3.\n"); 21 pr_debug("3.\n");
20} 22}
21 23
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
32 */ 34 */
33 CMOS_WRITE(0, 0xf); 35 CMOS_WRITE(0, 0xf);
34 36
35 *((volatile long *) phys_to_virt(0x467)) = 0; 37 *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
36} 38}
37 39
38static inline void __init smpboot_setup_io_apic(void) 40static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index 5180bd7478fb..e430f47df667 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -27,6 +27,7 @@
27#define vector_allocation_domain (genapic->vector_allocation_domain) 27#define vector_allocation_domain (genapic->vector_allocation_domain)
28#define enable_apic_mode (genapic->enable_apic_mode) 28#define enable_apic_mode (genapic->enable_apic_mode)
29#define phys_pkg_id (genapic->phys_pkg_id) 29#define phys_pkg_id (genapic->phys_pkg_id)
30#define wakeup_secondary_cpu (genapic->wakeup_cpu)
30 31
31extern void generic_bigsmp_probe(void); 32extern void generic_bigsmp_probe(void);
32 33
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
new file mode 100644
index 000000000000..1ab16b168c8a
--- /dev/null
+++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
@@ -0,0 +1,12 @@
1#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
2#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
3
4#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
5#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
6#define wait_for_init_deassert (genapic->wait_for_init_deassert)
7#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
8#define store_NMI_vector (genapic->store_NMI_vector)
9#define restore_NMI_vector (genapic->restore_NMI_vector)
10#define inquire_remote_apic (genapic->inquire_remote_apic)
11
12#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
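
For illustration, a hypothetical sketch (not from this patch) of what these wrappers buy the generic subarchitecture: SMP boot code can stay subarch-agnostic and reach whatever hooks the probed genapic installed. The helper name and arguments below are made up.

/*
 * Hypothetical sketch: on the generic subarch each of these names resolves
 * through the installed genapic; other subarchs provide the same names as
 * inlines or constants in their own mach_wakecpu.h.
 */
static int example_boot_secondary(int apicid, unsigned long start_eip,
                                  atomic_t *init_deasserted)
{
        wait_for_init_deassert(init_deasserted);
        return wakeup_secondary_cpu(apicid, start_eip);  /* genapic->wakeup_cpu */
}
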
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 485bdf059ffb..07f1af494ca5 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void)
34 34
35extern int early_pfn_to_nid(unsigned long pfn); 35extern int early_pfn_to_nid(unsigned long pfn);
36 36
37extern void resume_map_numa_kva(pgd_t *pgd);
38
37#else /* !CONFIG_NUMA */ 39#else /* !CONFIG_NUMA */
38 40
39#define get_memcfg_numa get_memcfg_numa_flat 41#define get_memcfg_numa get_memcfg_numa_flat
40 42
43static inline void resume_map_numa_kva(pgd_t *pgd) {}
44
41#endif /* CONFIG_NUMA */ 45#endif /* CONFIG_NUMA */
42 46
43#ifdef CONFIG_DISCONTIGMEM 47#ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h
index c577bda5b1c5..6f499df8eddb 100644
--- a/arch/x86/include/asm/numaq/wakecpu.h
+++ b/arch/x86/include/asm/numaq/wakecpu.h
@@ -3,12 +3,8 @@
3 3
4/* This file copes with machines that wakeup secondary CPUs by NMIs */ 4/* This file copes with machines that wakeup secondary CPUs by NMIs */
5 5
6#define WAKE_SECONDARY_VIA_NMI 6#define TRAMPOLINE_PHYS_LOW (0x8)
7 7#define TRAMPOLINE_PHYS_HIGH (0xa)
8#define TRAMPOLINE_LOW phys_to_virt(0x8)
9#define TRAMPOLINE_HIGH phys_to_virt(0xa)
10
11#define boot_cpu_apicid boot_cpu_logical_apicid
12 8
13/* We don't do anything here because we use NMI's to boot instead */ 9/* We don't do anything here because we use NMI's to boot instead */
14static inline void wait_for_init_deassert(atomic_t *deassert) 10static inline void wait_for_init_deassert(atomic_t *deassert)
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void)
27static inline void store_NMI_vector(unsigned short *high, unsigned short *low) 23static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
28{ 24{
29 printk("Storing NMI vector\n"); 25 printk("Storing NMI vector\n");
30 *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); 26 *high =
31 *low = *((volatile unsigned short *) TRAMPOLINE_LOW); 27 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
28 *low =
29 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
32} 30}
33 31
34static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) 32static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
35{ 33{
36 printk("Restoring NMI vector\n"); 34 printk("Restoring NMI vector\n");
37 *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; 35 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
38 *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; 36 *high;
37 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
38 *low;
39} 39}
40 40
41#define inquire_remote_apic(apicid) {} 41static inline void inquire_remote_apic(int apicid)
42{
43}
42 44
43#endif /* __ASM_NUMAQ_WAKECPU_H */ 45#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index 5b28995d664e..d02d936840a3 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void);
34 */ 34 */
35#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) 35#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
36 36
37#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
38
39#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ 37#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
40 dma_addr_t ADDR_NAME; 38 dma_addr_t ADDR_NAME;
41#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ 39#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
@@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void);
49#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ 47#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
50 (((PTR)->LEN_NAME) = (VAL)) 48 (((PTR)->LEN_NAME) = (VAL))
51 49
52#else
53/* No IOMMU */
54
55#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
56#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
57#define pci_unmap_addr(PTR, ADDR_NAME) (0)
58#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
59#define pci_unmap_len(PTR, LEN_NAME) (0)
60#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
61
62#endif
63
64#endif /* __KERNEL__ */ 50#endif /* __KERNEL__ */
65 51
66#endif /* _ASM_X86_PCI_64_H */ 52#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index d1531c8480b7..eefb0594b058 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
271extern int do_set_thread_area(struct task_struct *p, int idx, 271extern int do_set_thread_area(struct task_struct *p, int idx,
272 struct user_desc __user *info, int can_allocate); 272 struct user_desc __user *info, int can_allocate);
273 273
274#define __ARCH_WANT_COMPAT_SYS_PTRACE
275
276#endif /* __KERNEL__ */ 274#endif /* __KERNEL__ */
277 275
278#endif /* !__ASSEMBLY__ */ 276#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f12d37237465..294daeb3a006 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -16,6 +16,8 @@ static inline void visws_early_detect(void) { }
16static inline int is_visws_box(void) { return 0; } 16static inline int is_visws_box(void) { return 0; }
17#endif 17#endif
18 18
19extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
20extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
19/* 21/*
20 * Any setup quirks to be performed? 22 * Any setup quirks to be performed?
21 */ 23 */
@@ -39,6 +41,7 @@ struct x86_quirks {
39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, 41 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
40 unsigned short oemsize); 42 unsigned short oemsize);
41 int (*setup_ioapic_ids)(void); 43 int (*setup_ioapic_ids)(void);
44 int (*update_genapic)(void);
42}; 45};
43 46
44extern struct x86_quirks *x86_quirks; 47extern struct x86_quirks *x86_quirks;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..0921b4018c11 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,8 @@
20struct task_struct; 20struct task_struct;
21struct exec_domain; 21struct exec_domain;
22#include <asm/processor.h> 22#include <asm/processor.h>
23#include <asm/ftrace.h>
24#include <asm/atomic.h>
23 25
24struct thread_info { 26struct thread_info {
25 struct task_struct *task; /* main task structure */ 27 struct task_struct *task; /* main task structure */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4850e4b02b61..ff386ff50ed7 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -239,7 +239,7 @@ struct pci_bus;
239void set_pci_bus_resources_arch_default(struct pci_bus *b); 239void set_pci_bus_resources_arch_default(struct pci_bus *b);
240 240
241#ifdef CONFIG_SMP 241#ifdef CONFIG_SMP
242#define mc_capable() (boot_cpu_data.x86_max_cores > 1) 242#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids)
243#define smt_capable() (smp_num_siblings > 1) 243#define smt_capable() (smp_num_siblings > 1)
244#endif 244#endif
245 245
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..99192bb55a53 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -157,6 +157,7 @@ extern int __get_user_bad(void);
157 int __ret_gu; \ 157 int __ret_gu; \
158 unsigned long __val_gu; \ 158 unsigned long __val_gu; \
159 __chk_user_ptr(ptr); \ 159 __chk_user_ptr(ptr); \
160 might_fault(); \
160 switch (sizeof(*(ptr))) { \ 161 switch (sizeof(*(ptr))) { \
161 case 1: \ 162 case 1: \
162 __get_user_x(1, __ret_gu, __val_gu, ptr); \ 163 __get_user_x(1, __ret_gu, __val_gu, ptr); \
@@ -241,6 +242,7 @@ extern void __put_user_8(void);
241 int __ret_pu; \ 242 int __ret_pu; \
242 __typeof__(*(ptr)) __pu_val; \ 243 __typeof__(*(ptr)) __pu_val; \
243 __chk_user_ptr(ptr); \ 244 __chk_user_ptr(ptr); \
245 might_fault(); \
244 __pu_val = x; \ 246 __pu_val = x; \
245 switch (sizeof(*(ptr))) { \ 247 switch (sizeof(*(ptr))) { \
246 case 1: \ 248 case 1: \
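
The two hunks above annotate get_user() and put_user() with might_fault(), which documents (and, with sleep-in-atomic debugging enabled, checks) that they may fault in a user page and sleep. A hypothetical caller-side sketch, not from this patch, with a made-up helper name:

/*
 * Hypothetical sketch (needs <asm/uaccess.h>): get_user()/put_user() may
 * sleep on a page fault, so they are only valid where sleeping is allowed.
 */
static long example_bump_counter(int __user *uptr)
{
        int val;

        if (get_user(val, uptr))        /* may fault in the user page */
                return -EFAULT;
        return put_user(val + 1, uptr);
}
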
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index d095a3aeea1b..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
82static __always_inline unsigned long __must_check 82static __always_inline unsigned long __must_check
83__copy_to_user(void __user *to, const void *from, unsigned long n) 83__copy_to_user(void __user *to, const void *from, unsigned long n)
84{ 84{
85 might_sleep(); 85 might_fault();
86 return __copy_to_user_inatomic(to, from, n); 86 return __copy_to_user_inatomic(to, from, n);
87} 87}
88 88
89static __always_inline unsigned long 89static __always_inline unsigned long
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
137static __always_inline unsigned long 137static __always_inline unsigned long
138__copy_from_user(void *to, const void __user *from, unsigned long n) 138__copy_from_user(void *to, const void __user *from, unsigned long n)
139{ 139{
140 might_sleep(); 140 might_fault();
141 if (__builtin_constant_p(n)) { 141 if (__builtin_constant_p(n)) {
142 unsigned long ret; 142 unsigned long ret;
143 143
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
159static __always_inline unsigned long __copy_from_user_nocache(void *to, 159static __always_inline unsigned long __copy_from_user_nocache(void *to,
160 const void __user *from, unsigned long n) 160 const void __user *from, unsigned long n)
161{ 161{
162 might_sleep(); 162 might_fault();
163 if (__builtin_constant_p(n)) { 163 if (__builtin_constant_p(n)) {
164 unsigned long ret; 164 unsigned long ret;
165 165
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 664f15280f14..84210c479fca 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -29,6 +29,8 @@ static __always_inline __must_check
29int __copy_from_user(void *dst, const void __user *src, unsigned size) 29int __copy_from_user(void *dst, const void __user *src, unsigned size)
30{ 30{
31 int ret = 0; 31 int ret = 0;
32
33 might_fault();
32 if (!__builtin_constant_p(size)) 34 if (!__builtin_constant_p(size))
33 return copy_user_generic(dst, (__force void *)src, size); 35 return copy_user_generic(dst, (__force void *)src, size);
34 switch (size) { 36 switch (size) {
@@ -46,7 +48,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
46 return ret; 48 return ret;
47 case 10: 49 case 10:
48 __get_user_asm(*(u64 *)dst, (u64 __user *)src, 50 __get_user_asm(*(u64 *)dst, (u64 __user *)src,
49 ret, "q", "", "=r", 16); 51 ret, "q", "", "=r", 10);
50 if (unlikely(ret)) 52 if (unlikely(ret))
51 return ret; 53 return ret;
52 __get_user_asm(*(u16 *)(8 + (char *)dst), 54 __get_user_asm(*(u16 *)(8 + (char *)dst),
@@ -71,6 +73,8 @@ static __always_inline __must_check
71int __copy_to_user(void __user *dst, const void *src, unsigned size) 73int __copy_to_user(void __user *dst, const void *src, unsigned size)
72{ 74{
73 int ret = 0; 75 int ret = 0;
76
77 might_fault();
74 if (!__builtin_constant_p(size)) 78 if (!__builtin_constant_p(size))
75 return copy_user_generic((__force void *)dst, src, size); 79 return copy_user_generic((__force void *)dst, src, size);
76 switch (size) { 80 switch (size) {
@@ -113,6 +117,8 @@ static __always_inline __must_check
113int __copy_in_user(void __user *dst, const void __user *src, unsigned size) 117int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
114{ 118{
115 int ret = 0; 119 int ret = 0;
120
121 might_fault();
116 if (!__builtin_constant_p(size)) 122 if (!__builtin_constant_p(size))
117 return copy_user_generic((__force void *)dst, 123 return copy_user_generic((__force void *)dst,
118 (__force void *)src, size); 124 (__force void *)src, size);
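Besides gaining might_fault(), the 10-byte case gets its fault value corrected: the last argument of __get_user_asm() is what lands in ret when the access faults, i.e. the number of bytes reported as not copied, and for a 10-byte copy that faults on its first quadword that has to be 10 rather than 16. A hypothetical caller (struct and function invented for illustration) that passes the residue straight through:

/* Illustrative only: __copy_from_user() returns the bytes left uncopied. */
struct legacy_blob {
        u64 base;
        u16 flags;
} __attribute__((packed));                      /* sizeof == 10 */

static unsigned long example_fetch_blob(struct legacy_blob *dst,
                                        const void __user *src)
{
        /* 0 on success, else the number of bytes not copied --
         * exactly the constant patched from 16 to 10 above */
        return __copy_from_user(dst, src, sizeof(*dst));
}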
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 834b2c1d89fb..d2e415e6666f 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
639__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) 639__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
640#define __NR_timerfd_gettime 287 640#define __NR_timerfd_gettime 287
641__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) 641__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
642#define __NR_paccept 288 642#define __NR_accept4 288
643__SYSCALL(__NR_paccept, sys_paccept) 643__SYSCALL(__NR_accept4, sys_accept4)
644#define __NR_signalfd4 289 644#define __NR_signalfd4 289
645__SYSCALL(__NR_signalfd4, sys_signalfd4) 645__SYSCALL(__NR_signalfd4, sys_signalfd4)
646#define __NR_eventfd2 290 646#define __NR_eventfd2 290
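The paccept() entry point is replaced by accept4() at the same syscall number (288). From userspace the renamed call behaves like accept() with an extra flags argument; a minimal, illustrative use:

#define _GNU_SOURCE
#include <sys/socket.h>

/* Accept a connection with close-on-exec and non-blocking set atomically,
 * avoiding the separate fcntl() calls plain accept() would need. */
int accept_client(int listen_fd)
{
        struct sockaddr_storage addr;
        socklen_t len = sizeof(addr);

        return accept4(listen_fd, (struct sockaddr *)&addr, &len,
                       SOCK_CLOEXEC | SOCK_NONBLOCK);
}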
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e2..1cad9318d217 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,7 +25,7 @@ CFLAGS_tsc.o := $(nostackp)
25 25
26obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 26obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
27obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 27obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
28obj-y += time_$(BITS).o ioport.o ldt.o 28obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
29obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 29obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
30obj-$(CONFIG_X86_VISWS) += visws_quirks.o 30obj-$(CONFIG_X86_VISWS) += visws_quirks.o
31obj-$(CONFIG_X86_32) += probe_roms_32.o 31obj-$(CONFIG_X86_32) += probe_roms_32.o
@@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
41obj-y += process.o 41obj-y += process.o
42obj-y += i387.o xsave.o 42obj-y += i387.o xsave.o
43obj-y += ptrace.o 43obj-y += ptrace.o
44obj-y += ds.o 44obj-$(CONFIG_X86_DS) += ds.o
45obj-$(CONFIG_X86_32) += tls.o 45obj-$(CONFIG_X86_32) += tls.o
46obj-$(CONFIG_IA32_EMULATION) += tls.o 46obj-$(CONFIG_IA32_EMULATION) += tls.o
47obj-y += step.o 47obj-y += step.o
@@ -65,6 +65,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
65obj-$(CONFIG_X86_IO_APIC) += io_apic.o 65obj-$(CONFIG_X86_IO_APIC) += io_apic.o
66obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 66obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
67obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 67obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
68obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
68obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 69obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 70obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 71obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c1f76abae9e..65d0b72777ea 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void)
1343 error = acpi_parse_madt_ioapic_entries(); 1343 error = acpi_parse_madt_ioapic_entries();
1344 if (!error) { 1344 if (!error) {
1345 acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; 1345 acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
1346 acpi_irq_balance_set(NULL);
1347 acpi_ioapic = 1; 1346 acpi_ioapic = 1;
1348 1347
1349 smp_found_config = 1; 1348 smp_found_config = 1;
@@ -1361,6 +1360,17 @@ static void __init acpi_process_madt(void)
1361 disable_acpi(); 1360 disable_acpi();
1362 } 1361 }
1363 } 1362 }
1363
1364 /*
1365 * ACPI supports both logical (e.g. Hyper-Threading) and physical
1366 * processors, where MPS only supports physical.
1367 */
1368 if (acpi_lapic && acpi_ioapic)
1369 printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
1370 "information\n");
1371 else if (acpi_lapic)
1372 printk(KERN_INFO "Using ACPI for processor (LAPIC) "
1373 "configuration information\n");
1364#endif 1374#endif
1365 return; 1375 return;
1366} 1376}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 331b318304eb..a7b6dec6fc3f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
187 187
188 spin_lock_irqsave(&iommu->lock, flags); 188 spin_lock_irqsave(&iommu->lock, flags);
189 ret = __iommu_queue_command(iommu, cmd); 189 ret = __iommu_queue_command(iommu, cmd);
190 if (!ret)
191 iommu->need_sync = 1;
190 spin_unlock_irqrestore(&iommu->lock, flags); 192 spin_unlock_irqrestore(&iommu->lock, flags);
191 193
192 return ret; 194 return ret;
@@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
210 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; 212 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
211 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 213 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
212 214
213 iommu->need_sync = 0;
214
215 spin_lock_irqsave(&iommu->lock, flags); 215 spin_lock_irqsave(&iommu->lock, flags);
216 216
217 if (!iommu->need_sync)
218 goto out;
219
220 iommu->need_sync = 0;
221
217 ret = __iommu_queue_command(iommu, &cmd); 222 ret = __iommu_queue_command(iommu, &cmd);
218 223
219 if (ret) 224 if (ret)
@@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
254 259
255 ret = iommu_queue_command(iommu, &cmd); 260 ret = iommu_queue_command(iommu, &cmd);
256 261
257 iommu->need_sync = 1;
258
259 return ret; 262 return ret;
260} 263}
261 264
@@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
281 284
282 ret = iommu_queue_command(iommu, &cmd); 285 ret = iommu_queue_command(iommu, &cmd);
283 286
284 iommu->need_sync = 1;
285
286 return ret; 287 return ret;
287} 288}
288 289
@@ -343,7 +344,7 @@ static int iommu_map(struct protection_domain *dom,
343 u64 __pte, *pte, *page; 344 u64 __pte, *pte, *page;
344 345
345 bus_addr = PAGE_ALIGN(bus_addr); 346 bus_addr = PAGE_ALIGN(bus_addr);
346 phys_addr = PAGE_ALIGN(bus_addr); 347 phys_addr = PAGE_ALIGN(phys_addr);
347 348
348 /* only support 512GB address spaces for now */ 349 /* only support 512GB address spaces for now */
349 if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) 350 if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
@@ -537,7 +538,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
537 address >>= PAGE_SHIFT; 538 address >>= PAGE_SHIFT;
538 iommu_area_free(dom->bitmap, address, pages); 539 iommu_area_free(dom->bitmap, address, pages);
539 540
540 if (address + pages >= dom->next_bit) 541 if (address >= dom->next_bit)
541 dom->need_flush = true; 542 dom->need_flush = true;
542} 543}
543 544
@@ -599,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
599 continue; 600 continue;
600 601
601 p2 = IOMMU_PTE_PAGE(p1[i]); 602 p2 = IOMMU_PTE_PAGE(p1[i]);
602 for (j = 0; j < 512; ++i) { 603 for (j = 0; j < 512; ++j) {
603 if (!IOMMU_PTE_PRESENT(p2[j])) 604 if (!IOMMU_PTE_PRESENT(p2[j]))
604 continue; 605 continue;
605 p3 = IOMMU_PTE_PAGE(p2[j]); 606 p3 = IOMMU_PTE_PAGE(p2[j]);
@@ -762,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu,
762 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); 763 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
763 764
764 iommu_queue_inv_dev_entry(iommu, devid); 765 iommu_queue_inv_dev_entry(iommu, devid);
765
766 iommu->need_sync = 1;
767} 766}
768 767
769/***************************************************************************** 768/*****************************************************************************
@@ -858,6 +857,9 @@ static int get_device_resources(struct device *dev,
858 print_devid(_bdf, 1); 857 print_devid(_bdf, 1);
859 } 858 }
860 859
860 if (domain_for_device(_bdf) == NULL)
861 set_device_domain(*iommu, *domain, _bdf);
862
861 return 1; 863 return 1;
862} 864}
863 865
@@ -908,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
908 if (address >= dom->aperture_size) 910 if (address >= dom->aperture_size)
909 return; 911 return;
910 912
911 WARN_ON(address & 0xfffULL || address > dom->aperture_size); 913 WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
912 914
913 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; 915 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
914 pte += IOMMU_PTE_L0_INDEX(address); 916 pte += IOMMU_PTE_L0_INDEX(address);
@@ -920,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
920 922
921/* 923/*
922 * This function contains common code for mapping of a physically 924 * This function contains common code for mapping of a physically
923 * contiguous memory region into DMA address space. It is uses by all 925 * contiguous memory region into DMA address space. It is used by all
924 * mapping functions provided by this IOMMU driver. 926 * mapping functions provided with this IOMMU driver.
925 * Must be called with the domain lock held. 927 * Must be called with the domain lock held.
926 */ 928 */
927static dma_addr_t __map_single(struct device *dev, 929static dma_addr_t __map_single(struct device *dev,
@@ -981,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu,
981 dma_addr_t i, start; 983 dma_addr_t i, start;
982 unsigned int pages; 984 unsigned int pages;
983 985
984 if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) 986 if ((dma_addr == bad_dma_address) ||
987 (dma_addr + size > dma_dom->aperture_size))
985 return; 988 return;
986 989
987 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 990 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
@@ -1031,8 +1034,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1031 if (addr == bad_dma_address) 1034 if (addr == bad_dma_address)
1032 goto out; 1035 goto out;
1033 1036
1034 if (unlikely(iommu->need_sync)) 1037 iommu_completion_wait(iommu);
1035 iommu_completion_wait(iommu);
1036 1038
1037out: 1039out:
1038 spin_unlock_irqrestore(&domain->lock, flags); 1040 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1060,8 +1062,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
1060 1062
1061 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1063 __unmap_single(iommu, domain->priv, dma_addr, size, dir);
1062 1064
1063 if (unlikely(iommu->need_sync)) 1065 iommu_completion_wait(iommu);
1064 iommu_completion_wait(iommu);
1065 1066
1066 spin_unlock_irqrestore(&domain->lock, flags); 1067 spin_unlock_irqrestore(&domain->lock, flags);
1067} 1068}
@@ -1127,8 +1128,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1127 goto unmap; 1128 goto unmap;
1128 } 1129 }
1129 1130
1130 if (unlikely(iommu->need_sync)) 1131 iommu_completion_wait(iommu);
1131 iommu_completion_wait(iommu);
1132 1132
1133out: 1133out:
1134 spin_unlock_irqrestore(&domain->lock, flags); 1134 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1173,8 +1173,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1173 s->dma_address = s->dma_length = 0; 1173 s->dma_address = s->dma_length = 0;
1174 } 1174 }
1175 1175
1176 if (unlikely(iommu->need_sync)) 1176 iommu_completion_wait(iommu);
1177 iommu_completion_wait(iommu);
1178 1177
1179 spin_unlock_irqrestore(&domain->lock, flags); 1178 spin_unlock_irqrestore(&domain->lock, flags);
1180} 1179}
@@ -1225,8 +1224,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
1225 goto out; 1224 goto out;
1226 } 1225 }
1227 1226
1228 if (unlikely(iommu->need_sync)) 1227 iommu_completion_wait(iommu);
1229 iommu_completion_wait(iommu);
1230 1228
1231out: 1229out:
1232 spin_unlock_irqrestore(&domain->lock, flags); 1230 spin_unlock_irqrestore(&domain->lock, flags);
@@ -1257,8 +1255,7 @@ static void free_coherent(struct device *dev, size_t size,
1257 1255
1258 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 1256 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
1259 1257
1260 if (unlikely(iommu->need_sync)) 1258 iommu_completion_wait(iommu);
1261 iommu_completion_wait(iommu);
1262 1259
1263 spin_unlock_irqrestore(&domain->lock, flags); 1260 spin_unlock_irqrestore(&domain->lock, flags);
1264 1261
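Taken together, the amd_iommu.c hunks move the need_sync bookkeeping under iommu->lock: the flag is set inside iommu_queue_command() whenever a command was actually queued, and iommu_completion_wait() now tests and clears it under the same lock, so the mapping paths can call it unconditionally instead of sampling the flag without the lock. Condensed sketch of the wait side (paraphrased, not verbatim; __example_wait() is a hypothetical stand-in for the queue-COMPL_WAIT-and-poll body):

static void example_completion_wait(struct amd_iommu *iommu)
{
        unsigned long flags;

        spin_lock_irqsave(&iommu->lock, flags);
        if (iommu->need_sync) {
                iommu->need_sync = 0;
                __example_wait(iommu);          /* hypothetical helper */
        }
        spin_unlock_irqrestore(&iommu->lock, flags);
}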
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0cdcda35a05f..30ae2701b3df 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
121LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings 121LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
122 we find in ACPI */ 122 we find in ACPI */
123unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ 123unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
124int amd_iommu_isolate; /* if 1, device isolation is enabled */ 124int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */
125bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ 125bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
126 126
127LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the 127LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -1213,7 +1213,9 @@ static int __init parse_amd_iommu_options(char *str)
1213 for (; *str; ++str) { 1213 for (; *str; ++str) {
1214 if (strncmp(str, "isolate", 7) == 0) 1214 if (strncmp(str, "isolate", 7) == 0)
1215 amd_iommu_isolate = 1; 1215 amd_iommu_isolate = 1;
1216 if (strncmp(str, "fullflush", 11) == 0) 1216 if (strncmp(str, "share", 5) == 0)
1217 amd_iommu_isolate = 0;
1218 if (strncmp(str, "fullflush", 9) == 0)
1217 amd_iommu_unmap_flush = true; 1219 amd_iommu_unmap_flush = true;
1218 } 1220 }
1219 1221
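The option parser now defaults to device isolation, accepts "share" to turn it back off, and passes strncmp() the real length of "fullflush" (9 characters, not 11). With the over-long count the comparison also covered the literal's terminating NUL, so "fullflush" followed by further comma-separated options would never have matched. Illustrative shape of the corrected scan (simplified, not verbatim):

static int __init example_parse(char *str)
{
        for (; *str; ++str) {
                if (strncmp(str, "share", 5) == 0)
                        amd_iommu_isolate = 0;          /* devices share one domain */
                if (strncmp(str, "fullflush", 9) == 0)
                        amd_iommu_unmap_flush = true;   /* flush on every unmap */
        }
        return 1;
}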
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 04a7f960bbc0..16f94879b525 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1315,7 +1315,7 @@ void enable_x2apic(void)
1315 } 1315 }
1316} 1316}
1317 1317
1318void enable_IR_x2apic(void) 1318void __init enable_IR_x2apic(void)
1319{ 1319{
1320#ifdef CONFIG_INTR_REMAP 1320#ifdef CONFIG_INTR_REMAP
1321 int ret; 1321 int ret;
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
33#include <linux/cpufreq.h> 33#include <linux/cpufreq.h>
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/ftrace.h>
36 37
37#include <linux/acpi.h> 38#include <linux/acpi.h>
38#include <acpi/processor.h> 39#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
391 unsigned int next_perf_state = 0; /* Index into perf table */ 392 unsigned int next_perf_state = 0; /* Index into perf table */
392 unsigned int i; 393 unsigned int i;
393 int result = 0; 394 int result = 0;
395 struct power_trace it;
394 396
395 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); 397 dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
396 398
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
427 } 429 }
428 } 430 }
429 431
432 trace_power_mark(&it, POWER_PSTATE, next_perf_state);
433
430 switch (data->cpu_feature) { 434 switch (data->cpu_feature) {
431 case SYSTEM_INTEL_MSR_CAPABLE: 435 case SYSTEM_INTEL_MSR_CAPABLE:
432 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 436 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d3dcd58b87cd..7f05f44b97e9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
115 u32 i = 0; 115 u32 i = 0;
116 116
117 if (cpu_family == CPU_HW_PSTATE) { 117 if (cpu_family == CPU_HW_PSTATE) {
118 rdmsr(MSR_PSTATE_STATUS, lo, hi); 118 if (data->currpstate == HW_PSTATE_INVALID) {
119 i = lo & HW_PSTATE_MASK; 119 /* read (initial) hw pstate if not yet set */
120 data->currpstate = i; 120 rdmsr(MSR_PSTATE_STATUS, lo, hi);
121 i = lo & HW_PSTATE_MASK;
122
123 /*
124 * a workaround for family 11h erratum 311 might cause
125 * an "out-of-range Pstate if the core is in Pstate-0
126 */
127 if (i >= data->numps)
128 data->currpstate = HW_PSTATE_0;
129 else
130 data->currpstate = i;
131 }
121 return 0; 132 return 0;
122 } 133 }
123 do { 134 do {
@@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1121 } 1132 }
1122 1133
1123 data->cpu = pol->cpu; 1134 data->cpu = pol->cpu;
1135 data->currpstate = HW_PSTATE_INVALID;
1124 1136
1125 if (powernow_k8_cpu_init_acpi(data)) { 1137 if (powernow_k8_cpu_init_acpi(data)) {
1126 /* 1138 /*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..65cfb5d7f77f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,6 +5,19 @@
5 * http://www.gnu.org/licenses/gpl.html 5 * http://www.gnu.org/licenses/gpl.html
6 */ 6 */
7 7
8
9enum pstate {
10 HW_PSTATE_INVALID = 0xff,
11 HW_PSTATE_0 = 0,
12 HW_PSTATE_1 = 1,
13 HW_PSTATE_2 = 2,
14 HW_PSTATE_3 = 3,
15 HW_PSTATE_4 = 4,
16 HW_PSTATE_5 = 5,
17 HW_PSTATE_6 = 6,
18 HW_PSTATE_7 = 7,
19};
20
8struct powernow_k8_data { 21struct powernow_k8_data {
9 unsigned int cpu; 22 unsigned int cpu;
10 23
@@ -23,7 +36,9 @@ struct powernow_k8_data {
23 u32 exttype; /* extended interface = 1 */ 36 u32 exttype; /* extended interface = 1 */
24 37
25 /* keep track of the current fid / vid or pstate */ 38 /* keep track of the current fid / vid or pstate */
26 u32 currvid, currfid, currpstate; 39 u32 currvid;
40 u32 currfid;
41 enum pstate currpstate;
27 42
28 /* the powernow_table includes all frequency and vid/fid pairings: 43 /* the powernow_table includes all frequency and vid/fid pairings:
29 * fid are the lower 8 bits of the index, vid are the upper 8 bits. 44 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
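currpstate becomes a proper enum with HW_PSTATE_INVALID (0xff) as an explicit "not read yet" sentinel, which is what lets query_current_values_with_pending_wait() above read MSR_PSTATE_STATUS only once and clamp an erratum-311 out-of-range value to HW_PSTATE_0. A trivial, hypothetical helper shows the intended use of the sentinel:

/* Hypothetical helper, not in the patch: the sentinel cannot collide with
 * the real hardware P-states 0..7. */
static bool example_pstate_known(const struct powernow_k8_data *data)
{
        return data->currpstate != HW_PSTATE_INVALID;
}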
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..816f27f289b1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
307 set_cpu_cap(c, X86_FEATURE_P4); 307 set_cpu_cap(c, X86_FEATURE_P4);
308 if (c->x86 == 6) 308 if (c->x86 == 6)
309 set_cpu_cap(c, X86_FEATURE_P3); 309 set_cpu_cap(c, X86_FEATURE_P3);
310#endif
310 311
311 if (cpu_has_bts) 312 if (cpu_has_bts)
312 ptrace_bts_init_intel(c); 313 ptrace_bts_init_intel(c);
313 314
314#endif
315
316 detect_extended_topology(c); 315 detect_extended_topology(c);
317 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { 316 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
318 /* 317 /*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 2b69994fd3a8..19a8c2c0389f 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -7,13 +7,12 @@
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - per-thread and per-cpu allocation of BTS and PEBS
10 * - buffer memory allocation (optional) 10 * - buffer overflow handling (to be done)
11 * - buffer overflow handling
12 * - buffer access 11 * - buffer access
13 * 12 *
14 * It assumes: 13 * It assumes:
15 * - get_task_struct on all parameter tasks 14 * - get_task_struct on all traced tasks
16 * - current is allowed to trace parameter tasks 15 * - current is allowed to trace tasks
17 * 16 *
18 * 17 *
19 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -21,8 +20,6 @@
21 */ 20 */
22 21
23 22
24#ifdef CONFIG_X86_DS
25
26#include <asm/ds.h> 23#include <asm/ds.h>
27 24
28#include <linux/errno.h> 25#include <linux/errno.h>
@@ -30,6 +27,7 @@
30#include <linux/slab.h> 27#include <linux/slab.h>
31#include <linux/sched.h> 28#include <linux/sched.h>
32#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/kernel.h>
33 31
34 32
35/* 33/*
@@ -46,6 +44,33 @@ struct ds_configuration {
46}; 44};
47static struct ds_configuration ds_cfg; 45static struct ds_configuration ds_cfg;
48 46
47/*
48 * A BTS or PEBS tracer.
49 *
50 * This holds the configuration of the tracer and serves as a handle
51 * to identify tracers.
52 */
53struct ds_tracer {
54 /* the DS context (partially) owned by this tracer */
55 struct ds_context *context;
56 /* the buffer provided on ds_request() and its size in bytes */
57 void *buffer;
58 size_t size;
59};
60
61struct bts_tracer {
62 /* the common DS part */
63 struct ds_tracer ds;
64 /* buffer overflow notification function */
65 bts_ovfl_callback_t ovfl;
66};
67
68struct pebs_tracer {
69 /* the common DS part */
70 struct ds_tracer ds;
71 /* buffer overflow notification function */
72 pebs_ovfl_callback_t ovfl;
73};
49 74
50/* 75/*
51 * Debug Store (DS) save area configuration (see Intel64 and IA32 76 * Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -109,34 +134,13 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
109 (*(unsigned long *)base) = value; 134 (*(unsigned long *)base) = value;
110} 135}
111 136
137#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
112 138
113/*
114 * Locking is done only for allocating BTS or PEBS resources and for
115 * guarding context and buffer memory allocation.
116 *
117 * Most functions require the current task to own the ds context part
118 * they are going to access. All the locking is done when validating
119 * access to the context.
120 */
121static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
122 139
123/* 140/*
124 * Validate that the current task is allowed to access the BTS/PEBS 141 * Locking is done only for allocating BTS or PEBS resources.
125 * buffer of the parameter task.
126 *
127 * Returns 0, if access is granted; -Eerrno, otherwise.
128 */ 142 */
129static inline int ds_validate_access(struct ds_context *context, 143static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
130 enum ds_qualifier qual)
131{
132 if (!context)
133 return -EPERM;
134
135 if (context->owner[qual] == current)
136 return 0;
137
138 return -EPERM;
139}
140 144
141 145
142/* 146/*
@@ -185,80 +189,43 @@ static inline int check_tracer(struct task_struct *task)
185 * 189 *
186 * Contexts are use-counted. They are allocated on first access and 190 * Contexts are use-counted. They are allocated on first access and
187 * deallocated when the last user puts the context. 191 * deallocated when the last user puts the context.
188 *
189 * We distinguish between an allocating and a non-allocating get of a
190 * context:
191 * - the allocating get is used for requesting BTS/PEBS resources. It
192 * requires the caller to hold the global ds_lock.
193 * - the non-allocating get is used for all other cases. A
194 * non-existing context indicates an error. It acquires and releases
195 * the ds_lock itself for obtaining the context.
196 *
197 * A context and its DS configuration are allocated and deallocated
198 * together. A context always has a DS configuration of the
199 * appropriate size.
200 */ 192 */
201static DEFINE_PER_CPU(struct ds_context *, system_context); 193static DEFINE_PER_CPU(struct ds_context *, system_context);
202 194
203#define this_system_context per_cpu(system_context, smp_processor_id()) 195#define this_system_context per_cpu(system_context, smp_processor_id())
204 196
205/*
206 * Returns the pointer to the parameter task's context or to the
207 * system-wide context, if task is NULL.
208 *
209 * Increases the use count of the returned context, if not NULL.
210 */
211static inline struct ds_context *ds_get_context(struct task_struct *task) 197static inline struct ds_context *ds_get_context(struct task_struct *task)
212{ 198{
213 struct ds_context *context;
214
215 spin_lock(&ds_lock);
216
217 context = (task ? task->thread.ds_ctx : this_system_context);
218 if (context)
219 context->count++;
220
221 spin_unlock(&ds_lock);
222
223 return context;
224}
225
226/*
227 * Same as ds_get_context, but allocates the context and it's DS
228 * structure, if necessary; returns NULL; if out of memory.
229 *
230 * pre: requires ds_lock to be held
231 */
232static inline struct ds_context *ds_alloc_context(struct task_struct *task)
233{
234 struct ds_context **p_context = 199 struct ds_context **p_context =
235 (task ? &task->thread.ds_ctx : &this_system_context); 200 (task ? &task->thread.ds_ctx : &this_system_context);
236 struct ds_context *context = *p_context; 201 struct ds_context *context = *p_context;
202 unsigned long irq;
237 203
238 if (!context) { 204 if (!context) {
239 context = kzalloc(sizeof(*context), GFP_KERNEL); 205 context = kzalloc(sizeof(*context), GFP_KERNEL);
240
241 if (!context) 206 if (!context)
242 return NULL; 207 return NULL;
243 208
244 context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); 209 spin_lock_irqsave(&ds_lock, irq);
245 if (!context->ds) {
246 kfree(context);
247 return NULL;
248 }
249 210
250 *p_context = context; 211 if (*p_context) {
212 kfree(context);
251 213
252 context->this = p_context; 214 context = *p_context;
253 context->task = task; 215 } else {
216 *p_context = context;
254 217
255 if (task) 218 context->this = p_context;
256 set_tsk_thread_flag(task, TIF_DS_AREA_MSR); 219 context->task = task;
257 220
258 if (!task || (task == current)) 221 if (task)
259 wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); 222 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
260 223
261 get_tracer(task); 224 if (!task || (task == current))
225 wrmsrl(MSR_IA32_DS_AREA,
226 (unsigned long)context->ds);
227 }
228 spin_unlock_irqrestore(&ds_lock, irq);
262 } 229 }
263 230
264 context->count++; 231 context->count++;
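ds_get_context() folds the old ds_alloc_context() into itself and switches ds_lock to the irq-safe variants; the GFP_KERNEL allocation stays outside the locked region, and a racing allocation is resolved by discarding the loser. The general shape of that pattern, with hypothetical example_* names:

/* Illustrative pattern only: sleepable allocation outside the lock,
 * install-or-discard decision under it. */
struct example_ctx {
        int payload;
};
static DEFINE_SPINLOCK(example_lock);

static struct example_ctx *example_get(struct example_ctx **slot)
{
        struct example_ctx *ctx = *slot;
        unsigned long irq;

        if (!ctx) {
                ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);        /* may sleep */
                if (!ctx)
                        return NULL;

                spin_lock_irqsave(&example_lock, irq);
                if (*slot) {
                        kfree(ctx);             /* lost the race, reuse the winner */
                        ctx = *slot;
                } else {
                        *slot = ctx;
                }
                spin_unlock_irqrestore(&example_lock, irq);
        }
        return ctx;
}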
@@ -266,16 +233,14 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
266 return context; 233 return context;
267} 234}
268 235
269/*
270 * Decreases the use count of the parameter context, if not NULL.
271 * Deallocates the context, if the use count reaches zero.
272 */
273static inline void ds_put_context(struct ds_context *context) 236static inline void ds_put_context(struct ds_context *context)
274{ 237{
238 unsigned long irq;
239
275 if (!context) 240 if (!context)
276 return; 241 return;
277 242
278 spin_lock(&ds_lock); 243 spin_lock_irqsave(&ds_lock, irq);
279 244
280 if (--context->count) 245 if (--context->count)
281 goto out; 246 goto out;
@@ -288,351 +253,351 @@ static inline void ds_put_context(struct ds_context *context)
288 if (!context->task || (context->task == current)) 253 if (!context->task || (context->task == current))
289 wrmsrl(MSR_IA32_DS_AREA, 0); 254 wrmsrl(MSR_IA32_DS_AREA, 0);
290 255
291 put_tracer(context->task);
292
293 /* free any leftover buffers from tracers that did not
294 * deallocate them properly. */
295 kfree(context->buffer[ds_bts]);
296 kfree(context->buffer[ds_pebs]);
297 kfree(context->ds);
298 kfree(context); 256 kfree(context);
299 out: 257 out:
300 spin_unlock(&ds_lock); 258 spin_unlock_irqrestore(&ds_lock, irq);
301} 259}
302 260
303 261
304/* 262/*
305 * Handle a buffer overflow 263 * Handle a buffer overflow
306 * 264 *
307 * task: the task whose buffers are overflowing;
308 * NULL for a buffer overflow on the current cpu
309 * context: the ds context 265 * context: the ds context
310 * qual: the buffer type 266 * qual: the buffer type
311 */ 267 */
312static void ds_overflow(struct task_struct *task, struct ds_context *context, 268static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
313 enum ds_qualifier qual) 269{
314{ 270 switch (qual) {
315 if (!context) 271 case ds_bts: {
316 return; 272 struct bts_tracer *tracer =
317 273 container_of(context->owner[qual],
318 if (context->callback[qual]) 274 struct bts_tracer, ds);
319 (*context->callback[qual])(task); 275 if (tracer->ovfl)
320 276 tracer->ovfl(tracer);
321 /* todo: do some more overflow handling */ 277 }
278 break;
279 case ds_pebs: {
280 struct pebs_tracer *tracer =
281 container_of(context->owner[qual],
282 struct pebs_tracer, ds);
283 if (tracer->ovfl)
284 tracer->ovfl(tracer);
285 }
286 break;
287 }
322} 288}
323 289
324 290
325/* 291static void ds_install_ds_config(struct ds_context *context,
326 * Allocate a non-pageable buffer of the parameter size. 292 enum ds_qualifier qual,
327 * Checks the memory and the locked memory rlimit. 293 void *base, size_t size, size_t ith)
328 *
329 * Returns the buffer, if successful;
330 * NULL, if out of memory or rlimit exceeded.
331 *
332 * size: the requested buffer size in bytes
333 * pages (out): if not NULL, contains the number of pages reserved
334 */
335static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
336{ 294{
337 unsigned long rlim, vm, pgsz; 295 unsigned long buffer, adj;
338 void *buffer;
339
340 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
341
342 rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
343 vm = current->mm->total_vm + pgsz;
344 if (rlim < vm)
345 return NULL;
346 296
347 rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; 297 /* adjust the buffer address and size to meet alignment
348 vm = current->mm->locked_vm + pgsz; 298 * constraints:
349 if (rlim < vm) 299 * - buffer is double-word aligned
350 return NULL; 300 * - size is multiple of record size
301 *
302 * We checked the size at the very beginning; we have enough
303 * space to do the adjustment.
304 */
305 buffer = (unsigned long)base;
351 306
352 buffer = kzalloc(size, GFP_KERNEL); 307 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
353 if (!buffer) 308 buffer += adj;
354 return NULL; 309 size -= adj;
355 310
356 current->mm->total_vm += pgsz; 311 size /= ds_cfg.sizeof_rec[qual];
357 current->mm->locked_vm += pgsz; 312 size *= ds_cfg.sizeof_rec[qual];
358 313
359 if (pages) 314 ds_set(context->ds, qual, ds_buffer_base, buffer);
360 *pages = pgsz; 315 ds_set(context->ds, qual, ds_index, buffer);
316 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
361 317
362 return buffer; 318 /* The value for 'no threshold' is -1, which will set the
319 * threshold outside of the buffer, just like we want it.
320 */
321 ds_set(context->ds, qual,
322 ds_interrupt_threshold, buffer + size - ith);
363} 323}
364 324
365static int ds_request(struct task_struct *task, void *base, size_t size, 325static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
366 ds_ovfl_callback_t ovfl, enum ds_qualifier qual) 326 struct task_struct *task,
327 void *base, size_t size, size_t th)
367{ 328{
368 struct ds_context *context; 329 struct ds_context *context;
369 unsigned long buffer, adj; 330 unsigned long irq;
370 const unsigned long alignment = (1 << 3); 331 int error;
371 int error = 0;
372 332
333 error = -EOPNOTSUPP;
373 if (!ds_cfg.sizeof_ds) 334 if (!ds_cfg.sizeof_ds)
374 return -EOPNOTSUPP; 335 goto out;
375 336
376 /* we require some space to do alignment adjustments below */ 337 error = -EINVAL;
377 if (size < (alignment + ds_cfg.sizeof_rec[qual])) 338 if (!base)
378 return -EINVAL; 339 goto out;
379 340
380 /* buffer overflow notification is not yet implemented */ 341 /* we require some space to do alignment adjustments below */
381 if (ovfl) 342 error = -EINVAL;
382 return -EOPNOTSUPP; 343 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
344 goto out;
383 345
346 if (th != (size_t)-1) {
347 th *= ds_cfg.sizeof_rec[qual];
384 348
385 spin_lock(&ds_lock); 349 error = -EINVAL;
350 if (size <= th)
351 goto out;
352 }
386 353
387 if (!check_tracer(task)) 354 tracer->buffer = base;
388 return -EPERM; 355 tracer->size = size;
389 356
390 error = -ENOMEM; 357 error = -ENOMEM;
391 context = ds_alloc_context(task); 358 context = ds_get_context(task);
392 if (!context) 359 if (!context)
393 goto out_unlock; 360 goto out;
394 361 tracer->context = context;
395 error = -EALREADY;
396 if (context->owner[qual] == current)
397 goto out_unlock;
398 error = -EPERM;
399 if (context->owner[qual] != NULL)
400 goto out_unlock;
401 context->owner[qual] = current;
402
403 spin_unlock(&ds_lock);
404 362
405 363
406 error = -ENOMEM; 364 spin_lock_irqsave(&ds_lock, irq);
407 if (!base) {
408 base = ds_allocate_buffer(size, &context->pages[qual]);
409 if (!base)
410 goto out_release;
411
412 context->buffer[qual] = base;
413 }
414 error = 0;
415 365
416 context->callback[qual] = ovfl; 366 error = -EPERM;
417 367 if (!check_tracer(task))
418 /* adjust the buffer address and size to meet alignment 368 goto out_unlock;
419 * constraints: 369 get_tracer(task);
420 * - buffer is double-word aligned
421 * - size is multiple of record size
422 *
423 * We checked the size at the very beginning; we have enough
424 * space to do the adjustment.
425 */
426 buffer = (unsigned long)base;
427 370
428 adj = ALIGN(buffer, alignment) - buffer; 371 error = -EPERM;
429 buffer += adj; 372 if (context->owner[qual])
430 size -= adj; 373 goto out_put_tracer;
374 context->owner[qual] = tracer;
431 375
432 size /= ds_cfg.sizeof_rec[qual]; 376 spin_unlock_irqrestore(&ds_lock, irq);
433 size *= ds_cfg.sizeof_rec[qual];
434 377
435 ds_set(context->ds, qual, ds_buffer_base, buffer);
436 ds_set(context->ds, qual, ds_index, buffer);
437 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
438 378
439 if (ovfl) { 379 ds_install_ds_config(context, qual, base, size, th);
440 /* todo: select a suitable interrupt threshold */
441 } else
442 ds_set(context->ds, qual,
443 ds_interrupt_threshold, buffer + size + 1);
444 380
445 /* we keep the context until ds_release */ 381 return 0;
446 return error;
447
448 out_release:
449 context->owner[qual] = NULL;
450 ds_put_context(context);
451 return error;
452 382
383 out_put_tracer:
384 put_tracer(task);
453 out_unlock: 385 out_unlock:
454 spin_unlock(&ds_lock); 386 spin_unlock_irqrestore(&ds_lock, irq);
455 ds_put_context(context); 387 ds_put_context(context);
388 tracer->context = NULL;
389 out:
456 return error; 390 return error;
457} 391}
458 392
459int ds_request_bts(struct task_struct *task, void *base, size_t size, 393struct bts_tracer *ds_request_bts(struct task_struct *task,
460 ds_ovfl_callback_t ovfl) 394 void *base, size_t size,
395 bts_ovfl_callback_t ovfl, size_t th)
461{ 396{
462 return ds_request(task, base, size, ovfl, ds_bts); 397 struct bts_tracer *tracer;
463} 398 int error;
464 399
465int ds_request_pebs(struct task_struct *task, void *base, size_t size, 400 /* buffer overflow notification is not yet implemented */
466 ds_ovfl_callback_t ovfl) 401 error = -EOPNOTSUPP;
467{ 402 if (ovfl)
468 return ds_request(task, base, size, ovfl, ds_pebs); 403 goto out;
404
405 error = -ENOMEM;
406 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
407 if (!tracer)
408 goto out;
409 tracer->ovfl = ovfl;
410
411 error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
412 if (error < 0)
413 goto out_tracer;
414
415 return tracer;
416
417 out_tracer:
418 kfree(tracer);
419 out:
420 return ERR_PTR(error);
469} 421}
470 422
471static int ds_release(struct task_struct *task, enum ds_qualifier qual) 423struct pebs_tracer *ds_request_pebs(struct task_struct *task,
424 void *base, size_t size,
425 pebs_ovfl_callback_t ovfl, size_t th)
472{ 426{
473 struct ds_context *context; 427 struct pebs_tracer *tracer;
474 int error; 428 int error;
475 429
476 context = ds_get_context(task); 430 /* buffer overflow notification is not yet implemented */
477 error = ds_validate_access(context, qual); 431 error = -EOPNOTSUPP;
478 if (error < 0) 432 if (ovfl)
433 goto out;
434
435 error = -ENOMEM;
436 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
437 if (!tracer)
479 goto out; 438 goto out;
439 tracer->ovfl = ovfl;
480 440
481 kfree(context->buffer[qual]); 441 error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
482 context->buffer[qual] = NULL; 442 if (error < 0)
443 goto out_tracer;
483 444
484 current->mm->total_vm -= context->pages[qual]; 445 return tracer;
485 current->mm->locked_vm -= context->pages[qual];
486 context->pages[qual] = 0;
487 context->owner[qual] = NULL;
488 446
489 /* 447 out_tracer:
490 * we put the context twice: 448 kfree(tracer);
491 * once for the ds_get_context
492 * once for the corresponding ds_request
493 */
494 ds_put_context(context);
495 out: 449 out:
496 ds_put_context(context); 450 return ERR_PTR(error);
497 return error;
498} 451}
499 452
500int ds_release_bts(struct task_struct *task) 453static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
501{ 454{
502 return ds_release(task, ds_bts); 455 BUG_ON(tracer->context->owner[qual] != tracer);
456 tracer->context->owner[qual] = NULL;
457
458 put_tracer(tracer->context->task);
459 ds_put_context(tracer->context);
503} 460}
504 461
505int ds_release_pebs(struct task_struct *task) 462int ds_release_bts(struct bts_tracer *tracer)
506{ 463{
507 return ds_release(task, ds_pebs); 464 if (!tracer)
465 return -EINVAL;
466
467 ds_release(&tracer->ds, ds_bts);
468 kfree(tracer);
469
470 return 0;
508} 471}
509 472
510static int ds_get_index(struct task_struct *task, size_t *pos, 473int ds_release_pebs(struct pebs_tracer *tracer)
511 enum ds_qualifier qual)
512{ 474{
513 struct ds_context *context; 475 if (!tracer)
514 unsigned long base, index; 476 return -EINVAL;
515 int error;
516 477
517 context = ds_get_context(task); 478 ds_release(&tracer->ds, ds_pebs);
518 error = ds_validate_access(context, qual); 479 kfree(tracer);
519 if (error < 0) 480
520 goto out; 481 return 0;
482}
483
484static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
485{
486 unsigned long base, index;
521 487
522 base = ds_get(context->ds, qual, ds_buffer_base); 488 base = ds_get(context->ds, qual, ds_buffer_base);
523 index = ds_get(context->ds, qual, ds_index); 489 index = ds_get(context->ds, qual, ds_index);
524 490
525 error = ((index - base) / ds_cfg.sizeof_rec[qual]); 491 return (index - base) / ds_cfg.sizeof_rec[qual];
526 if (pos)
527 *pos = error;
528 out:
529 ds_put_context(context);
530 return error;
531} 492}
532 493
533int ds_get_bts_index(struct task_struct *task, size_t *pos) 494int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
534{ 495{
535 return ds_get_index(task, pos, ds_bts); 496 if (!tracer)
497 return -EINVAL;
498
499 if (!pos)
500 return -EINVAL;
501
502 *pos = ds_get_index(tracer->ds.context, ds_bts);
503
504 return 0;
536} 505}
537 506
538int ds_get_pebs_index(struct task_struct *task, size_t *pos) 507int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
539{ 508{
540 return ds_get_index(task, pos, ds_pebs); 509 if (!tracer)
510 return -EINVAL;
511
512 if (!pos)
513 return -EINVAL;
514
515 *pos = ds_get_index(tracer->ds.context, ds_pebs);
516
517 return 0;
541} 518}
542 519
543static int ds_get_end(struct task_struct *task, size_t *pos, 520static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
544 enum ds_qualifier qual)
545{ 521{
546 struct ds_context *context; 522 unsigned long base, max;
547 unsigned long base, end;
548 int error;
549
550 context = ds_get_context(task);
551 error = ds_validate_access(context, qual);
552 if (error < 0)
553 goto out;
554 523
555 base = ds_get(context->ds, qual, ds_buffer_base); 524 base = ds_get(context->ds, qual, ds_buffer_base);
556 end = ds_get(context->ds, qual, ds_absolute_maximum); 525 max = ds_get(context->ds, qual, ds_absolute_maximum);
557 526
558 error = ((end - base) / ds_cfg.sizeof_rec[qual]); 527 return (max - base) / ds_cfg.sizeof_rec[qual];
559 if (pos)
560 *pos = error;
561 out:
562 ds_put_context(context);
563 return error;
564} 528}
565 529
566int ds_get_bts_end(struct task_struct *task, size_t *pos) 530int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
567{ 531{
568 return ds_get_end(task, pos, ds_bts); 532 if (!tracer)
533 return -EINVAL;
534
535 if (!pos)
536 return -EINVAL;
537
538 *pos = ds_get_end(tracer->ds.context, ds_bts);
539
540 return 0;
569} 541}
570 542
571int ds_get_pebs_end(struct task_struct *task, size_t *pos) 543int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
572{ 544{
573 return ds_get_end(task, pos, ds_pebs); 545 if (!tracer)
546 return -EINVAL;
547
548 if (!pos)
549 return -EINVAL;
550
551 *pos = ds_get_end(tracer->ds.context, ds_pebs);
552
553 return 0;
574} 554}
575 555
576static int ds_access(struct task_struct *task, size_t index, 556static int ds_access(struct ds_context *context, enum ds_qualifier qual,
577 const void **record, enum ds_qualifier qual) 557 size_t index, const void **record)
578{ 558{
579 struct ds_context *context;
580 unsigned long base, idx; 559 unsigned long base, idx;
581 int error;
582 560
583 if (!record) 561 if (!record)
584 return -EINVAL; 562 return -EINVAL;
585 563
586 context = ds_get_context(task);
587 error = ds_validate_access(context, qual);
588 if (error < 0)
589 goto out;
590
591 base = ds_get(context->ds, qual, ds_buffer_base); 564 base = ds_get(context->ds, qual, ds_buffer_base);
592 idx = base + (index * ds_cfg.sizeof_rec[qual]); 565 idx = base + (index * ds_cfg.sizeof_rec[qual]);
593 566
594 error = -EINVAL;
595 if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) 567 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
596 goto out; 568 return -EINVAL;
597 569
598 *record = (const void *)idx; 570 *record = (const void *)idx;
599 error = ds_cfg.sizeof_rec[qual]; 571
600 out: 572 return ds_cfg.sizeof_rec[qual];
601 ds_put_context(context);
602 return error;
603} 573}
604 574
605int ds_access_bts(struct task_struct *task, size_t index, const void **record) 575int ds_access_bts(struct bts_tracer *tracer, size_t index,
576 const void **record)
606{ 577{
607 return ds_access(task, index, record, ds_bts); 578 if (!tracer)
579 return -EINVAL;
580
581 return ds_access(tracer->ds.context, ds_bts, index, record);
608} 582}
609 583
610int ds_access_pebs(struct task_struct *task, size_t index, const void **record) 584int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
585 const void **record)
611{ 586{
612 return ds_access(task, index, record, ds_pebs); 587 if (!tracer)
588 return -EINVAL;
589
590 return ds_access(tracer->ds.context, ds_pebs, index, record);
613} 591}
614 592
615static int ds_write(struct task_struct *task, const void *record, size_t size, 593static int ds_write(struct ds_context *context, enum ds_qualifier qual,
616 enum ds_qualifier qual, int force) 594 const void *record, size_t size)
617{ 595{
618 struct ds_context *context; 596 int bytes_written = 0;
619 int error;
620 597
621 if (!record) 598 if (!record)
622 return -EINVAL; 599 return -EINVAL;
623 600
624 error = -EPERM;
625 context = ds_get_context(task);
626 if (!context)
627 goto out;
628
629 if (!force) {
630 error = ds_validate_access(context, qual);
631 if (error < 0)
632 goto out;
633 }
634
635 error = 0;
636 while (size) { 601 while (size) {
637 unsigned long base, index, end, write_end, int_th; 602 unsigned long base, index, end, write_end, int_th;
638 unsigned long write_size, adj_write_size; 603 unsigned long write_size, adj_write_size;
@@ -660,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
660 write_end = end; 625 write_end = end;
661 626
662 if (write_end <= index) 627 if (write_end <= index)
663 goto out; 628 break;
664 629
665 write_size = min((unsigned long) size, write_end - index); 630 write_size = min((unsigned long) size, write_end - index);
666 memcpy((void *)index, record, write_size); 631 memcpy((void *)index, record, write_size);
667 632
668 record = (const char *)record + write_size; 633 record = (const char *)record + write_size;
669 size -= write_size; 634 size -= write_size;
670 error += write_size; 635 bytes_written += write_size;
671 636
672 adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; 637 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
673 adj_write_size *= ds_cfg.sizeof_rec[qual]; 638 adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -682,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
682 ds_set(context->ds, qual, ds_index, index); 647 ds_set(context->ds, qual, ds_index, index);
683 648
684 if (index >= int_th) 649 if (index >= int_th)
685 ds_overflow(task, context, qual); 650 ds_overflow(context, qual);
686 } 651 }
687 652
688 out: 653 return bytes_written;
689 ds_put_context(context);
690 return error;
691} 654}
692 655
693int ds_write_bts(struct task_struct *task, const void *record, size_t size) 656int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
694{ 657{
695 return ds_write(task, record, size, ds_bts, /* force = */ 0); 658 if (!tracer)
696} 659 return -EINVAL;
697 660
698int ds_write_pebs(struct task_struct *task, const void *record, size_t size) 661 return ds_write(tracer->ds.context, ds_bts, record, size);
699{
700 return ds_write(task, record, size, ds_pebs, /* force = */ 0);
701} 662}
702 663
703int ds_unchecked_write_bts(struct task_struct *task, 664int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
704 const void *record, size_t size)
705{ 665{
706 return ds_write(task, record, size, ds_bts, /* force = */ 1); 666 if (!tracer)
707} 667 return -EINVAL;
708 668
709int ds_unchecked_write_pebs(struct task_struct *task, 669 return ds_write(tracer->ds.context, ds_pebs, record, size);
710 const void *record, size_t size)
711{
712 return ds_write(task, record, size, ds_pebs, /* force = */ 1);
713} 670}
714 671
715static int ds_reset_or_clear(struct task_struct *task, 672static void ds_reset_or_clear(struct ds_context *context,
716 enum ds_qualifier qual, int clear) 673 enum ds_qualifier qual, int clear)
717{ 674{
718 struct ds_context *context;
719 unsigned long base, end; 675 unsigned long base, end;
720 int error;
721
722 context = ds_get_context(task);
723 error = ds_validate_access(context, qual);
724 if (error < 0)
725 goto out;
726 676
727 base = ds_get(context->ds, qual, ds_buffer_base); 677 base = ds_get(context->ds, qual, ds_buffer_base);
728 end = ds_get(context->ds, qual, ds_absolute_maximum); 678 end = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -731,89 +681,100 @@ static int ds_reset_or_clear(struct task_struct *task,
731 memset((void *)base, 0, end - base); 681 memset((void *)base, 0, end - base);
732 682
733 ds_set(context->ds, qual, ds_index, base); 683 ds_set(context->ds, qual, ds_index, base);
734
735 error = 0;
736 out:
737 ds_put_context(context);
738 return error;
739} 684}
740 685
741int ds_reset_bts(struct task_struct *task) 686int ds_reset_bts(struct bts_tracer *tracer)
742{ 687{
743 return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); 688 if (!tracer)
689 return -EINVAL;
690
691 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
692
693 return 0;
744} 694}
745 695
746int ds_reset_pebs(struct task_struct *task) 696int ds_reset_pebs(struct pebs_tracer *tracer)
747{ 697{
748 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); 698 if (!tracer)
699 return -EINVAL;
700
701 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
702
703 return 0;
749} 704}
750 705
751int ds_clear_bts(struct task_struct *task) 706int ds_clear_bts(struct bts_tracer *tracer)
752{ 707{
753 return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); 708 if (!tracer)
709 return -EINVAL;
710
711 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
712
713 return 0;
754} 714}
755 715
756int ds_clear_pebs(struct task_struct *task) 716int ds_clear_pebs(struct pebs_tracer *tracer)
757{ 717{
758 return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); 718 if (!tracer)
719 return -EINVAL;
720
721 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
722
723 return 0;
759} 724}
760 725
761int ds_get_pebs_reset(struct task_struct *task, u64 *value) 726int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
762{ 727{
763 struct ds_context *context; 728 if (!tracer)
764 int error; 729 return -EINVAL;
765 730
766 if (!value) 731 if (!value)
767 return -EINVAL; 732 return -EINVAL;
768 733
769 context = ds_get_context(task); 734 *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
770 error = ds_validate_access(context, ds_pebs);
771 if (error < 0)
772 goto out;
773 735
774 *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); 736 return 0;
775
776 error = 0;
777 out:
778 ds_put_context(context);
779 return error;
780} 737}
781 738
782int ds_set_pebs_reset(struct task_struct *task, u64 value) 739int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
783{ 740{
784 struct ds_context *context; 741 if (!tracer)
785 int error; 742 return -EINVAL;
786
787 context = ds_get_context(task);
788 error = ds_validate_access(context, ds_pebs);
789 if (error < 0)
790 goto out;
791 743
792 *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; 744 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
793 745
794 error = 0; 746 return 0;
795 out:
796 ds_put_context(context);
797 return error;
798} 747}
799 748
800static const struct ds_configuration ds_cfg_var = { 749static const struct ds_configuration ds_cfg_var = {
801 .sizeof_ds = sizeof(long) * 12, 750 .sizeof_ds = sizeof(long) * 12,
802 .sizeof_field = sizeof(long), 751 .sizeof_field = sizeof(long),
803 .sizeof_rec[ds_bts] = sizeof(long) * 3, 752 .sizeof_rec[ds_bts] = sizeof(long) * 3,
753#ifdef __i386__
804 .sizeof_rec[ds_pebs] = sizeof(long) * 10 754 .sizeof_rec[ds_pebs] = sizeof(long) * 10
755#else
756 .sizeof_rec[ds_pebs] = sizeof(long) * 18
757#endif
805}; 758};
806static const struct ds_configuration ds_cfg_64 = { 759static const struct ds_configuration ds_cfg_64 = {
807 .sizeof_ds = 8 * 12, 760 .sizeof_ds = 8 * 12,
808 .sizeof_field = 8, 761 .sizeof_field = 8,
809 .sizeof_rec[ds_bts] = 8 * 3, 762 .sizeof_rec[ds_bts] = 8 * 3,
763#ifdef __i386__
810 .sizeof_rec[ds_pebs] = 8 * 10 764 .sizeof_rec[ds_pebs] = 8 * 10
765#else
766 .sizeof_rec[ds_pebs] = 8 * 18
767#endif
811}; 768};
812 769
813static inline void 770static inline void
814ds_configure(const struct ds_configuration *cfg) 771ds_configure(const struct ds_configuration *cfg)
815{ 772{
816 ds_cfg = *cfg; 773 ds_cfg = *cfg;
774
775 printk(KERN_INFO "DS available\n");
776
777 BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
817} 778}
818 779
819void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 780void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -821,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
821 switch (c->x86) { 782 switch (c->x86) {
822 case 0x6: 783 case 0x6:
823 switch (c->x86_model) { 784 switch (c->x86_model) {
785 case 0 ... 0xC:
786 /* sorry, don't know about them */
787 break;
824 case 0xD: 788 case 0xD:
825 case 0xE: /* Pentium M */ 789 case 0xE: /* Pentium M */
826 ds_configure(&ds_cfg_var); 790 ds_configure(&ds_cfg_var);
827 break; 791 break;
828 case 0xF: /* Core2 */ 792 default: /* Core2, Atom, ... */
829 case 0x1C: /* Atom */
830 ds_configure(&ds_cfg_64); 793 ds_configure(&ds_cfg_64);
831 break; 794 break;
832 default:
833 /* sorry, don't know about them */
834 break;
835 } 795 }
836 break; 796 break;
837 case 0xF: 797 case 0xF:
@@ -858,7 +818,8 @@ void ds_free(struct ds_context *context)
858 * is dying. There should not be any user of that context left 818 * is dying. There should not be any user of that context left
859 * to disturb us, anymore. */ 819 * to disturb us, anymore. */
860 unsigned long leftovers = context->count; 820 unsigned long leftovers = context->count;
861 while (leftovers--) 821 while (leftovers--) {
822 put_tracer(context->task);
862 ds_put_context(context); 823 ds_put_context(context);
824 }
863} 825}
864#endif /* CONFIG_X86_DS */
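The exported interface changes from task-keyed calls (ds_request_bts(task, ...) returning int, with every accessor re-validating ownership) to an opaque tracer handle returned by the request and passed back to all other calls. A caller-side sketch under the new contract (hypothetical wrapper functions, not from the patch):

static struct bts_tracer *example_start_bts(struct task_struct *task,
                                            void *buffer, size_t size)
{
        struct bts_tracer *tracer;

        tracer = ds_request_bts(task, buffer, size,
                                NULL,           /* no overflow callback yet */
                                (size_t)-1);    /* no interrupt threshold */
        if (IS_ERR(tracer))
                return NULL;

        return tracer;
}

static void example_stop_bts(struct bts_tracer *tracer)
{
        ds_release_bts(tracer);         /* drops the context ref, frees the handle */
}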
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 000000000000..6b1f6f6f8661
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,351 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16#include <linux/sysfs.h>
17
18#include <asm/stacktrace.h>
19
20#include "dumpstack.h"
21
22int panic_on_unrecovered_nmi;
23unsigned int code_bytes = 64;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static int die_counter;
26
27void printk_address(unsigned long address, int reliable)
28{
29 printk(" [<%p>] %s%pS\n", (void *) address,
30 reliable ? "" : "? ", (void *) address);
31}
32
33#ifdef CONFIG_FUNCTION_GRAPH_TRACER
34static void
35print_ftrace_graph_addr(unsigned long addr, void *data,
36 const struct stacktrace_ops *ops,
37 struct thread_info *tinfo, int *graph)
38{
39 struct task_struct *task = tinfo->task;
40 unsigned long ret_addr;
41 int index = task->curr_ret_stack;
42
43 if (addr != (unsigned long)return_to_handler)
44 return;
45
46 if (!task->ret_stack || index < *graph)
47 return;
48
49 index -= *graph;
50 ret_addr = task->ret_stack[index].ret;
51
52 ops->address(data, ret_addr, 1);
53
54 (*graph)++;
55}
56#else
57static inline void
58print_ftrace_graph_addr(unsigned long addr, void *data,
59 const struct stacktrace_ops *ops,
60 struct thread_info *tinfo, int *graph)
61{ }
62#endif
63
64/*
65 * x86-64 can have up to three kernel stacks:
66 * process stack
67 * interrupt stack
68 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
69 */
70
71static inline int valid_stack_ptr(struct thread_info *tinfo,
72 void *p, unsigned int size, void *end)
73{
74 void *t = tinfo;
75 if (end) {
76 if (p < end && p >= (end-THREAD_SIZE))
77 return 1;
78 else
79 return 0;
80 }
81 return p > t && p < t + THREAD_SIZE - size;
82}
83
84unsigned long
85print_context_stack(struct thread_info *tinfo,
86 unsigned long *stack, unsigned long bp,
87 const struct stacktrace_ops *ops, void *data,
88 unsigned long *end, int *graph)
89{
90 struct stack_frame *frame = (struct stack_frame *)bp;
91
92 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
93 unsigned long addr;
94
95 addr = *stack;
96 if (__kernel_text_address(addr)) {
97 if ((unsigned long) stack == bp + sizeof(long)) {
98 ops->address(data, addr, 1);
99 frame = frame->next_frame;
100 bp = (unsigned long) frame;
101 } else {
102 ops->address(data, addr, bp == 0);
103 }
104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
105 }
106 stack++;
107 }
108 return bp;
109}
110
111
112static void
113print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
114{
115 printk(data);
116 print_symbol(msg, symbol);
117 printk("\n");
118}
119
120static void print_trace_warning(void *data, char *msg)
121{
122 printk("%s%s\n", (char *)data, msg);
123}
124
125static int print_trace_stack(void *data, char *name)
126{
127 printk("%s <%s> ", (char *)data, name);
128 return 0;
129}
130
131/*
132 * Print one address/symbol entry per line.
133 */
134static void print_trace_address(void *data, unsigned long addr, int reliable)
135{
136 touch_nmi_watchdog();
137 printk(data);
138 printk_address(addr, reliable);
139}
140
141static const struct stacktrace_ops print_trace_ops = {
142 .warning = print_trace_warning,
143 .warning_symbol = print_trace_warning_symbol,
144 .stack = print_trace_stack,
145 .address = print_trace_address,
146};
147
148void
149show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
150 unsigned long *stack, unsigned long bp, char *log_lvl)
151{
152 printk("%sCall Trace:\n", log_lvl);
153 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
154}
155
156void show_trace(struct task_struct *task, struct pt_regs *regs,
157 unsigned long *stack, unsigned long bp)
158{
159 show_trace_log_lvl(task, regs, stack, bp, "");
160}
161
162void show_stack(struct task_struct *task, unsigned long *sp)
163{
164 show_stack_log_lvl(task, NULL, sp, 0, "");
165}
166
167/*
168 * The architecture-independent dump_stack generator
169 */
170void dump_stack(void)
171{
172 unsigned long bp = 0;
173 unsigned long stack;
174
175#ifdef CONFIG_FRAME_POINTER
176 if (!bp)
177 get_bp(bp);
178#endif
179
180 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
181 current->pid, current->comm, print_tainted(),
182 init_utsname()->release,
183 (int)strcspn(init_utsname()->version, " "),
184 init_utsname()->version);
185 show_trace(NULL, NULL, &stack, bp);
186}
187EXPORT_SYMBOL(dump_stack);
188
189static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
190static int die_owner = -1;
191static unsigned int die_nest_count;
192
193unsigned __kprobes long oops_begin(void)
194{
195 int cpu;
196 unsigned long flags;
197
198 oops_enter();
199
200 /* racy, but better than risking deadlock. */
201 raw_local_irq_save(flags);
202 cpu = smp_processor_id();
203 if (!__raw_spin_trylock(&die_lock)) {
204 if (cpu == die_owner)
205 /* nested oops. should stop eventually */;
206 else
207 __raw_spin_lock(&die_lock);
208 }
209 die_nest_count++;
210 die_owner = cpu;
211 console_verbose();
212 bust_spinlocks(1);
213 return flags;
214}
215
216void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
217{
218 if (regs && kexec_should_crash(current))
219 crash_kexec(regs);
220
221 bust_spinlocks(0);
222 die_owner = -1;
223 add_taint(TAINT_DIE);
224 die_nest_count--;
225 if (!die_nest_count)
226 /* Nest count reaches zero, release the lock. */
227 __raw_spin_unlock(&die_lock);
228 raw_local_irq_restore(flags);
229 oops_exit();
230
231 if (!signr)
232 return;
233 if (in_interrupt())
234 panic("Fatal exception in interrupt");
235 if (panic_on_oops)
236 panic("Fatal exception");
237 do_exit(signr);
238}
239
240int __kprobes __die(const char *str, struct pt_regs *regs, long err)
241{
242#ifdef CONFIG_X86_32
243 unsigned short ss;
244 unsigned long sp;
245#endif
246 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
247#ifdef CONFIG_PREEMPT
248 printk("PREEMPT ");
249#endif
250#ifdef CONFIG_SMP
251 printk("SMP ");
252#endif
253#ifdef CONFIG_DEBUG_PAGEALLOC
254 printk("DEBUG_PAGEALLOC");
255#endif
256 printk("\n");
257 sysfs_printk_last_file();
258 if (notify_die(DIE_OOPS, str, regs, err,
259 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
260 return 1;
261
262 show_registers(regs);
263#ifdef CONFIG_X86_32
264 sp = (unsigned long) (&regs->sp);
265 savesegment(ss, ss);
266 if (user_mode(regs)) {
267 sp = regs->sp;
268 ss = regs->ss & 0xffff;
269 }
270 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
271 print_symbol("%s", regs->ip);
272 printk(" SS:ESP %04x:%08lx\n", ss, sp);
273#else
274 /* Executive summary in case the oops scrolled away */
275 printk(KERN_ALERT "RIP ");
276 printk_address(regs->ip, 1);
277 printk(" RSP <%016lx>\n", regs->sp);
278#endif
279 return 0;
280}
281
282/*
283 * This is gone through when something in the kernel has done something bad
284 * and is about to be terminated:
285 */
286void die(const char *str, struct pt_regs *regs, long err)
287{
288 unsigned long flags = oops_begin();
289 int sig = SIGSEGV;
290
291 if (!user_mode_vm(regs))
292 report_bug(regs->ip, regs);
293
294 if (__die(str, regs, err))
295 sig = 0;
296 oops_end(flags, regs, sig);
297}
298
299void notrace __kprobes
300die_nmi(char *str, struct pt_regs *regs, int do_panic)
301{
302 unsigned long flags;
303
304 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
305 return;
306
307 /*
308 * We are in trouble anyway, let's at least try
309 * to get a message out.
310 */
311 flags = oops_begin();
312 printk(KERN_EMERG "%s", str);
313 printk(" on CPU%d, ip %08lx, registers:\n",
314 smp_processor_id(), regs->ip);
315 show_registers(regs);
316 oops_end(flags, regs, 0);
317 if (do_panic || panic_on_oops)
318 panic("Non maskable interrupt");
319 nmi_exit();
320 local_irq_enable();
321 do_exit(SIGBUS);
322}
323
324static int __init oops_setup(char *s)
325{
326 if (!s)
327 return -EINVAL;
328 if (!strcmp(s, "panic"))
329 panic_on_oops = 1;
330 return 0;
331}
332early_param("oops", oops_setup);
333
334static int __init kstack_setup(char *s)
335{
336 if (!s)
337 return -EINVAL;
338 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
339 return 0;
340}
341early_param("kstack", kstack_setup);
342
343static int __init code_bytes_setup(char *s)
344{
345 code_bytes = simple_strtoul(s, NULL, 0);
346 if (code_bytes > 8192)
347 code_bytes = 8192;
348
349 return 1;
350}
351__setup("code_bytes=", code_bytes_setup);
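
The consolidated print_context_stack() above scans the stack word by word and marks an address as reliable only when it sits directly above the saved frame pointer. A user-space model of that frame-pointer walk (GCC/Clang on x86, built with -fno-omit-frame-pointer; the bounds check here is far weaker than valid_stack_ptr()):

    #include <stdio.h>

    /* Same layout as the kernel's struct stack_frame. */
    struct stack_frame {
    	struct stack_frame *next_frame;   /* saved caller frame pointer */
    	unsigned long return_address;     /* saved return address       */
    };

    static __attribute__((noinline)) void walk_frames(const void *top)
    {
    	struct stack_frame *frame = __builtin_frame_address(0);

    	while ((void *)frame < top) {
    		printf(" [<%p>]\n", (void *)frame->return_address);
    		if (frame->next_frame <= frame)   /* the stack grows down */
    			break;
    		frame = frame->next_frame;
    	}
    }

    static __attribute__((noinline)) void level2(const void *top) { walk_frames(top); }
    static __attribute__((noinline)) void level1(const void *top) { level2(top); }

    int main(void)
    {
    	/* Bound the walk at main's own frame, a crude stand-in for valid_stack_ptr(). */
    	level1(__builtin_frame_address(0));
    	return 0;
    }
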
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 000000000000..da87590b8698
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17extern unsigned long
18print_context_stack(struct thread_info *tinfo,
19 unsigned long *stack, unsigned long bp,
20 const struct stacktrace_ops *ops, void *data,
21 unsigned long *end, int *graph);
22
23extern void
24show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *stack, unsigned long bp, char *log_lvl);
26
27extern void
28show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
29 unsigned long *sp, unsigned long bp, char *log_lvl);
30
31extern unsigned int code_bytes;
32extern int kstack_depth_to_print;
33
34/* The form of the top of the frame on the stack */
35struct stack_frame {
36 struct stack_frame *next_frame;
37 unsigned long return_address;
38};
39#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197b..d593cd1f58dc 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,69 +17,14 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 8 20#include "dumpstack.h"
21#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33
34static inline int valid_stack_ptr(struct thread_info *tinfo,
35 void *p, unsigned int size, void *end)
36{
37 void *t = tinfo;
38 if (end) {
39 if (p < end && p >= (end-THREAD_SIZE))
40 return 1;
41 else
42 return 0;
43 }
44 return p > t && p < t + THREAD_SIZE - size;
45}
46
47/* The form of the top of the frame on the stack */
48struct stack_frame {
49 struct stack_frame *next_frame;
50 unsigned long return_address;
51};
52
53static inline unsigned long
54print_context_stack(struct thread_info *tinfo,
55 unsigned long *stack, unsigned long bp,
56 const struct stacktrace_ops *ops, void *data,
57 unsigned long *end)
58{
59 struct stack_frame *frame = (struct stack_frame *)bp;
60
61 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
62 unsigned long addr;
63
64 addr = *stack;
65 if (__kernel_text_address(addr)) {
66 if ((unsigned long) stack == bp + sizeof(long)) {
67 ops->address(data, addr, 1);
68 frame = frame->next_frame;
69 bp = (unsigned long) frame;
70 } else {
71 ops->address(data, addr, bp == 0);
72 }
73 }
74 stack++;
75 }
76 return bp;
77}
78 21
79void dump_trace(struct task_struct *task, struct pt_regs *regs, 22void dump_trace(struct task_struct *task, struct pt_regs *regs,
80 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
81 const struct stacktrace_ops *ops, void *data) 24 const struct stacktrace_ops *ops, void *data)
82{ 25{
26 int graph = 0;
27
83 if (!task) 28 if (!task)
84 task = current; 29 task = current;
85 30
@@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
107 52
108 context = (struct thread_info *) 53 context = (struct thread_info *)
109 ((unsigned long)stack & (~(THREAD_SIZE - 1))); 54 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
110 bp = print_context_stack(context, stack, bp, ops, data, NULL); 55 bp = print_context_stack(context, stack, bp, ops,
56 data, NULL, &graph);
111 57
112 stack = (unsigned long *)context->previous_esp; 58 stack = (unsigned long *)context->previous_esp;
113 if (!stack) 59 if (!stack)
@@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
119} 65}
120EXPORT_SYMBOL(dump_trace); 66EXPORT_SYMBOL(dump_trace);
121 67
122static void 68void
123print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
124{
125 printk(data);
126 print_symbol(msg, symbol);
127 printk("\n");
128}
129
130static void print_trace_warning(void *data, char *msg)
131{
132 printk("%s%s\n", (char *)data, msg);
133}
134
135static int print_trace_stack(void *data, char *name)
136{
137 printk("%s <%s> ", (char *)data, name);
138 return 0;
139}
140
141/*
142 * Print one address/symbol entries per line.
143 */
144static void print_trace_address(void *data, unsigned long addr, int reliable)
145{
146 touch_nmi_watchdog();
147 printk(data);
148 printk_address(addr, reliable);
149}
150
151static const struct stacktrace_ops print_trace_ops = {
152 .warning = print_trace_warning,
153 .warning_symbol = print_trace_warning_symbol,
154 .stack = print_trace_stack,
155 .address = print_trace_address,
156};
157
158static void
159show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
160 unsigned long *stack, unsigned long bp, char *log_lvl)
161{
162 printk("%sCall Trace:\n", log_lvl);
163 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
164}
165
166void show_trace(struct task_struct *task, struct pt_regs *regs,
167 unsigned long *stack, unsigned long bp)
168{
169 show_trace_log_lvl(task, regs, stack, bp, "");
170}
171
172static void
173show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 69show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *sp, unsigned long bp, char *log_lvl) 70 unsigned long *sp, unsigned long bp, char *log_lvl)
175{ 71{
@@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
196 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 92 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
197} 93}
198 94
199void show_stack(struct task_struct *task, unsigned long *sp)
200{
201 show_stack_log_lvl(task, NULL, sp, 0, "");
202}
203
204/*
205 * The architecture-independent dump_stack generator
206 */
207void dump_stack(void)
208{
209 unsigned long bp = 0;
210 unsigned long stack;
211
212#ifdef CONFIG_FRAME_POINTER
213 if (!bp)
214 get_bp(bp);
215#endif
216
217 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
218 current->pid, current->comm, print_tainted(),
219 init_utsname()->release,
220 (int)strcspn(init_utsname()->version, " "),
221 init_utsname()->version);
222 show_trace(NULL, NULL, &stack, bp);
223}
224
225EXPORT_SYMBOL(dump_stack);
226 95
227void show_registers(struct pt_regs *regs) 96void show_registers(struct pt_regs *regs)
228{ 97{
@@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip)
283 return ud2 == 0x0b0f; 152 return ud2 == 0x0b0f;
284} 153}
285 154
286static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
287static int die_owner = -1;
288static unsigned int die_nest_count;
289
290unsigned __kprobes long oops_begin(void)
291{
292 unsigned long flags;
293
294 oops_enter();
295
296 if (die_owner != raw_smp_processor_id()) {
297 console_verbose();
298 raw_local_irq_save(flags);
299 __raw_spin_lock(&die_lock);
300 die_owner = smp_processor_id();
301 die_nest_count = 0;
302 bust_spinlocks(1);
303 } else {
304 raw_local_irq_save(flags);
305 }
306 die_nest_count++;
307 return flags;
308}
309
310void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
311{
312 bust_spinlocks(0);
313 die_owner = -1;
314 add_taint(TAINT_DIE);
315 __raw_spin_unlock(&die_lock);
316 raw_local_irq_restore(flags);
317
318 if (!regs)
319 return;
320
321 if (kexec_should_crash(current))
322 crash_kexec(regs);
323 if (in_interrupt())
324 panic("Fatal exception in interrupt");
325 if (panic_on_oops)
326 panic("Fatal exception");
327 oops_exit();
328 do_exit(signr);
329}
330
331int __kprobes __die(const char *str, struct pt_regs *regs, long err)
332{
333 unsigned short ss;
334 unsigned long sp;
335
336 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
337#ifdef CONFIG_PREEMPT
338 printk("PREEMPT ");
339#endif
340#ifdef CONFIG_SMP
341 printk("SMP ");
342#endif
343#ifdef CONFIG_DEBUG_PAGEALLOC
344 printk("DEBUG_PAGEALLOC");
345#endif
346 printk("\n");
347 sysfs_printk_last_file();
348 if (notify_die(DIE_OOPS, str, regs, err,
349 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
350 return 1;
351
352 show_registers(regs);
353 /* Executive summary in case the oops scrolled away */
354 sp = (unsigned long) (&regs->sp);
355 savesegment(ss, ss);
356 if (user_mode(regs)) {
357 sp = regs->sp;
358 ss = regs->ss & 0xffff;
359 }
360 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
361 print_symbol("%s", regs->ip);
362 printk(" SS:ESP %04x:%08lx\n", ss, sp);
363 return 0;
364}
365
366/*
367 * This is gone through when something in the kernel has done something bad
368 * and is about to be terminated:
369 */
370void die(const char *str, struct pt_regs *regs, long err)
371{
372 unsigned long flags = oops_begin();
373
374 if (die_nest_count < 3) {
375 report_bug(regs->ip, regs);
376
377 if (__die(str, regs, err))
378 regs = NULL;
379 } else {
380 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
381 }
382
383 oops_end(flags, regs, SIGSEGV);
384}
385
386static DEFINE_SPINLOCK(nmi_print_lock);
387
388void notrace __kprobes
389die_nmi(char *str, struct pt_regs *regs, int do_panic)
390{
391 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
392 return;
393
394 spin_lock(&nmi_print_lock);
395 /*
396 * We are in trouble anyway, lets at least try
397 * to get a message out:
398 */
399 bust_spinlocks(1);
400 printk(KERN_EMERG "%s", str);
401 printk(" on CPU%d, ip %08lx, registers:\n",
402 smp_processor_id(), regs->ip);
403 show_registers(regs);
404 if (do_panic)
405 panic("Non maskable interrupt");
406 console_silent();
407 spin_unlock(&nmi_print_lock);
408
409 /*
410 * If we are in kernel we are probably nested up pretty bad
411 * and might aswell get out now while we still can:
412 */
413 if (!user_mode_vm(regs)) {
414 current->thread.trap_no = 2;
415 crash_kexec(regs);
416 }
417
418 bust_spinlocks(0);
419 do_exit(SIGSEGV);
420}
421
422static int __init oops_setup(char *s)
423{
424 if (!s)
425 return -EINVAL;
426 if (!strcmp(s, "panic"))
427 panic_on_oops = 1;
428 return 0;
429}
430early_param("oops", oops_setup);
431
432static int __init kstack_setup(char *s)
433{
434 if (!s)
435 return -EINVAL;
436 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
437 return 0;
438}
439early_param("kstack", kstack_setup);
440
441static int __init code_bytes_setup(char *s)
442{
443 code_bytes = simple_strtoul(s, NULL, 0);
444 if (code_bytes > 8192)
445 code_bytes = 8192;
446
447 return 1;
448}
449__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a7..c302d0707048 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 4 20#include "dumpstack.h"
21#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33 21
34static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 22static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
35 unsigned *usedp, char **idp) 23 unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
113 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 101 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
114 */ 102 */
115 103
116static inline int valid_stack_ptr(struct thread_info *tinfo,
117 void *p, unsigned int size, void *end)
118{
119 void *t = tinfo;
120 if (end) {
121 if (p < end && p >= (end-THREAD_SIZE))
122 return 1;
123 else
124 return 0;
125 }
126 return p > t && p < t + THREAD_SIZE - size;
127}
128
129/* The form of the top of the frame on the stack */
130struct stack_frame {
131 struct stack_frame *next_frame;
132 unsigned long return_address;
133};
134
135static inline unsigned long
136print_context_stack(struct thread_info *tinfo,
137 unsigned long *stack, unsigned long bp,
138 const struct stacktrace_ops *ops, void *data,
139 unsigned long *end)
140{
141 struct stack_frame *frame = (struct stack_frame *)bp;
142
143 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
144 unsigned long addr;
145
146 addr = *stack;
147 if (__kernel_text_address(addr)) {
148 if ((unsigned long) stack == bp + sizeof(long)) {
149 ops->address(data, addr, 1);
150 frame = frame->next_frame;
151 bp = (unsigned long) frame;
152 } else {
153 ops->address(data, addr, bp == 0);
154 }
155 }
156 stack++;
157 }
158 return bp;
159}
160
161void dump_trace(struct task_struct *task, struct pt_regs *regs, 104void dump_trace(struct task_struct *task, struct pt_regs *regs,
162 unsigned long *stack, unsigned long bp, 105 unsigned long *stack, unsigned long bp,
163 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
@@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
166 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
167 unsigned used = 0; 110 unsigned used = 0;
168 struct thread_info *tinfo; 111 struct thread_info *tinfo;
112 int graph = 0;
169 113
170 if (!task) 114 if (!task)
171 task = current; 115 task = current;
@@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
206 break; 150 break;
207 151
208 bp = print_context_stack(tinfo, stack, bp, ops, 152 bp = print_context_stack(tinfo, stack, bp, ops,
209 data, estack_end); 153 data, estack_end, &graph);
210 ops->stack(data, "<EOE>"); 154 ops->stack(data, "<EOE>");
211 /* 155 /*
212 * We link to the next stack via the 156 * We link to the next stack via the
@@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
225 if (ops->stack(data, "IRQ") < 0) 169 if (ops->stack(data, "IRQ") < 0)
226 break; 170 break;
227 bp = print_context_stack(tinfo, stack, bp, 171 bp = print_context_stack(tinfo, stack, bp,
228 ops, data, irqstack_end); 172 ops, data, irqstack_end, &graph);
229 /* 173 /*
230 * We link to the next stack (which would be 174 * We link to the next stack (which would be
231 * the process stack normally) the last 175 * the process stack normally) the last
@@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
243 /* 187 /*
244 * This handles the process stack: 188 * This handles the process stack:
245 */ 189 */
246 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); 190 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
247 put_cpu(); 191 put_cpu();
248} 192}
249EXPORT_SYMBOL(dump_trace); 193EXPORT_SYMBOL(dump_trace);
250 194
251static void 195void
252print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
253{
254 printk(data);
255 print_symbol(msg, symbol);
256 printk("\n");
257}
258
259static void print_trace_warning(void *data, char *msg)
260{
261 printk("%s%s\n", (char *)data, msg);
262}
263
264static int print_trace_stack(void *data, char *name)
265{
266 printk("%s <%s> ", (char *)data, name);
267 return 0;
268}
269
270/*
271 * Print one address/symbol entries per line.
272 */
273static void print_trace_address(void *data, unsigned long addr, int reliable)
274{
275 touch_nmi_watchdog();
276 printk(data);
277 printk_address(addr, reliable);
278}
279
280static const struct stacktrace_ops print_trace_ops = {
281 .warning = print_trace_warning,
282 .warning_symbol = print_trace_warning_symbol,
283 .stack = print_trace_stack,
284 .address = print_trace_address,
285};
286
287static void
288show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
289 unsigned long *stack, unsigned long bp, char *log_lvl)
290{
291 printk("%sCall Trace:\n", log_lvl);
292 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
293}
294
295void show_trace(struct task_struct *task, struct pt_regs *regs,
296 unsigned long *stack, unsigned long bp)
297{
298 show_trace_log_lvl(task, regs, stack, bp, "");
299}
300
301static void
302show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 196show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
303 unsigned long *sp, unsigned long bp, char *log_lvl) 197 unsigned long *sp, unsigned long bp, char *log_lvl)
304{ 198{
@@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
342 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 236 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
343} 237}
344 238
345void show_stack(struct task_struct *task, unsigned long *sp)
346{
347 show_stack_log_lvl(task, NULL, sp, 0, "");
348}
349
350/*
351 * The architecture-independent dump_stack generator
352 */
353void dump_stack(void)
354{
355 unsigned long bp = 0;
356 unsigned long stack;
357
358#ifdef CONFIG_FRAME_POINTER
359 if (!bp)
360 get_bp(bp);
361#endif
362
363 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
364 current->pid, current->comm, print_tainted(),
365 init_utsname()->release,
366 (int)strcspn(init_utsname()->version, " "),
367 init_utsname()->version);
368 show_trace(NULL, NULL, &stack, bp);
369}
370EXPORT_SYMBOL(dump_stack);
371
372void show_registers(struct pt_regs *regs) 239void show_registers(struct pt_regs *regs)
373{ 240{
374 int i; 241 int i;
@@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip)
429 return ud2 == 0x0b0f; 296 return ud2 == 0x0b0f;
430} 297}
431 298
432static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
433static int die_owner = -1;
434static unsigned int die_nest_count;
435
436unsigned __kprobes long oops_begin(void)
437{
438 int cpu;
439 unsigned long flags;
440
441 oops_enter();
442
443 /* racy, but better than risking deadlock. */
444 raw_local_irq_save(flags);
445 cpu = smp_processor_id();
446 if (!__raw_spin_trylock(&die_lock)) {
447 if (cpu == die_owner)
448 /* nested oops. should stop eventually */;
449 else
450 __raw_spin_lock(&die_lock);
451 }
452 die_nest_count++;
453 die_owner = cpu;
454 console_verbose();
455 bust_spinlocks(1);
456 return flags;
457}
458
459void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
460{
461 die_owner = -1;
462 bust_spinlocks(0);
463 die_nest_count--;
464 if (!die_nest_count)
465 /* Nest count reaches zero, release the lock. */
466 __raw_spin_unlock(&die_lock);
467 raw_local_irq_restore(flags);
468 if (!regs) {
469 oops_exit();
470 return;
471 }
472 if (in_interrupt())
473 panic("Fatal exception in interrupt");
474 if (panic_on_oops)
475 panic("Fatal exception");
476 oops_exit();
477 do_exit(signr);
478}
479
480int __kprobes __die(const char *str, struct pt_regs *regs, long err)
481{
482 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
483#ifdef CONFIG_PREEMPT
484 printk("PREEMPT ");
485#endif
486#ifdef CONFIG_SMP
487 printk("SMP ");
488#endif
489#ifdef CONFIG_DEBUG_PAGEALLOC
490 printk("DEBUG_PAGEALLOC");
491#endif
492 printk("\n");
493 sysfs_printk_last_file();
494 if (notify_die(DIE_OOPS, str, regs, err,
495 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
496 return 1;
497
498 show_registers(regs);
499 add_taint(TAINT_DIE);
500 /* Executive summary in case the oops scrolled away */
501 printk(KERN_ALERT "RIP ");
502 printk_address(regs->ip, 1);
503 printk(" RSP <%016lx>\n", regs->sp);
504 if (kexec_should_crash(current))
505 crash_kexec(regs);
506 return 0;
507}
508
509void die(const char *str, struct pt_regs *regs, long err)
510{
511 unsigned long flags = oops_begin();
512
513 if (!user_mode(regs))
514 report_bug(regs->ip, regs);
515
516 if (__die(str, regs, err))
517 regs = NULL;
518 oops_end(flags, regs, SIGSEGV);
519}
520
521notrace __kprobes void
522die_nmi(char *str, struct pt_regs *regs, int do_panic)
523{
524 unsigned long flags;
525
526 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
527 return;
528
529 flags = oops_begin();
530 /*
531 * We are in trouble anyway, lets at least try
532 * to get a message out.
533 */
534 printk(KERN_EMERG "%s", str);
535 printk(" on CPU%d, ip %08lx, registers:\n",
536 smp_processor_id(), regs->ip);
537 show_registers(regs);
538 if (kexec_should_crash(current))
539 crash_kexec(regs);
540 if (do_panic || panic_on_oops)
541 panic("Non maskable interrupt");
542 oops_end(flags, NULL, SIGBUS);
543 nmi_exit();
544 local_irq_enable();
545 do_exit(SIGBUS);
546}
547
548static int __init oops_setup(char *s)
549{
550 if (!s)
551 return -EINVAL;
552 if (!strcmp(s, "panic"))
553 panic_on_oops = 1;
554 return 0;
555}
556early_param("oops", oops_setup);
557
558static int __init kstack_setup(char *s)
559{
560 if (!s)
561 return -EINVAL;
562 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
563 return 0;
564}
565early_param("kstack", kstack_setup);
566
567static int __init code_bytes_setup(char *s)
568{
569 code_bytes = simple_strtoul(s, NULL, 0);
570 if (code_bytes > 8192)
571 code_bytes = 8192;
572
573 return 1;
574}
575__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3ce029ffaa55..1b894b72c0f5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
188} 188}
189#endif 189#endif
190 190
191#ifdef CONFIG_DMAR
192static void __init intel_g33_dmar(int num, int slot, int func)
193{
194 struct acpi_table_header *dmar_tbl;
195 acpi_status status;
196
197 status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
198 if (ACPI_SUCCESS(status)) {
199 printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
200 dmar_disabled = 1;
201 }
202}
203#endif
204
205#define QFLAG_APPLY_ONCE 0x1 191#define QFLAG_APPLY_ONCE 0x1
206#define QFLAG_APPLIED 0x2 192#define QFLAG_APPLIED 0x2
207#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) 193#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = {
225 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, 211 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
226 { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, 212 { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
227 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, 213 PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
228#ifdef CONFIG_DMAR
229 { PCI_VENDOR_ID_INTEL, 0x29c0,
230 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
231#endif
232 {} 214 {}
233}; 215};
234 216
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..43ceb3f454bf 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1157,6 +1157,9 @@ ENTRY(mcount)
1157END(mcount) 1157END(mcount)
1158 1158
1159ENTRY(ftrace_caller) 1159ENTRY(ftrace_caller)
1160 cmpl $0, function_trace_stop
1161 jne ftrace_stub
1162
1160 pushl %eax 1163 pushl %eax
1161 pushl %ecx 1164 pushl %ecx
1162 pushl %edx 1165 pushl %edx
@@ -1171,6 +1174,11 @@ ftrace_call:
1171 popl %edx 1174 popl %edx
1172 popl %ecx 1175 popl %ecx
1173 popl %eax 1176 popl %eax
1177#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1178.globl ftrace_graph_call
1179ftrace_graph_call:
1180 jmp ftrace_stub
1181#endif
1174 1182
1175.globl ftrace_stub 1183.globl ftrace_stub
1176ftrace_stub: 1184ftrace_stub:
@@ -1180,8 +1188,18 @@ END(ftrace_caller)
1180#else /* ! CONFIG_DYNAMIC_FTRACE */ 1188#else /* ! CONFIG_DYNAMIC_FTRACE */
1181 1189
1182ENTRY(mcount) 1190ENTRY(mcount)
1191 cmpl $0, function_trace_stop
1192 jne ftrace_stub
1193
1183 cmpl $ftrace_stub, ftrace_trace_function 1194 cmpl $ftrace_stub, ftrace_trace_function
1184 jnz trace 1195 jnz trace
1196#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1197 cmpl $ftrace_stub, ftrace_graph_return
1198 jnz ftrace_graph_caller
1199
1200 cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
1201 jnz ftrace_graph_caller
1202#endif
1185.globl ftrace_stub 1203.globl ftrace_stub
1186ftrace_stub: 1204ftrace_stub:
1187 ret 1205 ret
@@ -1200,12 +1218,43 @@ trace:
1200 popl %edx 1218 popl %edx
1201 popl %ecx 1219 popl %ecx
1202 popl %eax 1220 popl %eax
1203
1204 jmp ftrace_stub 1221 jmp ftrace_stub
1205END(mcount) 1222END(mcount)
1206#endif /* CONFIG_DYNAMIC_FTRACE */ 1223#endif /* CONFIG_DYNAMIC_FTRACE */
1207#endif /* CONFIG_FUNCTION_TRACER */ 1224#endif /* CONFIG_FUNCTION_TRACER */
1208 1225
1226#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1227ENTRY(ftrace_graph_caller)
1228 cmpl $0, function_trace_stop
1229 jne ftrace_stub
1230
1231 pushl %eax
1232 pushl %ecx
1233 pushl %edx
1234 movl 0xc(%esp), %edx
1235 lea 0x4(%ebp), %eax
1236 subl $MCOUNT_INSN_SIZE, %edx
1237 call prepare_ftrace_return
1238 popl %edx
1239 popl %ecx
1240 popl %eax
1241 ret
1242END(ftrace_graph_caller)
1243
1244.globl return_to_handler
1245return_to_handler:
1246 pushl $0
1247 pushl %eax
1248 pushl %ecx
1249 pushl %edx
1250 call ftrace_return_to_handler
1251 movl %eax, 0xc(%esp)
1252 popl %edx
1253 popl %ecx
1254 popl %eax
1255 ret
1256#endif
1257
1209.section .rodata,"a" 1258.section .rodata,"a"
1210#include "syscall_table_32.S" 1259#include "syscall_table_32.S"
1211 1260
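
The ftrace_call and ftrace_graph_call labels above are 5-byte sites that dynamic ftrace rewrites at run time (see ftrace.c later in this diff): either a 5-byte NOP or an e8 opcode followed by a PC-relative 32-bit displacement. A minimal sketch of that call encoding (the helper name is illustrative, not a kernel symbol):

    #include <stdio.h>
    #include <string.h>

    #define CALL_INSN_SIZE 5      /* e8 + rel32, same size as MCOUNT_INSN_SIZE */

    /* Build "call addr" as it would be encoded at instruction address ip. */
    static void make_call(unsigned char buf[CALL_INSN_SIZE],
    		      unsigned long ip, unsigned long addr)
    {
    	int rel = (int)(addr - (ip + CALL_INSN_SIZE)); /* rel32 is relative to the next insn */

    	buf[0] = 0xe8;
    	memcpy(&buf[1], &rel, sizeof(rel));
    }

    int main(void)
    {
    	unsigned char buf[CALL_INSN_SIZE];

    	make_call(buf, 0x1000, 0x2000);
    	for (int i = 0; i < CALL_INSN_SIZE; i++)
    		printf("%02x ", buf[i]);
    	printf("\n");              /* prints: e8 fb 0f 00 00 */
    	return 0;
    }
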
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..54e0bbdccb99 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,6 +68,8 @@ ENTRY(mcount)
68END(mcount) 68END(mcount)
69 69
70ENTRY(ftrace_caller) 70ENTRY(ftrace_caller)
71 cmpl $0, function_trace_stop
72 jne ftrace_stub
71 73
72 /* taken from glibc */ 74 /* taken from glibc */
73 subq $0x38, %rsp 75 subq $0x38, %rsp
@@ -96,6 +98,12 @@ ftrace_call:
96 movq (%rsp), %rax 98 movq (%rsp), %rax
97 addq $0x38, %rsp 99 addq $0x38, %rsp
98 100
101#ifdef CONFIG_FUNCTION_GRAPH_TRACER
102.globl ftrace_graph_call
103ftrace_graph_call:
104 jmp ftrace_stub
105#endif
106
99.globl ftrace_stub 107.globl ftrace_stub
100ftrace_stub: 108ftrace_stub:
101 retq 109 retq
@@ -103,8 +111,20 @@ END(ftrace_caller)
103 111
104#else /* ! CONFIG_DYNAMIC_FTRACE */ 112#else /* ! CONFIG_DYNAMIC_FTRACE */
105ENTRY(mcount) 113ENTRY(mcount)
114 cmpl $0, function_trace_stop
115 jne ftrace_stub
116
106 cmpq $ftrace_stub, ftrace_trace_function 117 cmpq $ftrace_stub, ftrace_trace_function
107 jnz trace 118 jnz trace
119
120#ifdef CONFIG_FUNCTION_GRAPH_TRACER
121 cmpq $ftrace_stub, ftrace_graph_return
122 jnz ftrace_graph_caller
123
124 cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
125 jnz ftrace_graph_caller
126#endif
127
108.globl ftrace_stub 128.globl ftrace_stub
109ftrace_stub: 129ftrace_stub:
110 retq 130 retq
@@ -140,6 +160,69 @@ END(mcount)
140#endif /* CONFIG_DYNAMIC_FTRACE */ 160#endif /* CONFIG_DYNAMIC_FTRACE */
141#endif /* CONFIG_FUNCTION_TRACER */ 161#endif /* CONFIG_FUNCTION_TRACER */
142 162
163#ifdef CONFIG_FUNCTION_GRAPH_TRACER
164ENTRY(ftrace_graph_caller)
165 cmpl $0, function_trace_stop
166 jne ftrace_stub
167
168 subq $0x38, %rsp
169 movq %rax, (%rsp)
170 movq %rcx, 8(%rsp)
171 movq %rdx, 16(%rsp)
172 movq %rsi, 24(%rsp)
173 movq %rdi, 32(%rsp)
174 movq %r8, 40(%rsp)
175 movq %r9, 48(%rsp)
176
177 leaq 8(%rbp), %rdi
178 movq 0x38(%rsp), %rsi
179 subq $MCOUNT_INSN_SIZE, %rsi
180
181 call prepare_ftrace_return
182
183 movq 48(%rsp), %r9
184 movq 40(%rsp), %r8
185 movq 32(%rsp), %rdi
186 movq 24(%rsp), %rsi
187 movq 16(%rsp), %rdx
188 movq 8(%rsp), %rcx
189 movq (%rsp), %rax
190 addq $0x38, %rsp
191 retq
192END(ftrace_graph_caller)
193
194
195.globl return_to_handler
196return_to_handler:
197 subq $80, %rsp
198
199 movq %rax, (%rsp)
200 movq %rcx, 8(%rsp)
201 movq %rdx, 16(%rsp)
202 movq %rsi, 24(%rsp)
203 movq %rdi, 32(%rsp)
204 movq %r8, 40(%rsp)
205 movq %r9, 48(%rsp)
206 movq %r10, 56(%rsp)
207 movq %r11, 64(%rsp)
208
209 call ftrace_return_to_handler
210
211 movq %rax, 72(%rsp)
212 movq 64(%rsp), %r11
213 movq 56(%rsp), %r10
214 movq 48(%rsp), %r9
215 movq 40(%rsp), %r8
216 movq 32(%rsp), %rdi
217 movq 24(%rsp), %rsi
218 movq 16(%rsp), %rdx
219 movq 8(%rsp), %rcx
220 movq (%rsp), %rax
221 addq $72, %rsp
222 retq
223#endif
224
225
143#ifndef CONFIG_PREEMPT 226#ifndef CONFIG_PREEMPT
144#define retint_kernel retint_restore_args 227#define retint_kernel retint_restore_args
145#endif 228#endif
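
return_to_handler above can only hand control back because every hooked call first saved the real return address on a per-task shadow stack; the C side of that bookkeeping is push_return_trace()/pop_return_trace() in ftrace.c further down. A self-contained model of the pairing, with a fixed-depth array standing in for current->ret_stack:

    #include <stdio.h>

    #define RET_DEPTH 64

    struct ret_entry {
    	unsigned long ret;        /* original return address */
    	unsigned long func;       /* traced function         */
    };

    static struct ret_entry ret_stack[RET_DEPTH];
    static int curr_ret = -1;     /* index of newest entry, -1 = empty */

    /* Entry hook: remember the real return address before redirecting it. */
    static int push_return(unsigned long ret, unsigned long func)
    {
    	if (curr_ret == RET_DEPTH - 1)
    		return -1;        /* full: leave the call untraced */
    	ret_stack[++curr_ret] = (struct ret_entry){ .ret = ret, .func = func };
    	return 0;
    }

    /* Return trampoline: hand back the real address, newest first. */
    static unsigned long pop_return(void)
    {
    	if (curr_ret < 0)
    		return 0;         /* corrupted pairing */
    	return ret_stack[curr_ret--].ret;
    }

    int main(void)
    {
    	push_return(0x1000, 0x2000);
    	push_return(0x1100, 0x2100);
    	unsigned long a = pop_return();
    	unsigned long b = pop_return();
    	printf("%#lx %#lx\n", a, b);   /* 0x1100 0x1000: LIFO order */
    	return 0;
    }
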
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index f454c78fcef6..53699c931ad4 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -38,8 +38,11 @@
38#include <asm/io.h> 38#include <asm/io.h>
39#include <asm/nmi.h> 39#include <asm/nmi.h>
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/atomic.h>
41#include <asm/apicdef.h> 42#include <asm/apicdef.h>
42#include <mach_mpparse.h> 43#include <mach_mpparse.h>
44#include <asm/genapic.h>
45#include <asm/setup.h>
43 46
44/* 47/*
45 * ES7000 chipsets 48 * ES7000 chipsets
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi)
161 return gsi; 164 return gsi;
162} 165}
163 166
167static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
168{
169 unsigned long vect = 0, psaival = 0;
170
171 if (psai == NULL)
172 return -1;
173
174 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
175 psaival = (0x1000000 | vect | cpu);
176
177 while (*psai & 0x1000000)
178 ;
179
180 *psai = psaival;
181
182 return 0;
183}
184
185static void noop_wait_for_deassert(atomic_t *deassert_not_used)
186{
187}
188
189static int __init es7000_update_genapic(void)
190{
191 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
192
193 /* MPENTIUMIII */
194 if (boot_cpu_data.x86 == 6 &&
195 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
196 es7000_update_genapic_to_cluster();
197 genapic->wait_for_init_deassert = noop_wait_for_deassert;
198 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
199 }
200
201 return 0;
202}
203
164void __init 204void __init
165setup_unisys(void) 205setup_unisys(void)
166{ 206{
@@ -176,6 +216,8 @@ setup_unisys(void)
176 else 216 else
177 es7000_plat = ES7000_CLASSIC; 217 es7000_plat = ES7000_CLASSIC;
178 ioapic_renumber_irq = es7000_rename_gsi; 218 ioapic_renumber_irq = es7000_rename_gsi;
219
220 x86_quirks->update_genapic = es7000_update_genapic;
179} 221}
180 222
181/* 223/*
@@ -250,31 +292,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
250{ 292{
251 struct acpi_table_header *header = NULL; 293 struct acpi_table_header *header = NULL;
252 int i = 0; 294 int i = 0;
253 acpi_size tbl_size;
254 295
255 while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { 296 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
256 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { 297 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
257 struct oem_table *t = (struct oem_table *)header; 298 struct oem_table *t = (struct oem_table *)header;
258 299
259 oem_addrX = t->OEMTableAddr; 300 oem_addrX = t->OEMTableAddr;
260 oem_size = t->OEMTableSize; 301 oem_size = t->OEMTableSize;
261 early_acpi_os_unmap_memory(header, tbl_size);
262 302
263 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, 303 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
264 oem_size); 304 oem_size);
265 return 0; 305 return 0;
266 } 306 }
267 early_acpi_os_unmap_memory(header, tbl_size);
268 } 307 }
269 return -1; 308 return -1;
270} 309}
271 310
272void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) 311void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
273{ 312{
274 if (!oem_addr)
275 return;
276
277 __acpi_unmap_table((char *)oem_addr, oem_size);
278} 313}
279#endif 314#endif
280 315
@@ -324,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
324 return status; 359 return status;
325} 360}
326 361
327int
328es7000_start_cpu(int cpu, unsigned long eip)
329{
330 unsigned long vect = 0, psaival = 0;
331
332 if (psai == NULL)
333 return -1;
334
335 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
336 psaival = (0x1000000 | vect | cpu);
337
338 while (*psai & 0x1000000)
339 ;
340
341 *psai = psaival;
342
343 return 0;
344
345}
346
347void __init 362void __init
348es7000_sw_apic(void) 363es7000_sw_apic(void)
349{ 364{
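
wakeup_secondary_cpu_via_mip() above boots an AP by writing a command word into the PSAI mailbox: bit 24 is the busy flag, bits 16 and up carry the startup page number, and the low bits name the CPU. A stand-alone model of the same doorbell handshake (the mailbox is a plain variable here; on the real board it is a shared register that firmware clears):

    #include <stdio.h>

    #define MIP_BUSY 0x1000000UL

    static volatile unsigned long psai;   /* stands in for the shared PSAI word */

    static int start_cpu(int cpu, unsigned long eip)
    {
    	unsigned long vect = (eip / 0x1000) << 16;   /* startup page number */
    	unsigned long cmd  = MIP_BUSY | vect | cpu;

    	while (psai & MIP_BUSY)
    		;                                    /* wait out the previous command */

    	psai = cmd;                                  /* firmware clears MIP_BUSY when done */
    	return 0;
    }

    int main(void)
    {
    	start_cpu(1, 0x9f000);
    	printf("mailbox = %#lx\n", (unsigned long)psai);
    	return 0;
    }
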
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..1b43086b097a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,17 @@
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15#include <linux/ftrace.h> 15#include <linux/ftrace.h>
16#include <linux/percpu.h> 16#include <linux/percpu.h>
17#include <linux/sched.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/list.h> 19#include <linux/list.h>
19 20
20#include <asm/ftrace.h> 21#include <asm/ftrace.h>
22#include <linux/ftrace.h>
21#include <asm/nops.h> 23#include <asm/nops.h>
24#include <asm/nmi.h>
22 25
23 26
24static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; 27#ifdef CONFIG_DYNAMIC_FTRACE
25 28
26union ftrace_code_union { 29union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE]; 30 char code[MCOUNT_INSN_SIZE];
@@ -31,18 +34,12 @@ union ftrace_code_union {
31 } __attribute__((packed)); 34 } __attribute__((packed));
32}; 35};
33 36
34
35static int ftrace_calc_offset(long ip, long addr) 37static int ftrace_calc_offset(long ip, long addr)
36{ 38{
37 return (int)(addr - ip); 39 return (int)(addr - ip);
38} 40}
39 41
40unsigned char *ftrace_nop_replace(void) 42static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
41{
42 return ftrace_nop;
43}
44
45unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{ 43{
47 static union ftrace_code_union calc; 44 static union ftrace_code_union calc;
48 45
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
56 return calc.code; 53 return calc.code;
57} 54}
58 55
59int 56/*
57 * Modifying code must take extra care. On an SMP machine, if
58 * the code being modified is also being executed on another CPU
59 * that CPU will have undefined results and possibly take a GPF.
60 * We use kstop_machine to stop other CPUs from executing code.
61 * But this does not stop NMIs from happening. We still need
62 * to protect against that. We separate out the modification of
63 * the code to take care of this.
64 *
65 * Two buffers are added: An IP buffer and a "code" buffer.
66 *
67 * 1) Put the instruction pointer into the IP buffer
68 * and the new code into the "code" buffer.
69 * 2) Set a flag that says we are modifying code
70 * 3) Wait for any running NMIs to finish.
71 * 4) Write the code.
72 * 5) Clear the flag.
73 * 6) Wait for any running NMIs to finish.
74 *
75 * If an NMI is executed, the first thing it does is to call
76 * "ftrace_nmi_enter". This will check if the flag is set to write
77 * and if it is, it will write what is in the IP and "code" buffers.
78 *
79 * The trick is, it does not matter if everyone is writing the same
80 * content to the code location. Also, if a CPU is executing code
81 * it is OK to write to that code location if the contents being written
82 * are the same as what exists.
83 */
84
85static atomic_t in_nmi = ATOMIC_INIT(0);
86static int mod_code_status; /* holds return value of text write */
87static int mod_code_write; /* set when NMI should do the write */
88static void *mod_code_ip; /* holds the IP to write to */
89static void *mod_code_newcode; /* holds the text to write to the IP */
90
91static unsigned nmi_wait_count;
92static atomic_t nmi_update_count = ATOMIC_INIT(0);
93
94int ftrace_arch_read_dyn_info(char *buf, int size)
95{
96 int r;
97
98 r = snprintf(buf, size, "%u %u",
99 nmi_wait_count,
100 atomic_read(&nmi_update_count));
101 return r;
102}
103
104static void ftrace_mod_code(void)
105{
106 /*
107 * Yes, more than one CPU can be writing to mod_code_status.
108 * (and the code itself)
109 * But if one were to fail, then they all should, and if one were
110 * to succeed, then they all should.
111 */
112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
113 MCOUNT_INSN_SIZE);
114}
115
116void ftrace_nmi_enter(void)
117{
118 atomic_inc(&in_nmi);
119 /* Must have in_nmi seen before reading write flag */
120 smp_mb();
121 if (mod_code_write) {
122 ftrace_mod_code();
123 atomic_inc(&nmi_update_count);
124 }
125}
126
127void ftrace_nmi_exit(void)
128{
129 /* Finish all executions before clearing in_nmi */
130 smp_wmb();
131 atomic_dec(&in_nmi);
132}
133
134static void wait_for_nmi(void)
135{
136 int waited = 0;
137
138 while (atomic_read(&in_nmi)) {
139 waited = 1;
140 cpu_relax();
141 }
142
143 if (waited)
144 nmi_wait_count++;
145}
146
147static int
148do_ftrace_mod_code(unsigned long ip, void *new_code)
149{
150 mod_code_ip = (void *)ip;
151 mod_code_newcode = new_code;
152
153 /* The buffers need to be visible before we let NMIs write them */
154 smp_wmb();
155
156 mod_code_write = 1;
157
158 /* Make sure write bit is visible before we wait on NMIs */
159 smp_mb();
160
161 wait_for_nmi();
162
163 /* Make sure all running NMIs have finished before we write the code */
164 smp_mb();
165
166 ftrace_mod_code();
167
168 /* Make sure the write happens before clearing the bit */
169 smp_wmb();
170
171 mod_code_write = 0;
172
173 /* make sure NMIs see the cleared bit */
174 smp_mb();
175
176 wait_for_nmi();
177
178 return mod_code_status;
179}
180
181
182
183
184static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
185
186static unsigned char *ftrace_nop_replace(void)
187{
188 return ftrace_nop;
189}
190
191static int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 192ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 193 unsigned char *new_code)
62{ 194{
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
81 return -EINVAL; 213 return -EINVAL;
82 214
83 /* replace the text with the new text */ 215 /* replace the text with the new text */
84 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 216 if (do_ftrace_mod_code(ip, new_code))
85 return -EPERM; 217 return -EPERM;
86 218
87 sync_core(); 219 sync_core();
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
89 return 0; 221 return 0;
90} 222}
91 223
224int ftrace_make_nop(struct module *mod,
225 struct dyn_ftrace *rec, unsigned long addr)
226{
227 unsigned char *new, *old;
228 unsigned long ip = rec->ip;
229
230 old = ftrace_call_replace(ip, addr);
231 new = ftrace_nop_replace();
232
233 return ftrace_modify_code(rec->ip, old, new);
234}
235
236int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
237{
238 unsigned char *new, *old;
239 unsigned long ip = rec->ip;
240
241 old = ftrace_nop_replace();
242 new = ftrace_call_replace(ip, addr);
243
244 return ftrace_modify_code(rec->ip, old, new);
245}
246
92int ftrace_update_ftrace_func(ftrace_func_t func) 247int ftrace_update_ftrace_func(ftrace_func_t func)
93{ 248{
94 unsigned long ip = (unsigned long)(&ftrace_call); 249 unsigned long ip = (unsigned long)(&ftrace_call);
@@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data)
165 320
166 return 0; 321 return 0;
167} 322}
323#endif
324
325#ifdef CONFIG_FUNCTION_GRAPH_TRACER
326
327#ifdef CONFIG_DYNAMIC_FTRACE
328extern void ftrace_graph_call(void);
329
330static int ftrace_mod_jmp(unsigned long ip,
331 int old_offset, int new_offset)
332{
333 unsigned char code[MCOUNT_INSN_SIZE];
334
335 if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
336 return -EFAULT;
337
338 if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
339 return -EINVAL;
340
341 *(int *)(&code[1]) = new_offset;
342
343 if (do_ftrace_mod_code(ip, &code))
344 return -EPERM;
345
346 return 0;
347}
348
349int ftrace_enable_ftrace_graph_caller(void)
350{
351 unsigned long ip = (unsigned long)(&ftrace_graph_call);
352 int old_offset, new_offset;
353
354 old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
355 new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
356
357 return ftrace_mod_jmp(ip, old_offset, new_offset);
358}
359
360int ftrace_disable_ftrace_graph_caller(void)
361{
362 unsigned long ip = (unsigned long)(&ftrace_graph_call);
363 int old_offset, new_offset;
364
365 old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
366 new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
367
368 return ftrace_mod_jmp(ip, old_offset, new_offset);
369}
370
371#else /* CONFIG_DYNAMIC_FTRACE */
372
373/*
374 * These functions are picked from those used in
375 * this file for dynamic ftrace. They have been
376 * simplified to ignore all traces in NMI context.
377 */
378static atomic_t in_nmi;
379
380void ftrace_nmi_enter(void)
381{
382 atomic_inc(&in_nmi);
383}
384
385void ftrace_nmi_exit(void)
386{
387 atomic_dec(&in_nmi);
388}
389
390#endif /* !CONFIG_DYNAMIC_FTRACE */
391
392/* Add a function return address to the trace stack on thread info. */
393static int push_return_trace(unsigned long ret, unsigned long long time,
394 unsigned long func, int *depth)
395{
396 int index;
397
398 if (!current->ret_stack)
399 return -EBUSY;
400
401 /* The return trace stack is full */
402 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
403 atomic_inc(&current->trace_overrun);
404 return -EBUSY;
405 }
406
407 index = ++current->curr_ret_stack;
408 barrier();
409 current->ret_stack[index].ret = ret;
410 current->ret_stack[index].func = func;
411 current->ret_stack[index].calltime = time;
412 *depth = index;
413
414 return 0;
415}
416
417/* Retrieve a function return address from the trace stack on thread info. */
418static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
419{
420 int index;
421
422 index = current->curr_ret_stack;
423
424 if (unlikely(index < 0)) {
425 ftrace_graph_stop();
426 WARN_ON(1);
427 /* Might as well panic; otherwise we have nowhere to go */
428 *ret = (unsigned long)panic;
429 return;
430 }
431
432 *ret = current->ret_stack[index].ret;
433 trace->func = current->ret_stack[index].func;
434 trace->calltime = current->ret_stack[index].calltime;
435 trace->overrun = atomic_read(&current->trace_overrun);
436 trace->depth = index;
437 barrier();
438 current->curr_ret_stack--;
439
440}
441
442/*
443 * Send the trace to the ring-buffer.
444 * @return the original return address.
445 */
446unsigned long ftrace_return_to_handler(void)
447{
448 struct ftrace_graph_ret trace;
449 unsigned long ret;
450
451 pop_return_trace(&trace, &ret);
452 trace.rettime = cpu_clock(raw_smp_processor_id());
453 ftrace_graph_return(&trace);
454
455 if (unlikely(!ret)) {
456 ftrace_graph_stop();
457 WARN_ON(1);
458 /* Might as well panic. What else to do? */
459 ret = (unsigned long)panic;
460 }
461
462 return ret;
463}
464
465/*
466 * Hook the return address and push it onto the stack of return
467 * addresses in the current thread info.
468 */
469void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
470{
471 unsigned long old;
472 unsigned long long calltime;
473 int faulted;
474 struct ftrace_graph_ent trace;
475 unsigned long return_hooker = (unsigned long)
476 &return_to_handler;
477
478 /* NMIs are currently unsupported */
479 if (unlikely(atomic_read(&in_nmi)))
480 return;
481
482 if (unlikely(atomic_read(&current->tracing_graph_pause)))
483 return;
484
485 /*
486 * Protect against a fault, even if it shouldn't
487 * happen. This tool is too intrusive to
488 * ignore such a protection.
489 */
490 asm volatile(
491 "1: " _ASM_MOV " (%[parent_old]), %[old]\n"
492 "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
493 " movl $0, %[faulted]\n"
494
495 ".section .fixup, \"ax\"\n"
496 "3: movl $1, %[faulted]\n"
497 ".previous\n"
498
499 _ASM_EXTABLE(1b, 3b)
500 _ASM_EXTABLE(2b, 3b)
501
502 : [parent_replaced] "=r" (parent), [old] "=r" (old),
503 [faulted] "=r" (faulted)
504 : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
505 : "memory"
506 );
507
508 if (unlikely(faulted)) {
509 ftrace_graph_stop();
510 WARN_ON(1);
511 return;
512 }
513
514 if (unlikely(!__kernel_text_address(old))) {
515 ftrace_graph_stop();
516 *parent = old;
517 WARN_ON(1);
518 return;
519 }
520
521 calltime = cpu_clock(raw_smp_processor_id());
522
523 if (push_return_trace(old, calltime,
524 self_addr, &trace.depth) == -EBUSY) {
525 *parent = old;
526 return;
527 }
528
529 trace.func = self_addr;
530
531 /* Only trace if the calling function expects to */
532 if (!ftrace_graph_entry(&trace)) {
533 current->curr_ret_stack--;
534 *parent = old;
535 }
536}
537#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
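
The long comment and the mod_code machinery above let an NMI that interrupts the patching CPU perform the write itself, so text patching never has to mask NMIs. A compressed user-space model of that handshake, with C11 sequentially consistent atomics standing in for the kernel's smp_mb()/smp_wmb() pairs (they are stronger than needed) and memcpy() standing in for probe_kernel_write():

    #include <stdatomic.h>
    #include <string.h>
    #include <stdio.h>

    static atomic_int  in_nmi;            /* how many "NMI" handlers are running */
    static atomic_int  mod_code_write;    /* set while a patch is pending        */
    static void       *mod_code_ip;
    static const void *mod_code_newcode;
    static size_t      mod_code_len;

    static void mod_code(void)
    {
    	/* Every writer stores identical bytes, so a duplicated write is harmless. */
    	memcpy(mod_code_ip, mod_code_newcode, mod_code_len);
    }

    void nmi_enter_hook(void)              /* entry of an NMI-like handler */
    {
    	atomic_fetch_add(&in_nmi, 1);
    	if (atomic_load(&mod_code_write))
    		mod_code();            /* the NMI does the write itself */
    }

    void nmi_exit_hook(void)
    {
    	atomic_fetch_sub(&in_nmi, 1);
    }

    static void wait_for_nmi(void)
    {
    	while (atomic_load(&in_nmi))
    		;                      /* cpu_relax() in the kernel */
    }

    static void patch_text(void *ip, const void *newcode, size_t len)
    {
    	mod_code_ip = ip;                  /* 1) fill the buffers              */
    	mod_code_newcode = newcode;
    	mod_code_len = len;
    	atomic_store(&mod_code_write, 1);  /* 2) publish the "writing" flag    */
    	wait_for_nmi();                    /* 3) in-flight NMIs write for us   */
    	mod_code();                        /* 4) write the code ourselves      */
    	atomic_store(&mod_code_write, 0);  /* 5) clear the flag                */
    	wait_for_nmi();                    /* 6) no NMI may still be writing   */
    }

    int main(void)
    {
    	static unsigned char text[5];
    	static const unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

    	patch_text(text, nop5, sizeof(nop5));
    	printf("patched first byte: %#x\n", text[0]);
    	return 0;
    }
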
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 6c9bfc9e1e95..2bced78b0b8e 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,6 +21,7 @@
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/setup.h>
24 25
25extern struct genapic apic_flat; 26extern struct genapic apic_flat;
26extern struct genapic apic_physflat; 27extern struct genapic apic_physflat;
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
53 genapic = &apic_physflat; 54 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 55 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 } 56 }
57
58 if (x86_quirks->update_genapic)
59 x86_quirks->update_genapic();
56} 60}
57 61
58/* Same for both flat and physical. */ 62/* Same for both flat and physical. */
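
setup_apic_routing() now gives platform code a last word through x86_quirks->update_genapic, the hook that es7000_32.c installs earlier in this diff. The pattern is simply an optional function pointer consulted after the default choice; a minimal stand-alone sketch with illustrative struct and symbol names:

    #include <stdio.h>

    struct apic_driver {
    	const char *name;
    	int (*wakeup_cpu)(int cpu, unsigned long eip);
    };

    static int default_wakeup(int cpu, unsigned long eip) { (void)cpu; (void)eip; return 0; }
    static int mip_wakeup(int cpu, unsigned long eip)     { (void)cpu; (void)eip; return 0; }

    static struct apic_driver flat_apic = { "flat", default_wakeup };
    static struct apic_driver *apic = &flat_apic;

    /* Optional platform hook, NULL unless board setup installs one. */
    static int (*update_apic_quirk)(void);

    static int es7000_style_quirk(void)
    {
    	apic->wakeup_cpu = mip_wakeup;   /* board-specific wakeup path */
    	return 0;
    }

    static void setup_apic_routing(void)
    {
    	printf("Setting APIC routing to %s\n", apic->name);
    	if (update_apic_quirk)           /* give the platform the last word */
    		update_apic_quirk();
    }

    int main(void)
    {
    	update_apic_quirk = es7000_style_quirk;  /* done early by platform code */
    	setup_apic_routing();
    	return 0;
    }
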
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 77017e834cf7..067d8de913f6 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta,
322 * what we wrote hit the chip before we compare it to the 322 * what we wrote hit the chip before we compare it to the
323 * counter. 323 * counter.
324 */ 324 */
325 WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); 325 WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt);
326 326
327 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; 327 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
328} 328}
@@ -445,7 +445,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
445{ 445{
446 446
447 if (request_irq(dev->irq, hpet_interrupt_handler, 447 if (request_irq(dev->irq, hpet_interrupt_handler,
448 IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) 448 IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev))
449 return -1; 449 return -1;
450 450
451 disable_irq(dev->irq); 451 disable_irq(dev->irq);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608d4ca8..b0f61f0dcd0a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
58 stts(); 58 stts();
59} 59}
60 60
61void __init init_thread_xstate(void) 61void __cpuinit init_thread_xstate(void)
62{ 62{
63 if (!HAVE_HWFP) { 63 if (!HAVE_HWFP) {
64 xstate_size = sizeof(struct i387_soft_struct); 64 xstate_size = sizeof(struct i387_soft_struct);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7a3f2028e2eb..a1a2e070f31a 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,8 +108,33 @@ static int __init parse_noapic(char *str)
108early_param("noapic", parse_noapic); 108early_param("noapic", parse_noapic);
109 109
110struct irq_pin_list; 110struct irq_pin_list;
111
112/*
113 * This is performance-critical; we want to do it O(1).
114 *
115 * The indexing order of this array favors 1:1 mappings
116 * between pins and IRQs.
117 */
118
119struct irq_pin_list {
120 int apic, pin;
121 struct irq_pin_list *next;
122};
123
124static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
125{
126 struct irq_pin_list *pin;
127 int node;
128
129 node = cpu_to_node(cpu);
130
131 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
132 printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node);
133
134 return pin;
135}
136
111struct irq_cfg { 137struct irq_cfg {
112 unsigned int irq;
113 struct irq_pin_list *irq_2_pin; 138 struct irq_pin_list *irq_2_pin;
114 cpumask_t domain; 139 cpumask_t domain;
115 cpumask_t old_domain; 140 cpumask_t old_domain;
@@ -119,81 +144,95 @@ struct irq_cfg {
119}; 144};
120 145
121/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 146/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
147#ifdef CONFIG_SPARSE_IRQ
148static struct irq_cfg irq_cfgx[] = {
149#else
122static struct irq_cfg irq_cfgx[NR_IRQS] = { 150static struct irq_cfg irq_cfgx[NR_IRQS] = {
123 [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 151#endif
124 [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 152 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
125 [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 153 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
126 [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, 154 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
127 [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, 155 [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
128 [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, 156 [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
129 [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, 157 [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
130 [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, 158 [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
131 [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, 159 [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
132 [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, 160 [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
133 [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, 161 [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
134 [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, 162 [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
135 [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, 163 [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
136 [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, 164 [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
137 [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, 165 [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
138 [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, 166 [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
167 [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
139}; 168};
140 169
141#define for_each_irq_cfg(irq, cfg) \ 170void __init arch_early_irq_init(void)
142 for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
143
144static struct irq_cfg *irq_cfg(unsigned int irq)
145{ 171{
146 return irq < nr_irqs ? irq_cfgx + irq : NULL; 172 struct irq_cfg *cfg;
173 struct irq_desc *desc;
174 int count;
175 int i;
176
177 cfg = irq_cfgx;
178 count = ARRAY_SIZE(irq_cfgx);
179
180 for (i = 0; i < count; i++) {
181 desc = irq_to_desc(i);
182 desc->chip_data = &cfg[i];
183 }
147} 184}
148 185
149static struct irq_cfg *irq_cfg_alloc(unsigned int irq) 186#ifdef CONFIG_SPARSE_IRQ
187static struct irq_cfg *irq_cfg(unsigned int irq)
150{ 188{
151 return irq_cfg(irq); 189 struct irq_cfg *cfg = NULL;
190 struct irq_desc *desc;
191
192 desc = irq_to_desc(irq);
193 if (desc)
194 cfg = desc->chip_data;
195
196 return cfg;
152} 197}
153 198
154/* 199static struct irq_cfg *get_one_free_irq_cfg(int cpu)
155 * Rough estimation of how many shared IRQs there are, can be changed 200{
156 * anytime. 201 struct irq_cfg *cfg;
157 */ 202 int node;
158#define MAX_PLUS_SHARED_IRQS NR_IRQS
159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
160 203
161/* 204 node = cpu_to_node(cpu);
162 * This is performance-critical, we want to do it O(1)
163 *
164 * the indexing order of this array favors 1:1 mappings
165 * between pins and IRQs.
166 */
167 205
168struct irq_pin_list { 206 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
169 int apic, pin; 207 printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
170 struct irq_pin_list *next;
171};
172 208
173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; 209 return cfg;
174static struct irq_pin_list *irq_2_pin_ptr; 210}
175 211
176static void __init irq_2_pin_init(void) 212void arch_init_chip_data(struct irq_desc *desc, int cpu)
177{ 213{
178 struct irq_pin_list *pin = irq_2_pin_head; 214 struct irq_cfg *cfg;
179 int i;
180
181 for (i = 1; i < PIN_MAP_SIZE; i++)
182 pin[i-1].next = &pin[i];
183 215
184 irq_2_pin_ptr = &pin[0]; 216 cfg = desc->chip_data;
217 if (!cfg) {
218 desc->chip_data = get_one_free_irq_cfg(cpu);
219 if (!desc->chip_data) {
220 printk(KERN_ERR "can not alloc irq_cfg\n");
221 BUG_ON(1);
222 }
223 }
185} 224}
186 225
187static struct irq_pin_list *get_one_free_irq_2_pin(void) 226#else
227static struct irq_cfg *irq_cfg(unsigned int irq)
188{ 228{
189 struct irq_pin_list *pin = irq_2_pin_ptr; 229 return irq < nr_irqs ? irq_cfgx + irq : NULL;
230}
190 231
191 if (!pin) 232#endif
192 panic("can not get more irq_2_pin\n");
193 233
194 irq_2_pin_ptr = pin->next; 234static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
195 pin->next = NULL; 235{
196 return pin;
197} 236}
198 237
199struct io_apic { 238struct io_apic {
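
With CONFIG_SPARSE_IRQ, the hunk above stops indexing a dense irq_cfgx[NR_IRQS] array and instead hangs each irq_cfg off the descriptor's chip_data pointer, allocating both lazily. A simplified stand-in (the struct layouts, NR_DEMO_IRQS and irq_to_desc_alloc() here are demo assumptions, not the kernel's definitions) showing the shape of that lookup:

#include <stdio.h>
#include <stdlib.h>

struct irq_cfg  { unsigned int vector; };
struct irq_desc { void *chip_data; };

#define NR_DEMO_IRQS 8

static struct irq_desc *descs[NR_DEMO_IRQS];    /* sparse: entries stay NULL until used */

static struct irq_desc *irq_to_desc_alloc(unsigned int irq)
{
        struct irq_desc *desc;

        if (irq >= NR_DEMO_IRQS)
                return NULL;
        if (descs[irq])
                return descs[irq];

        desc = calloc(1, sizeof(*desc));
        if (!desc)
                return NULL;
        desc->chip_data = calloc(1, sizeof(struct irq_cfg));    /* cfg rides along */
        descs[irq] = desc;
        return desc;
}

static struct irq_cfg *irq_cfg(unsigned int irq)
{
        struct irq_desc *desc = irq < NR_DEMO_IRQS ? descs[irq] : NULL;

        return desc ? desc->chip_data : NULL;   /* same shape as the sparse lookup */
}

int main(void)
{
        irq_to_desc_alloc(3);                   /* IRQ 3 gets populated lazily */
        printf("cfg for irq 3: %p\n", (void *)irq_cfg(3));
        printf("cfg for irq 5: %p\n", (void *)irq_cfg(5));  /* never allocated: NULL */
        return 0;
}
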
@@ -237,11 +276,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
237 writel(value, &io_apic->data); 276 writel(value, &io_apic->data);
238} 277}
239 278
240static bool io_apic_level_ack_pending(unsigned int irq) 279static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
241{ 280{
242 struct irq_pin_list *entry; 281 struct irq_pin_list *entry;
243 unsigned long flags; 282 unsigned long flags;
244 struct irq_cfg *cfg = irq_cfg(irq);
245 283
246 spin_lock_irqsave(&ioapic_lock, flags); 284 spin_lock_irqsave(&ioapic_lock, flags);
247 entry = cfg->irq_2_pin; 285 entry = cfg->irq_2_pin;
@@ -323,13 +361,12 @@ static void ioapic_mask_entry(int apic, int pin)
323} 361}
324 362
325#ifdef CONFIG_SMP 363#ifdef CONFIG_SMP
326static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) 364static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
327{ 365{
328 int apic, pin; 366 int apic, pin;
329 struct irq_cfg *cfg;
330 struct irq_pin_list *entry; 367 struct irq_pin_list *entry;
368 u8 vector = cfg->vector;
331 369
332 cfg = irq_cfg(irq);
333 entry = cfg->irq_2_pin; 370 entry = cfg->irq_2_pin;
334 for (;;) { 371 for (;;) {
335 unsigned int reg; 372 unsigned int reg;
@@ -359,24 +396,27 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
359 } 396 }
360} 397}
361 398
362static int assign_irq_vector(int irq, cpumask_t mask); 399static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
363 400
364static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 401static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
365{ 402{
366 struct irq_cfg *cfg; 403 struct irq_cfg *cfg;
367 unsigned long flags; 404 unsigned long flags;
368 unsigned int dest; 405 unsigned int dest;
369 cpumask_t tmp; 406 cpumask_t tmp;
370 struct irq_desc *desc; 407 unsigned int irq;
371 408
372 cpus_and(tmp, mask, cpu_online_map); 409 cpus_and(tmp, mask, cpu_online_map);
373 if (cpus_empty(tmp)) 410 if (cpus_empty(tmp))
374 return; 411 return;
375 412
376 cfg = irq_cfg(irq); 413 irq = desc->irq;
377 if (assign_irq_vector(irq, mask)) 414 cfg = desc->chip_data;
415 if (assign_irq_vector(irq, cfg, mask))
378 return; 416 return;
379 417
418 set_extra_move_desc(desc, mask);
419
380 cpus_and(tmp, cfg->domain, mask); 420 cpus_and(tmp, cfg->domain, mask);
381 dest = cpu_mask_to_apicid(tmp); 421 dest = cpu_mask_to_apicid(tmp);
382 /* 422 /*
@@ -384,12 +424,20 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
384 */ 424 */
385 dest = SET_APIC_LOGICAL_ID(dest); 425 dest = SET_APIC_LOGICAL_ID(dest);
386 426
387 desc = irq_to_desc(irq);
388 spin_lock_irqsave(&ioapic_lock, flags); 427 spin_lock_irqsave(&ioapic_lock, flags);
389 __target_IO_APIC_irq(irq, dest, cfg->vector); 428 __target_IO_APIC_irq(irq, dest, cfg);
390 desc->affinity = mask; 429 desc->affinity = mask;
391 spin_unlock_irqrestore(&ioapic_lock, flags); 430 spin_unlock_irqrestore(&ioapic_lock, flags);
392} 431}
432
433static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
434{
435 struct irq_desc *desc;
436
437 desc = irq_to_desc(irq);
438
439 set_ioapic_affinity_irq_desc(desc, mask);
440}
393#endif /* CONFIG_SMP */ 441#endif /* CONFIG_SMP */
394 442
395/* 443/*
@@ -397,16 +445,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
397 * shared ISA-space IRQs, so we have to support them. We are super 445 * shared ISA-space IRQs, so we have to support them. We are super
398 * fast in the common case, and fast for shared ISA-space IRQs. 446 * fast in the common case, and fast for shared ISA-space IRQs.
399 */ 447 */
400static void add_pin_to_irq(unsigned int irq, int apic, int pin) 448static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
401{ 449{
402 struct irq_cfg *cfg;
403 struct irq_pin_list *entry; 450 struct irq_pin_list *entry;
404 451
405 /* first time to refer irq_cfg, so with new */
406 cfg = irq_cfg_alloc(irq);
407 entry = cfg->irq_2_pin; 452 entry = cfg->irq_2_pin;
408 if (!entry) { 453 if (!entry) {
409 entry = get_one_free_irq_2_pin(); 454 entry = get_one_free_irq_2_pin(cpu);
455 if (!entry) {
456 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
457 apic, pin);
458 return;
459 }
410 cfg->irq_2_pin = entry; 460 cfg->irq_2_pin = entry;
411 entry->apic = apic; 461 entry->apic = apic;
412 entry->pin = pin; 462 entry->pin = pin;
@@ -421,7 +471,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
421 entry = entry->next; 471 entry = entry->next;
422 } 472 }
423 473
424 entry->next = get_one_free_irq_2_pin(); 474 entry->next = get_one_free_irq_2_pin(cpu);
425 entry = entry->next; 475 entry = entry->next;
426 entry->apic = apic; 476 entry->apic = apic;
427 entry->pin = pin; 477 entry->pin = pin;
@@ -430,11 +480,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
430/* 480/*
431 * Reroute an IRQ to a different pin. 481 * Reroute an IRQ to a different pin.
432 */ 482 */
433static void __init replace_pin_at_irq(unsigned int irq, 483static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
434 int oldapic, int oldpin, 484 int oldapic, int oldpin,
435 int newapic, int newpin) 485 int newapic, int newpin)
436{ 486{
437 struct irq_cfg *cfg = irq_cfg(irq);
438 struct irq_pin_list *entry = cfg->irq_2_pin; 487 struct irq_pin_list *entry = cfg->irq_2_pin;
439 int replaced = 0; 488 int replaced = 0;
440 489
@@ -451,18 +500,16 @@ static void __init replace_pin_at_irq(unsigned int irq,
451 500
452 /* why? call replace before add? */ 501 /* why? call replace before add? */
453 if (!replaced) 502 if (!replaced)
454 add_pin_to_irq(irq, newapic, newpin); 503 add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
455} 504}
456 505
457static inline void io_apic_modify_irq(unsigned int irq, 506static inline void io_apic_modify_irq(struct irq_cfg *cfg,
458 int mask_and, int mask_or, 507 int mask_and, int mask_or,
459 void (*final)(struct irq_pin_list *entry)) 508 void (*final)(struct irq_pin_list *entry))
460{ 509{
461 int pin; 510 int pin;
462 struct irq_cfg *cfg;
463 struct irq_pin_list *entry; 511 struct irq_pin_list *entry;
464 512
465 cfg = irq_cfg(irq);
466 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { 513 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
467 unsigned int reg; 514 unsigned int reg;
468 pin = entry->pin; 515 pin = entry->pin;
@@ -475,9 +522,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
475 } 522 }
476} 523}
477 524
478static void __unmask_IO_APIC_irq(unsigned int irq) 525static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
479{ 526{
480 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); 527 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
481} 528}
482 529
483#ifdef CONFIG_X86_64 530#ifdef CONFIG_X86_64
@@ -492,47 +539,64 @@ void io_apic_sync(struct irq_pin_list *entry)
492 readl(&io_apic->data); 539 readl(&io_apic->data);
493} 540}
494 541
495static void __mask_IO_APIC_irq(unsigned int irq) 542static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
496{ 543{
497 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 544 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
498} 545}
499#else /* CONFIG_X86_32 */ 546#else /* CONFIG_X86_32 */
500static void __mask_IO_APIC_irq(unsigned int irq) 547static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
501{ 548{
502 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); 549 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
503} 550}
504 551
505static void __mask_and_edge_IO_APIC_irq(unsigned int irq) 552static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
506{ 553{
507 io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, 554 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
508 IO_APIC_REDIR_MASKED, NULL); 555 IO_APIC_REDIR_MASKED, NULL);
509} 556}
510 557
511static void __unmask_and_level_IO_APIC_irq(unsigned int irq) 558static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
512{ 559{
513 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 560 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
514 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 561 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
515} 562}
516#endif /* CONFIG_X86_32 */ 563#endif /* CONFIG_X86_32 */
517 564
518static void mask_IO_APIC_irq (unsigned int irq) 565static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
519{ 566{
567 struct irq_cfg *cfg = desc->chip_data;
520 unsigned long flags; 568 unsigned long flags;
521 569
570 BUG_ON(!cfg);
571
522 spin_lock_irqsave(&ioapic_lock, flags); 572 spin_lock_irqsave(&ioapic_lock, flags);
523 __mask_IO_APIC_irq(irq); 573 __mask_IO_APIC_irq(cfg);
524 spin_unlock_irqrestore(&ioapic_lock, flags); 574 spin_unlock_irqrestore(&ioapic_lock, flags);
525} 575}
526 576
527static void unmask_IO_APIC_irq (unsigned int irq) 577static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
528{ 578{
579 struct irq_cfg *cfg = desc->chip_data;
529 unsigned long flags; 580 unsigned long flags;
530 581
531 spin_lock_irqsave(&ioapic_lock, flags); 582 spin_lock_irqsave(&ioapic_lock, flags);
532 __unmask_IO_APIC_irq(irq); 583 __unmask_IO_APIC_irq(cfg);
533 spin_unlock_irqrestore(&ioapic_lock, flags); 584 spin_unlock_irqrestore(&ioapic_lock, flags);
534} 585}
535 586
587static void mask_IO_APIC_irq(unsigned int irq)
588{
589 struct irq_desc *desc = irq_to_desc(irq);
590
591 mask_IO_APIC_irq_desc(desc);
592}
593static void unmask_IO_APIC_irq(unsigned int irq)
594{
595 struct irq_desc *desc = irq_to_desc(irq);
596
597 unmask_IO_APIC_irq_desc(desc);
598}
599
536static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 600static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
537{ 601{
538 struct IO_APIC_route_entry entry; 602 struct IO_APIC_route_entry entry;
@@ -809,7 +873,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
809 */ 873 */
810static int EISA_ELCR(unsigned int irq) 874static int EISA_ELCR(unsigned int irq)
811{ 875{
812 if (irq < 16) { 876 if (irq < NR_IRQS_LEGACY) {
813 unsigned int port = 0x4d0 + (irq >> 3); 877 unsigned int port = 0x4d0 + (irq >> 3);
814 return (inb(port) >> (irq & 7)) & 1; 878 return (inb(port) >> (irq & 7)) & 1;
815 } 879 }
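
The change above only swaps the literal 16 for NR_IRQS_LEGACY; the surrounding ELCR lookup is unchanged and shows the port/bit arithmetic involved: the two EISA edge/level control registers sit at I/O ports 0x4d0 (IRQs 0-7) and 0x4d1 (IRQs 8-15), one bit per legacy IRQ. A runnable sketch with inb() faked by a constant so it works as plain user-space C (NR_IRQS_LEGACY is assumed to be 16, matching the legacy 8259 range):

#include <stdio.h>

#define NR_IRQS_LEGACY 16

static unsigned char fake_inb(unsigned int port)
{
        /* pretend IRQs 9 and 11 are level-triggered: bits 1 and 3 of port 0x4d1 */
        return port == 0x4d1 ? 0x0a : 0x00;
}

static int eisa_elcr(unsigned int irq)
{
        if (irq < NR_IRQS_LEGACY) {
                unsigned int port = 0x4d0 + (irq >> 3);   /* which of the two registers */
                return (fake_inb(port) >> (irq & 7)) & 1; /* which bit within it */
        }
        return 0;                                         /* non-legacy IRQs default to edge */
}

int main(void)
{
        for (unsigned int irq = 0; irq < NR_IRQS_LEGACY; irq++)
                printf("IRQ %2u: %s\n", irq, eisa_elcr(irq) ? "level" : "edge");
        return 0;
}
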
@@ -1034,7 +1098,7 @@ void unlock_vector_lock(void)
1034 spin_unlock(&vector_lock); 1098 spin_unlock(&vector_lock);
1035} 1099}
1036 1100
1037static int __assign_irq_vector(int irq, cpumask_t mask) 1101static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
1038{ 1102{
1039 /* 1103 /*
1040 * NOTE! The local APIC isn't very good at handling 1104 * NOTE! The local APIC isn't very good at handling
@@ -1050,16 +1114,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
1050 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1114 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
1051 unsigned int old_vector; 1115 unsigned int old_vector;
1052 int cpu; 1116 int cpu;
1053 struct irq_cfg *cfg;
1054 1117
1055 cfg = irq_cfg(irq); 1118 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1119 return -EBUSY;
1056 1120
1057 /* Only try and allocate irqs on cpus that are present */ 1121 /* Only try and allocate irqs on cpus that are present */
1058 cpus_and(mask, mask, cpu_online_map); 1122 cpus_and(mask, mask, cpu_online_map);
1059 1123
1060 if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1061 return -EBUSY;
1062
1063 old_vector = cfg->vector; 1124 old_vector = cfg->vector;
1064 if (old_vector) { 1125 if (old_vector) {
1065 cpumask_t tmp; 1126 cpumask_t tmp;
@@ -1113,24 +1174,22 @@ next:
1113 return -ENOSPC; 1174 return -ENOSPC;
1114} 1175}
1115 1176
1116static int assign_irq_vector(int irq, cpumask_t mask) 1177static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
1117{ 1178{
1118 int err; 1179 int err;
1119 unsigned long flags; 1180 unsigned long flags;
1120 1181
1121 spin_lock_irqsave(&vector_lock, flags); 1182 spin_lock_irqsave(&vector_lock, flags);
1122 err = __assign_irq_vector(irq, mask); 1183 err = __assign_irq_vector(irq, cfg, mask);
1123 spin_unlock_irqrestore(&vector_lock, flags); 1184 spin_unlock_irqrestore(&vector_lock, flags);
1124 return err; 1185 return err;
1125} 1186}
1126 1187
1127static void __clear_irq_vector(int irq) 1188static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1128{ 1189{
1129 struct irq_cfg *cfg;
1130 cpumask_t mask; 1190 cpumask_t mask;
1131 int cpu, vector; 1191 int cpu, vector;
1132 1192
1133 cfg = irq_cfg(irq);
1134 BUG_ON(!cfg->vector); 1193 BUG_ON(!cfg->vector);
1135 1194
1136 vector = cfg->vector; 1195 vector = cfg->vector;
@@ -1140,6 +1199,20 @@ static void __clear_irq_vector(int irq)
1140 1199
1141 cfg->vector = 0; 1200 cfg->vector = 0;
1142 cpus_clear(cfg->domain); 1201 cpus_clear(cfg->domain);
1202
1203 if (likely(!cfg->move_in_progress))
1204 return;
1205 cpus_and(mask, cfg->old_domain, cpu_online_map);
1206 for_each_cpu_mask_nr(cpu, mask) {
1207 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
1208 vector++) {
1209 if (per_cpu(vector_irq, cpu)[vector] != irq)
1210 continue;
1211 per_cpu(vector_irq, cpu)[vector] = -1;
1212 break;
1213 }
1214 }
1215 cfg->move_in_progress = 0;
1143} 1216}
1144 1217
1145void __setup_vector_irq(int cpu) 1218void __setup_vector_irq(int cpu)
@@ -1148,9 +1221,13 @@ void __setup_vector_irq(int cpu)
1148 /* This function must be called with vector_lock held */ 1221 /* This function must be called with vector_lock held */
1149 int irq, vector; 1222 int irq, vector;
1150 struct irq_cfg *cfg; 1223 struct irq_cfg *cfg;
1224 struct irq_desc *desc;
1151 1225
1152 /* Mark the inuse vectors */ 1226 /* Mark the inuse vectors */
1153 for_each_irq_cfg(irq, cfg) { 1227 for_each_irq_desc(irq, desc) {
1228 if (!desc)
1229 continue;
1230 cfg = desc->chip_data;
1154 if (!cpu_isset(cpu, cfg->domain)) 1231 if (!cpu_isset(cpu, cfg->domain))
1155 continue; 1232 continue;
1156 vector = cfg->vector; 1233 vector = cfg->vector;
@@ -1201,11 +1278,8 @@ static inline int IO_APIC_irq_trigger(int irq)
1201} 1278}
1202#endif 1279#endif
1203 1280
1204static void ioapic_register_intr(int irq, unsigned long trigger) 1281static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
1205{ 1282{
1206 struct irq_desc *desc;
1207
1208 desc = irq_to_desc(irq);
1209 1283
1210 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1284 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1211 trigger == IOAPIC_LEVEL) 1285 trigger == IOAPIC_LEVEL)
@@ -1297,7 +1371,7 @@ static int setup_ioapic_entry(int apic, int irq,
1297 return 0; 1371 return 0;
1298} 1372}
1299 1373
1300static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1374static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
1301 int trigger, int polarity) 1375 int trigger, int polarity)
1302{ 1376{
1303 struct irq_cfg *cfg; 1377 struct irq_cfg *cfg;
@@ -1307,10 +1381,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1307 if (!IO_APIC_IRQ(irq)) 1381 if (!IO_APIC_IRQ(irq))
1308 return; 1382 return;
1309 1383
1310 cfg = irq_cfg(irq); 1384 cfg = desc->chip_data;
1311 1385
1312 mask = TARGET_CPUS; 1386 mask = TARGET_CPUS;
1313 if (assign_irq_vector(irq, mask)) 1387 if (assign_irq_vector(irq, cfg, mask))
1314 return; 1388 return;
1315 1389
1316 cpus_and(mask, cfg->domain, mask); 1390 cpus_and(mask, cfg->domain, mask);
@@ -1327,12 +1401,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1327 cfg->vector)) { 1401 cfg->vector)) {
1328 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1402 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1329 mp_ioapics[apic].mp_apicid, pin); 1403 mp_ioapics[apic].mp_apicid, pin);
1330 __clear_irq_vector(irq); 1404 __clear_irq_vector(irq, cfg);
1331 return; 1405 return;
1332 } 1406 }
1333 1407
1334 ioapic_register_intr(irq, trigger); 1408 ioapic_register_intr(irq, desc, trigger);
1335 if (irq < 16) 1409 if (irq < NR_IRQS_LEGACY)
1336 disable_8259A_irq(irq); 1410 disable_8259A_irq(irq);
1337 1411
1338 ioapic_write_entry(apic, pin, entry); 1412 ioapic_write_entry(apic, pin, entry);
@@ -1342,6 +1416,9 @@ static void __init setup_IO_APIC_irqs(void)
1342{ 1416{
1343 int apic, pin, idx, irq; 1417 int apic, pin, idx, irq;
1344 int notcon = 0; 1418 int notcon = 0;
1419 struct irq_desc *desc;
1420 struct irq_cfg *cfg;
1421 int cpu = boot_cpu_id;
1345 1422
1346 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1423 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1347 1424
@@ -1373,9 +1450,15 @@ static void __init setup_IO_APIC_irqs(void)
1373 if (multi_timer_check(apic, irq)) 1450 if (multi_timer_check(apic, irq))
1374 continue; 1451 continue;
1375#endif 1452#endif
1376 add_pin_to_irq(irq, apic, pin); 1453 desc = irq_to_desc_alloc_cpu(irq, cpu);
1454 if (!desc) {
1455 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1456 continue;
1457 }
1458 cfg = desc->chip_data;
1459 add_pin_to_irq_cpu(cfg, cpu, apic, pin);
1377 1460
1378 setup_IO_APIC_irq(apic, pin, irq, 1461 setup_IO_APIC_irq(apic, pin, irq, desc,
1379 irq_trigger(idx), irq_polarity(idx)); 1462 irq_trigger(idx), irq_polarity(idx));
1380 } 1463 }
1381 } 1464 }
@@ -1434,6 +1517,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1434 union IO_APIC_reg_03 reg_03; 1517 union IO_APIC_reg_03 reg_03;
1435 unsigned long flags; 1518 unsigned long flags;
1436 struct irq_cfg *cfg; 1519 struct irq_cfg *cfg;
1520 struct irq_desc *desc;
1437 unsigned int irq; 1521 unsigned int irq;
1438 1522
1439 if (apic_verbosity == APIC_QUIET) 1523 if (apic_verbosity == APIC_QUIET)
@@ -1523,8 +1607,13 @@ __apicdebuginit(void) print_IO_APIC(void)
1523 } 1607 }
1524 } 1608 }
1525 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1609 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1526 for_each_irq_cfg(irq, cfg) { 1610 for_each_irq_desc(irq, desc) {
1527 struct irq_pin_list *entry = cfg->irq_2_pin; 1611 struct irq_pin_list *entry;
1612
1613 if (!desc)
1614 continue;
1615 cfg = desc->chip_data;
1616 entry = cfg->irq_2_pin;
1528 if (!entry) 1617 if (!entry)
1529 continue; 1618 continue;
1530 printk(KERN_DEBUG "IRQ%d ", irq); 1619 printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2008,14 +2097,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2008{ 2097{
2009 int was_pending = 0; 2098 int was_pending = 0;
2010 unsigned long flags; 2099 unsigned long flags;
2100 struct irq_cfg *cfg;
2011 2101
2012 spin_lock_irqsave(&ioapic_lock, flags); 2102 spin_lock_irqsave(&ioapic_lock, flags);
2013 if (irq < 16) { 2103 if (irq < NR_IRQS_LEGACY) {
2014 disable_8259A_irq(irq); 2104 disable_8259A_irq(irq);
2015 if (i8259A_irq_pending(irq)) 2105 if (i8259A_irq_pending(irq))
2016 was_pending = 1; 2106 was_pending = 1;
2017 } 2107 }
2018 __unmask_IO_APIC_irq(irq); 2108 cfg = irq_cfg(irq);
2109 __unmask_IO_APIC_irq(cfg);
2019 spin_unlock_irqrestore(&ioapic_lock, flags); 2110 spin_unlock_irqrestore(&ioapic_lock, flags);
2020 2111
2021 return was_pending; 2112 return was_pending;
@@ -2078,35 +2169,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2078 * as simple as edge triggered migration and we can do the irq migration 2169 * as simple as edge triggered migration and we can do the irq migration
2079 * with a simple atomic update to IO-APIC RTE. 2170 * with a simple atomic update to IO-APIC RTE.
2080 */ 2171 */
2081static void migrate_ioapic_irq(int irq, cpumask_t mask) 2172static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
2082{ 2173{
2083 struct irq_cfg *cfg; 2174 struct irq_cfg *cfg;
2084 struct irq_desc *desc;
2085 cpumask_t tmp, cleanup_mask; 2175 cpumask_t tmp, cleanup_mask;
2086 struct irte irte; 2176 struct irte irte;
2087 int modify_ioapic_rte; 2177 int modify_ioapic_rte;
2088 unsigned int dest; 2178 unsigned int dest;
2089 unsigned long flags; 2179 unsigned long flags;
2180 unsigned int irq;
2090 2181
2091 cpus_and(tmp, mask, cpu_online_map); 2182 cpus_and(tmp, mask, cpu_online_map);
2092 if (cpus_empty(tmp)) 2183 if (cpus_empty(tmp))
2093 return; 2184 return;
2094 2185
2186 irq = desc->irq;
2095 if (get_irte(irq, &irte)) 2187 if (get_irte(irq, &irte))
2096 return; 2188 return;
2097 2189
2098 if (assign_irq_vector(irq, mask)) 2190 cfg = desc->chip_data;
2191 if (assign_irq_vector(irq, cfg, mask))
2099 return; 2192 return;
2100 2193
2101 cfg = irq_cfg(irq); 2194 set_extra_move_desc(desc, mask);
2195
2102 cpus_and(tmp, cfg->domain, mask); 2196 cpus_and(tmp, cfg->domain, mask);
2103 dest = cpu_mask_to_apicid(tmp); 2197 dest = cpu_mask_to_apicid(tmp);
2104 2198
2105 desc = irq_to_desc(irq);
2106 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2199 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2107 if (modify_ioapic_rte) { 2200 if (modify_ioapic_rte) {
2108 spin_lock_irqsave(&ioapic_lock, flags); 2201 spin_lock_irqsave(&ioapic_lock, flags);
2109 __target_IO_APIC_irq(irq, dest, cfg->vector); 2202 __target_IO_APIC_irq(irq, dest, cfg);
2110 spin_unlock_irqrestore(&ioapic_lock, flags); 2203 spin_unlock_irqrestore(&ioapic_lock, flags);
2111 } 2204 }
2112 2205
@@ -2128,14 +2221,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
2128 desc->affinity = mask; 2221 desc->affinity = mask;
2129} 2222}
2130 2223
2131static int migrate_irq_remapped_level(int irq) 2224static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2132{ 2225{
2133 int ret = -1; 2226 int ret = -1;
2134 struct irq_desc *desc = irq_to_desc(irq); 2227 struct irq_cfg *cfg = desc->chip_data;
2135 2228
2136 mask_IO_APIC_irq(irq); 2229 mask_IO_APIC_irq_desc(desc);
2137 2230
2138 if (io_apic_level_ack_pending(irq)) { 2231 if (io_apic_level_ack_pending(cfg)) {
2139 /* 2232 /*
2140 * Interrupt in progress. Migrating irq now will change the 2233 * Interrupt in progress. Migrating irq now will change the
2141 * vector information in the IO-APIC RTE and that will confuse 2234 * vector information in the IO-APIC RTE and that will confuse
@@ -2147,14 +2240,15 @@ static int migrate_irq_remapped_level(int irq)
2147 } 2240 }
2148 2241
2149 /* everything is clear. we have right of way */ 2242 /* everything is clear. we have right of way */
2150 migrate_ioapic_irq(irq, desc->pending_mask); 2243 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2151 2244
2152 ret = 0; 2245 ret = 0;
2153 desc->status &= ~IRQ_MOVE_PENDING; 2246 desc->status &= ~IRQ_MOVE_PENDING;
2154 cpus_clear(desc->pending_mask); 2247 cpus_clear(desc->pending_mask);
2155 2248
2156unmask: 2249unmask:
2157 unmask_IO_APIC_irq(irq); 2250 unmask_IO_APIC_irq_desc(desc);
2251
2158 return ret; 2252 return ret;
2159} 2253}
2160 2254
@@ -2164,6 +2258,9 @@ static void ir_irq_migration(struct work_struct *work)
2164 struct irq_desc *desc; 2258 struct irq_desc *desc;
2165 2259
2166 for_each_irq_desc(irq, desc) { 2260 for_each_irq_desc(irq, desc) {
2261 if (!desc)
2262 continue;
2263
2167 if (desc->status & IRQ_MOVE_PENDING) { 2264 if (desc->status & IRQ_MOVE_PENDING) {
2168 unsigned long flags; 2265 unsigned long flags;
2169 2266
@@ -2184,18 +2281,22 @@ static void ir_irq_migration(struct work_struct *work)
2184/* 2281/*
2185 * Migrates the IRQ destination in the process context. 2282 * Migrates the IRQ destination in the process context.
2186 */ 2283 */
2187static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2284static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
2188{ 2285{
2189 struct irq_desc *desc = irq_to_desc(irq);
2190
2191 if (desc->status & IRQ_LEVEL) { 2286 if (desc->status & IRQ_LEVEL) {
2192 desc->status |= IRQ_MOVE_PENDING; 2287 desc->status |= IRQ_MOVE_PENDING;
2193 desc->pending_mask = mask; 2288 desc->pending_mask = mask;
2194 migrate_irq_remapped_level(irq); 2289 migrate_irq_remapped_level_desc(desc);
2195 return; 2290 return;
2196 } 2291 }
2197 2292
2198 migrate_ioapic_irq(irq, mask); 2293 migrate_ioapic_irq_desc(desc, mask);
2294}
2295static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
2296{
2297 struct irq_desc *desc = irq_to_desc(irq);
2298
2299 set_ir_ioapic_affinity_irq_desc(desc, mask);
2199} 2300}
2200#endif 2301#endif
2201 2302
@@ -2215,6 +2316,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2215 struct irq_cfg *cfg; 2316 struct irq_cfg *cfg;
2216 irq = __get_cpu_var(vector_irq)[vector]; 2317 irq = __get_cpu_var(vector_irq)[vector];
2217 2318
2319 if (irq == -1)
2320 continue;
2321
2218 desc = irq_to_desc(irq); 2322 desc = irq_to_desc(irq);
2219 if (!desc) 2323 if (!desc)
2220 continue; 2324 continue;
@@ -2236,9 +2340,10 @@ unlock:
2236 irq_exit(); 2340 irq_exit();
2237} 2341}
2238 2342
2239static void irq_complete_move(unsigned int irq) 2343static void irq_complete_move(struct irq_desc **descp)
2240{ 2344{
2241 struct irq_cfg *cfg = irq_cfg(irq); 2345 struct irq_desc *desc = *descp;
2346 struct irq_cfg *cfg = desc->chip_data;
2242 unsigned vector, me; 2347 unsigned vector, me;
2243 2348
2244 if (likely(!cfg->move_in_progress)) 2349 if (likely(!cfg->move_in_progress))
@@ -2256,8 +2361,9 @@ static void irq_complete_move(unsigned int irq)
2256 } 2361 }
2257} 2362}
2258#else 2363#else
2259static inline void irq_complete_move(unsigned int irq) {} 2364static inline void irq_complete_move(struct irq_desc **descp) {}
2260#endif 2365#endif
2366
2261#ifdef CONFIG_INTR_REMAP 2367#ifdef CONFIG_INTR_REMAP
2262static void ack_x2apic_level(unsigned int irq) 2368static void ack_x2apic_level(unsigned int irq)
2263{ 2369{
@@ -2268,11 +2374,14 @@ static void ack_x2apic_edge(unsigned int irq)
2268{ 2374{
2269 ack_x2APIC_irq(); 2375 ack_x2APIC_irq();
2270} 2376}
2377
2271#endif 2378#endif
2272 2379
2273static void ack_apic_edge(unsigned int irq) 2380static void ack_apic_edge(unsigned int irq)
2274{ 2381{
2275 irq_complete_move(irq); 2382 struct irq_desc *desc = irq_to_desc(irq);
2383
2384 irq_complete_move(&desc);
2276 move_native_irq(irq); 2385 move_native_irq(irq);
2277 ack_APIC_irq(); 2386 ack_APIC_irq();
2278} 2387}
@@ -2281,18 +2390,21 @@ atomic_t irq_mis_count;
2281 2390
2282static void ack_apic_level(unsigned int irq) 2391static void ack_apic_level(unsigned int irq)
2283{ 2392{
2393 struct irq_desc *desc = irq_to_desc(irq);
2394
2284#ifdef CONFIG_X86_32 2395#ifdef CONFIG_X86_32
2285 unsigned long v; 2396 unsigned long v;
2286 int i; 2397 int i;
2287#endif 2398#endif
2399 struct irq_cfg *cfg;
2288 int do_unmask_irq = 0; 2400 int do_unmask_irq = 0;
2289 2401
2290 irq_complete_move(irq); 2402 irq_complete_move(&desc);
2291#ifdef CONFIG_GENERIC_PENDING_IRQ 2403#ifdef CONFIG_GENERIC_PENDING_IRQ
2292 /* If we are moving the irq we need to mask it */ 2404 /* If we are moving the irq we need to mask it */
2293 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { 2405 if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
2294 do_unmask_irq = 1; 2406 do_unmask_irq = 1;
2295 mask_IO_APIC_irq(irq); 2407 mask_IO_APIC_irq_desc(desc);
2296 } 2408 }
2297#endif 2409#endif
2298 2410
@@ -2316,7 +2428,8 @@ static void ack_apic_level(unsigned int irq)
2316 * operation to prevent an edge-triggered interrupt escaping meanwhile. 2428 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2317 * The idea is from Manfred Spraul. --macro 2429 * The idea is from Manfred Spraul. --macro
2318 */ 2430 */
2319 i = irq_cfg(irq)->vector; 2431 cfg = desc->chip_data;
2432 i = cfg->vector;
2320 2433
2321 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2434 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2322#endif 2435#endif
@@ -2355,17 +2468,18 @@ static void ack_apic_level(unsigned int irq)
2355 * accurate and is causing problems then it is a hardware bug 2468 * accurate and is causing problems then it is a hardware bug
2356 * and you can go talk to the chipset vendor about it. 2469 * and you can go talk to the chipset vendor about it.
2357 */ 2470 */
2358 if (!io_apic_level_ack_pending(irq)) 2471 cfg = desc->chip_data;
2472 if (!io_apic_level_ack_pending(cfg))
2359 move_masked_irq(irq); 2473 move_masked_irq(irq);
2360 unmask_IO_APIC_irq(irq); 2474 unmask_IO_APIC_irq_desc(desc);
2361 } 2475 }
2362 2476
2363#ifdef CONFIG_X86_32 2477#ifdef CONFIG_X86_32
2364 if (!(v & (1 << (i & 0x1f)))) { 2478 if (!(v & (1 << (i & 0x1f)))) {
2365 atomic_inc(&irq_mis_count); 2479 atomic_inc(&irq_mis_count);
2366 spin_lock(&ioapic_lock); 2480 spin_lock(&ioapic_lock);
2367 __mask_and_edge_IO_APIC_irq(irq); 2481 __mask_and_edge_IO_APIC_irq(cfg);
2368 __unmask_and_level_IO_APIC_irq(irq); 2482 __unmask_and_level_IO_APIC_irq(cfg);
2369 spin_unlock(&ioapic_lock); 2483 spin_unlock(&ioapic_lock);
2370 } 2484 }
2371#endif 2485#endif
@@ -2416,20 +2530,22 @@ static inline void init_IO_APIC_traps(void)
2416 * Also, we've got to be careful not to trash gate 2530 * Also, we've got to be careful not to trash gate
2417 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2531 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2418 */ 2532 */
2419 for_each_irq_cfg(irq, cfg) { 2533 for_each_irq_desc(irq, desc) {
2420 if (IO_APIC_IRQ(irq) && !cfg->vector) { 2534 if (!desc)
2535 continue;
2536
2537 cfg = desc->chip_data;
2538 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2421 /* 2539 /*
2422 * Hmm.. We don't have an entry for this, 2540 * Hmm.. We don't have an entry for this,
2423 * so default to an old-fashioned 8259 2541 * so default to an old-fashioned 8259
2424 * interrupt if we can.. 2542 * interrupt if we can..
2425 */ 2543 */
2426 if (irq < 16) 2544 if (irq < NR_IRQS_LEGACY)
2427 make_8259A_irq(irq); 2545 make_8259A_irq(irq);
2428 else { 2546 else
2429 desc = irq_to_desc(irq);
2430 /* Strange. Oh, well.. */ 2547 /* Strange. Oh, well.. */
2431 desc->chip = &no_irq_chip; 2548 desc->chip = &no_irq_chip;
2432 }
2433 } 2549 }
2434 } 2550 }
2435} 2551}
@@ -2454,7 +2570,7 @@ static void unmask_lapic_irq(unsigned int irq)
2454 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2570 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2455} 2571}
2456 2572
2457static void ack_lapic_irq (unsigned int irq) 2573static void ack_lapic_irq(unsigned int irq)
2458{ 2574{
2459 ack_APIC_irq(); 2575 ack_APIC_irq();
2460} 2576}
@@ -2466,11 +2582,8 @@ static struct irq_chip lapic_chip __read_mostly = {
2466 .ack = ack_lapic_irq, 2582 .ack = ack_lapic_irq,
2467}; 2583};
2468 2584
2469static void lapic_register_intr(int irq) 2585static void lapic_register_intr(int irq, struct irq_desc *desc)
2470{ 2586{
2471 struct irq_desc *desc;
2472
2473 desc = irq_to_desc(irq);
2474 desc->status &= ~IRQ_LEVEL; 2587 desc->status &= ~IRQ_LEVEL;
2475 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2588 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2476 "edge"); 2589 "edge");
@@ -2574,7 +2687,9 @@ int timer_through_8259 __initdata;
2574 */ 2687 */
2575static inline void __init check_timer(void) 2688static inline void __init check_timer(void)
2576{ 2689{
2577 struct irq_cfg *cfg = irq_cfg(0); 2690 struct irq_desc *desc = irq_to_desc(0);
2691 struct irq_cfg *cfg = desc->chip_data;
2692 int cpu = boot_cpu_id;
2578 int apic1, pin1, apic2, pin2; 2693 int apic1, pin1, apic2, pin2;
2579 unsigned long flags; 2694 unsigned long flags;
2580 unsigned int ver; 2695 unsigned int ver;
@@ -2589,7 +2704,7 @@ static inline void __init check_timer(void)
2589 * get/set the timer IRQ vector: 2704 * get/set the timer IRQ vector:
2590 */ 2705 */
2591 disable_8259A_irq(0); 2706 disable_8259A_irq(0);
2592 assign_irq_vector(0, TARGET_CPUS); 2707 assign_irq_vector(0, cfg, TARGET_CPUS);
2593 2708
2594 /* 2709 /*
2595 * As IRQ0 is to be enabled in the 8259A, the virtual 2710 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2640,10 +2755,10 @@ static inline void __init check_timer(void)
2640 * Ok, does IRQ0 through the IOAPIC work? 2755 * Ok, does IRQ0 through the IOAPIC work?
2641 */ 2756 */
2642 if (no_pin1) { 2757 if (no_pin1) {
2643 add_pin_to_irq(0, apic1, pin1); 2758 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
2644 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2759 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2645 } 2760 }
2646 unmask_IO_APIC_irq(0); 2761 unmask_IO_APIC_irq_desc(desc);
2647 if (timer_irq_works()) { 2762 if (timer_irq_works()) {
2648 if (nmi_watchdog == NMI_IO_APIC) { 2763 if (nmi_watchdog == NMI_IO_APIC) {
2649 setup_nmi(); 2764 setup_nmi();
@@ -2669,9 +2784,9 @@ static inline void __init check_timer(void)
2669 /* 2784 /*
2670 * legacy devices should be connected to IO APIC #0 2785 * legacy devices should be connected to IO APIC #0
2671 */ 2786 */
2672 replace_pin_at_irq(0, apic1, pin1, apic2, pin2); 2787 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
2673 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2788 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2674 unmask_IO_APIC_irq(0); 2789 unmask_IO_APIC_irq_desc(desc);
2675 enable_8259A_irq(0); 2790 enable_8259A_irq(0);
2676 if (timer_irq_works()) { 2791 if (timer_irq_works()) {
2677 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2792 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2703,7 +2818,7 @@ static inline void __init check_timer(void)
2703 apic_printk(APIC_QUIET, KERN_INFO 2818 apic_printk(APIC_QUIET, KERN_INFO
2704 "...trying to set up timer as Virtual Wire IRQ...\n"); 2819 "...trying to set up timer as Virtual Wire IRQ...\n");
2705 2820
2706 lapic_register_intr(0); 2821 lapic_register_intr(0, desc);
2707 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 2822 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
2708 enable_8259A_irq(0); 2823 enable_8259A_irq(0);
2709 2824
@@ -2888,22 +3003,26 @@ unsigned int create_irq_nr(unsigned int irq_want)
2888 unsigned int irq; 3003 unsigned int irq;
2889 unsigned int new; 3004 unsigned int new;
2890 unsigned long flags; 3005 unsigned long flags;
2891 struct irq_cfg *cfg_new; 3006 struct irq_cfg *cfg_new = NULL;
2892 3007 int cpu = boot_cpu_id;
2893 irq_want = nr_irqs - 1; 3008 struct irq_desc *desc_new = NULL;
2894 3009
2895 irq = 0; 3010 irq = 0;
2896 spin_lock_irqsave(&vector_lock, flags); 3011 spin_lock_irqsave(&vector_lock, flags);
2897 for (new = irq_want; new > 0; new--) { 3012 for (new = irq_want; new < NR_IRQS; new++) {
2898 if (platform_legacy_irq(new)) 3013 if (platform_legacy_irq(new))
2899 continue; 3014 continue;
2900 cfg_new = irq_cfg(new); 3015
2901 if (cfg_new && cfg_new->vector != 0) 3016 desc_new = irq_to_desc_alloc_cpu(new, cpu);
3017 if (!desc_new) {
3018 printk(KERN_INFO "can not get irq_desc for %d\n", new);
2902 continue; 3019 continue;
2903 /* check if need to create one */ 3020 }
2904 if (!cfg_new) 3021 cfg_new = desc_new->chip_data;
2905 cfg_new = irq_cfg_alloc(new); 3022
2906 if (__assign_irq_vector(new, TARGET_CPUS) == 0) 3023 if (cfg_new->vector != 0)
3024 continue;
3025 if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
2907 irq = new; 3026 irq = new;
2908 break; 3027 break;
2909 } 3028 }
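
create_irq_nr() now scans upward from the caller's irq_want instead of downward from nr_irqs - 1, so dynamically created IRQs land just above the GSI range. A toy version of that search; the in_use[] bitmap and the demo bounds stand in for the real descriptor and vector checks:

#include <stdio.h>
#include <stdbool.h>

#define NR_DEMO_IRQS  64
#define DEMO_GSI_END  48                       /* first number past the GSIs (made up) */

static bool in_use[NR_DEMO_IRQS];

static unsigned int create_irq_nr(unsigned int irq_want)
{
        for (unsigned int new = irq_want; new < NR_DEMO_IRQS; new++) {
                if (in_use[new])
                        continue;
                in_use[new] = true;            /* claim it, like assigning a vector */
                return new;
        }
        return 0;                              /* 0 means failure, as in the original */
}

int main(void)
{
        printf("first MSI irq:  %u\n", create_irq_nr(DEMO_GSI_END));
        printf("second MSI irq: %u\n", create_irq_nr(DEMO_GSI_END));
        return 0;
}
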
@@ -2911,15 +3030,21 @@ unsigned int create_irq_nr(unsigned int irq_want)
2911 3030
2912 if (irq > 0) { 3031 if (irq > 0) {
2913 dynamic_irq_init(irq); 3032 dynamic_irq_init(irq);
3033 /* restore it, in case dynamic_irq_init clear it */
3034 if (desc_new)
3035 desc_new->chip_data = cfg_new;
2914 } 3036 }
2915 return irq; 3037 return irq;
2916} 3038}
2917 3039
3040static int nr_irqs_gsi = NR_IRQS_LEGACY;
2918int create_irq(void) 3041int create_irq(void)
2919{ 3042{
3043 unsigned int irq_want;
2920 int irq; 3044 int irq;
2921 3045
2922 irq = create_irq_nr(nr_irqs - 1); 3046 irq_want = nr_irqs_gsi;
3047 irq = create_irq_nr(irq_want);
2923 3048
2924 if (irq == 0) 3049 if (irq == 0)
2925 irq = -1; 3050 irq = -1;
@@ -2930,14 +3055,22 @@ int create_irq(void)
2930void destroy_irq(unsigned int irq) 3055void destroy_irq(unsigned int irq)
2931{ 3056{
2932 unsigned long flags; 3057 unsigned long flags;
3058 struct irq_cfg *cfg;
3059 struct irq_desc *desc;
2933 3060
3061 /* store it, in case dynamic_irq_cleanup clear it */
3062 desc = irq_to_desc(irq);
3063 cfg = desc->chip_data;
2934 dynamic_irq_cleanup(irq); 3064 dynamic_irq_cleanup(irq);
3065 /* connect back irq_cfg */
3066 if (desc)
3067 desc->chip_data = cfg;
2935 3068
2936#ifdef CONFIG_INTR_REMAP 3069#ifdef CONFIG_INTR_REMAP
2937 free_irte(irq); 3070 free_irte(irq);
2938#endif 3071#endif
2939 spin_lock_irqsave(&vector_lock, flags); 3072 spin_lock_irqsave(&vector_lock, flags);
2940 __clear_irq_vector(irq); 3073 __clear_irq_vector(irq, cfg);
2941 spin_unlock_irqrestore(&vector_lock, flags); 3074 spin_unlock_irqrestore(&vector_lock, flags);
2942} 3075}
2943 3076
@@ -2952,12 +3085,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2952 unsigned dest; 3085 unsigned dest;
2953 cpumask_t tmp; 3086 cpumask_t tmp;
2954 3087
3088 cfg = irq_cfg(irq);
2955 tmp = TARGET_CPUS; 3089 tmp = TARGET_CPUS;
2956 err = assign_irq_vector(irq, tmp); 3090 err = assign_irq_vector(irq, cfg, tmp);
2957 if (err) 3091 if (err)
2958 return err; 3092 return err;
2959 3093
2960 cfg = irq_cfg(irq);
2961 cpus_and(tmp, cfg->domain, tmp); 3094 cpus_and(tmp, cfg->domain, tmp);
2962 dest = cpu_mask_to_apicid(tmp); 3095 dest = cpu_mask_to_apicid(tmp);
2963 3096
@@ -3015,35 +3148,35 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3015#ifdef CONFIG_SMP 3148#ifdef CONFIG_SMP
3016static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3149static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3017{ 3150{
3151 struct irq_desc *desc = irq_to_desc(irq);
3018 struct irq_cfg *cfg; 3152 struct irq_cfg *cfg;
3019 struct msi_msg msg; 3153 struct msi_msg msg;
3020 unsigned int dest; 3154 unsigned int dest;
3021 cpumask_t tmp; 3155 cpumask_t tmp;
3022 struct irq_desc *desc;
3023 3156
3024 cpus_and(tmp, mask, cpu_online_map); 3157 cpus_and(tmp, mask, cpu_online_map);
3025 if (cpus_empty(tmp)) 3158 if (cpus_empty(tmp))
3026 return; 3159 return;
3027 3160
3028 if (assign_irq_vector(irq, mask)) 3161 cfg = desc->chip_data;
3162 if (assign_irq_vector(irq, cfg, mask))
3029 return; 3163 return;
3030 3164
3031 cfg = irq_cfg(irq); 3165 set_extra_move_desc(desc, mask);
3166
3032 cpus_and(tmp, cfg->domain, mask); 3167 cpus_and(tmp, cfg->domain, mask);
3033 dest = cpu_mask_to_apicid(tmp); 3168 dest = cpu_mask_to_apicid(tmp);
3034 3169
3035 read_msi_msg(irq, &msg); 3170 read_msi_msg_desc(desc, &msg);
3036 3171
3037 msg.data &= ~MSI_DATA_VECTOR_MASK; 3172 msg.data &= ~MSI_DATA_VECTOR_MASK;
3038 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3173 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3039 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3174 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3040 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3175 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3041 3176
3042 write_msi_msg(irq, &msg); 3177 write_msi_msg_desc(desc, &msg);
3043 desc = irq_to_desc(irq);
3044 desc->affinity = mask; 3178 desc->affinity = mask;
3045} 3179}
3046
3047#ifdef CONFIG_INTR_REMAP 3180#ifdef CONFIG_INTR_REMAP
3048/* 3181/*
3049 * Migrate the MSI irq to another cpumask. This migration is 3182 * Migrate the MSI irq to another cpumask. This migration is
@@ -3051,11 +3184,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3051 */ 3184 */
3052static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3185static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3053{ 3186{
3187 struct irq_desc *desc = irq_to_desc(irq);
3054 struct irq_cfg *cfg; 3188 struct irq_cfg *cfg;
3055 unsigned int dest; 3189 unsigned int dest;
3056 cpumask_t tmp, cleanup_mask; 3190 cpumask_t tmp, cleanup_mask;
3057 struct irte irte; 3191 struct irte irte;
3058 struct irq_desc *desc;
3059 3192
3060 cpus_and(tmp, mask, cpu_online_map); 3193 cpus_and(tmp, mask, cpu_online_map);
3061 if (cpus_empty(tmp)) 3194 if (cpus_empty(tmp))
@@ -3064,10 +3197,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3064 if (get_irte(irq, &irte)) 3197 if (get_irte(irq, &irte))
3065 return; 3198 return;
3066 3199
3067 if (assign_irq_vector(irq, mask)) 3200 cfg = desc->chip_data;
3201 if (assign_irq_vector(irq, cfg, mask))
3068 return; 3202 return;
3069 3203
3070 cfg = irq_cfg(irq); 3204 set_extra_move_desc(desc, mask);
3205
3071 cpus_and(tmp, cfg->domain, mask); 3206 cpus_and(tmp, cfg->domain, mask);
3072 dest = cpu_mask_to_apicid(tmp); 3207 dest = cpu_mask_to_apicid(tmp);
3073 3208
@@ -3091,9 +3226,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
3091 cfg->move_in_progress = 0; 3226 cfg->move_in_progress = 0;
3092 } 3227 }
3093 3228
3094 desc = irq_to_desc(irq);
3095 desc->affinity = mask; 3229 desc->affinity = mask;
3096} 3230}
3231
3097#endif 3232#endif
3098#endif /* CONFIG_SMP */ 3233#endif /* CONFIG_SMP */
3099 3234
@@ -3152,7 +3287,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3152} 3287}
3153#endif 3288#endif
3154 3289
3155static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) 3290static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3156{ 3291{
3157 int ret; 3292 int ret;
3158 struct msi_msg msg; 3293 struct msi_msg msg;
@@ -3161,7 +3296,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3161 if (ret < 0) 3296 if (ret < 0)
3162 return ret; 3297 return ret;
3163 3298
3164 set_irq_msi(irq, desc); 3299 set_irq_msi(irq, msidesc);
3165 write_msi_msg(irq, &msg); 3300 write_msi_msg(irq, &msg);
3166 3301
3167#ifdef CONFIG_INTR_REMAP 3302#ifdef CONFIG_INTR_REMAP
@@ -3181,26 +3316,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
3181 return 0; 3316 return 0;
3182} 3317}
3183 3318
3184static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) 3319int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3185{
3186 unsigned int irq;
3187
3188 irq = dev->bus->number;
3189 irq <<= 8;
3190 irq |= dev->devfn;
3191 irq <<= 12;
3192
3193 return irq;
3194}
3195
3196int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3197{ 3320{
3198 unsigned int irq; 3321 unsigned int irq;
3199 int ret; 3322 int ret;
3200 unsigned int irq_want; 3323 unsigned int irq_want;
3201 3324
3202 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3325 irq_want = nr_irqs_gsi;
3203
3204 irq = create_irq_nr(irq_want); 3326 irq = create_irq_nr(irq_want);
3205 if (irq == 0) 3327 if (irq == 0)
3206 return -1; 3328 return -1;
@@ -3214,7 +3336,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
3214 goto error; 3336 goto error;
3215no_ir: 3337no_ir:
3216#endif 3338#endif
3217 ret = setup_msi_irq(dev, desc, irq); 3339 ret = setup_msi_irq(dev, msidesc, irq);
3218 if (ret < 0) { 3340 if (ret < 0) {
3219 destroy_irq(irq); 3341 destroy_irq(irq);
3220 return ret; 3342 return ret;
@@ -3232,7 +3354,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3232{ 3354{
3233 unsigned int irq; 3355 unsigned int irq;
3234 int ret, sub_handle; 3356 int ret, sub_handle;
3235 struct msi_desc *desc; 3357 struct msi_desc *msidesc;
3236 unsigned int irq_want; 3358 unsigned int irq_want;
3237 3359
3238#ifdef CONFIG_INTR_REMAP 3360#ifdef CONFIG_INTR_REMAP
@@ -3240,10 +3362,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3240 int index = 0; 3362 int index = 0;
3241#endif 3363#endif
3242 3364
3243 irq_want = build_irq_for_pci_dev(dev) + 0x100; 3365 irq_want = nr_irqs_gsi;
3244 sub_handle = 0; 3366 sub_handle = 0;
3245 list_for_each_entry(desc, &dev->msi_list, list) { 3367 list_for_each_entry(msidesc, &dev->msi_list, list) {
3246 irq = create_irq_nr(irq_want--); 3368 irq = create_irq_nr(irq_want);
3369 irq_want++;
3247 if (irq == 0) 3370 if (irq == 0)
3248 return -1; 3371 return -1;
3249#ifdef CONFIG_INTR_REMAP 3372#ifdef CONFIG_INTR_REMAP
@@ -3275,7 +3398,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3275 } 3398 }
3276no_ir: 3399no_ir:
3277#endif 3400#endif
3278 ret = setup_msi_irq(dev, desc, irq); 3401 ret = setup_msi_irq(dev, msidesc, irq);
3279 if (ret < 0) 3402 if (ret < 0)
3280 goto error; 3403 goto error;
3281 sub_handle++; 3404 sub_handle++;
@@ -3296,20 +3419,22 @@ void arch_teardown_msi_irq(unsigned int irq)
3296#ifdef CONFIG_SMP 3419#ifdef CONFIG_SMP
3297static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3420static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
3298{ 3421{
3422 struct irq_desc *desc = irq_to_desc(irq);
3299 struct irq_cfg *cfg; 3423 struct irq_cfg *cfg;
3300 struct msi_msg msg; 3424 struct msi_msg msg;
3301 unsigned int dest; 3425 unsigned int dest;
3302 cpumask_t tmp; 3426 cpumask_t tmp;
3303 struct irq_desc *desc;
3304 3427
3305 cpus_and(tmp, mask, cpu_online_map); 3428 cpus_and(tmp, mask, cpu_online_map);
3306 if (cpus_empty(tmp)) 3429 if (cpus_empty(tmp))
3307 return; 3430 return;
3308 3431
3309 if (assign_irq_vector(irq, mask)) 3432 cfg = desc->chip_data;
3433 if (assign_irq_vector(irq, cfg, mask))
3310 return; 3434 return;
3311 3435
3312 cfg = irq_cfg(irq); 3436 set_extra_move_desc(desc, mask);
3437
3313 cpus_and(tmp, cfg->domain, mask); 3438 cpus_and(tmp, cfg->domain, mask);
3314 dest = cpu_mask_to_apicid(tmp); 3439 dest = cpu_mask_to_apicid(tmp);
3315 3440
@@ -3321,9 +3446,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
3321 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3446 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3322 3447
3323 dmar_msi_write(irq, &msg); 3448 dmar_msi_write(irq, &msg);
3324 desc = irq_to_desc(irq);
3325 desc->affinity = mask; 3449 desc->affinity = mask;
3326} 3450}
3451
3327#endif /* CONFIG_SMP */ 3452#endif /* CONFIG_SMP */
3328 3453
3329struct irq_chip dmar_msi_type = { 3454struct irq_chip dmar_msi_type = {
@@ -3357,8 +3482,8 @@ int arch_setup_dmar_msi(unsigned int irq)
3357#ifdef CONFIG_SMP 3482#ifdef CONFIG_SMP
3358static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) 3483static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3359{ 3484{
3485 struct irq_desc *desc = irq_to_desc(irq);
3360 struct irq_cfg *cfg; 3486 struct irq_cfg *cfg;
3361 struct irq_desc *desc;
3362 struct msi_msg msg; 3487 struct msi_msg msg;
3363 unsigned int dest; 3488 unsigned int dest;
3364 cpumask_t tmp; 3489 cpumask_t tmp;
@@ -3367,10 +3492,12 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3367 if (cpus_empty(tmp)) 3492 if (cpus_empty(tmp))
3368 return; 3493 return;
3369 3494
3370 if (assign_irq_vector(irq, mask)) 3495 cfg = desc->chip_data;
3496 if (assign_irq_vector(irq, cfg, mask))
3371 return; 3497 return;
3372 3498
3373 cfg = irq_cfg(irq); 3499 set_extra_move_desc(desc, mask);
3500
3374 cpus_and(tmp, cfg->domain, mask); 3501 cpus_and(tmp, cfg->domain, mask);
3375 dest = cpu_mask_to_apicid(tmp); 3502 dest = cpu_mask_to_apicid(tmp);
3376 3503
@@ -3382,9 +3509,9 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3382 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3509 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3383 3510
3384 hpet_msi_write(irq, &msg); 3511 hpet_msi_write(irq, &msg);
3385 desc = irq_to_desc(irq);
3386 desc->affinity = mask; 3512 desc->affinity = mask;
3387} 3513}
3514
3388#endif /* CONFIG_SMP */ 3515#endif /* CONFIG_SMP */
3389 3516
3390struct irq_chip hpet_msi_type = { 3517struct irq_chip hpet_msi_type = {
@@ -3439,26 +3566,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3439 3566
3440static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3567static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
3441{ 3568{
3569 struct irq_desc *desc = irq_to_desc(irq);
3442 struct irq_cfg *cfg; 3570 struct irq_cfg *cfg;
3443 unsigned int dest; 3571 unsigned int dest;
3444 cpumask_t tmp; 3572 cpumask_t tmp;
3445 struct irq_desc *desc;
3446 3573
3447 cpus_and(tmp, mask, cpu_online_map); 3574 cpus_and(tmp, mask, cpu_online_map);
3448 if (cpus_empty(tmp)) 3575 if (cpus_empty(tmp))
3449 return; 3576 return;
3450 3577
3451 if (assign_irq_vector(irq, mask)) 3578 cfg = desc->chip_data;
3579 if (assign_irq_vector(irq, cfg, mask))
3452 return; 3580 return;
3453 3581
3454 cfg = irq_cfg(irq); 3582 set_extra_move_desc(desc, mask);
3583
3455 cpus_and(tmp, cfg->domain, mask); 3584 cpus_and(tmp, cfg->domain, mask);
3456 dest = cpu_mask_to_apicid(tmp); 3585 dest = cpu_mask_to_apicid(tmp);
3457 3586
3458 target_ht_irq(irq, dest, cfg->vector); 3587 target_ht_irq(irq, dest, cfg->vector);
3459 desc = irq_to_desc(irq);
3460 desc->affinity = mask; 3588 desc->affinity = mask;
3461} 3589}
3590
3462#endif 3591#endif
3463 3592
3464static struct irq_chip ht_irq_chip = { 3593static struct irq_chip ht_irq_chip = {
@@ -3478,13 +3607,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3478 int err; 3607 int err;
3479 cpumask_t tmp; 3608 cpumask_t tmp;
3480 3609
3610 cfg = irq_cfg(irq);
3481 tmp = TARGET_CPUS; 3611 tmp = TARGET_CPUS;
3482 err = assign_irq_vector(irq, tmp); 3612 err = assign_irq_vector(irq, cfg, tmp);
3483 if (!err) { 3613 if (!err) {
3484 struct ht_irq_msg msg; 3614 struct ht_irq_msg msg;
3485 unsigned dest; 3615 unsigned dest;
3486 3616
3487 cfg = irq_cfg(irq);
3488 cpus_and(tmp, cfg->domain, tmp); 3617 cpus_and(tmp, cfg->domain, tmp);
3489 dest = cpu_mask_to_apicid(tmp); 3618 dest = cpu_mask_to_apicid(tmp);
3490 3619
@@ -3530,7 +3659,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3530 unsigned long flags; 3659 unsigned long flags;
3531 int err; 3660 int err;
3532 3661
3533 err = assign_irq_vector(irq, *eligible_cpu); 3662 cfg = irq_cfg(irq);
3663
3664 err = assign_irq_vector(irq, cfg, *eligible_cpu);
3534 if (err != 0) 3665 if (err != 0)
3535 return err; 3666 return err;
3536 3667
@@ -3539,8 +3670,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3539 irq_name); 3670 irq_name);
3540 spin_unlock_irqrestore(&vector_lock, flags); 3671 spin_unlock_irqrestore(&vector_lock, flags);
3541 3672
3542 cfg = irq_cfg(irq);
3543
3544 mmr_value = 0; 3673 mmr_value = 0;
3545 entry = (struct uv_IO_APIC_route_entry *)&mmr_value; 3674 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3546 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3675 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3592,29 +3721,16 @@ int __init io_apic_get_redir_entries (int ioapic)
3592 return reg_01.bits.entries; 3721 return reg_01.bits.entries;
3593} 3722}
3594 3723
3595int __init probe_nr_irqs(void) 3724void __init probe_nr_irqs_gsi(void)
3596{ 3725{
3597 int idx; 3726 int idx;
3598 int nr = 0; 3727 int nr = 0;
3599#ifndef CONFIG_XEN
3600 int nr_min = 32;
3601#else
3602 int nr_min = NR_IRQS;
3603#endif
3604 3728
3605 for (idx = 0; idx < nr_ioapics; idx++) 3729 for (idx = 0; idx < nr_ioapics; idx++)
3606 nr += io_apic_get_redir_entries(idx) + 1; 3730 nr += io_apic_get_redir_entries(idx) + 1;
3607 3731
3608 /* double it for hotplug and msi and nmi */ 3732 if (nr > nr_irqs_gsi)
3609 nr <<= 1; 3733 nr_irqs_gsi = nr;
3610
3611 /* something wrong ? */
3612 if (nr < nr_min)
3613 nr = nr_min;
3614 if (WARN_ON(nr > NR_IRQS))
3615 nr = NR_IRQS;
3616
3617 return nr;
3618} 3734}
3619 3735
3620/* -------------------------------------------------------------------------- 3736/* --------------------------------------------------------------------------
@@ -3713,19 +3829,31 @@ int __init io_apic_get_version(int ioapic)
3713 3829
3714int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3830int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
3715{ 3831{
3832 struct irq_desc *desc;
3833 struct irq_cfg *cfg;
3834 int cpu = boot_cpu_id;
3835
3716 if (!IO_APIC_IRQ(irq)) { 3836 if (!IO_APIC_IRQ(irq)) {
3717 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3837 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3718 ioapic); 3838 ioapic);
3719 return -EINVAL; 3839 return -EINVAL;
3720 } 3840 }
3721 3841
3842 desc = irq_to_desc_alloc_cpu(irq, cpu);
3843 if (!desc) {
3844 printk(KERN_INFO "can not get irq_desc %d\n", irq);
3845 return 0;
3846 }
3847
3722 /* 3848 /*
3723 * IRQs < 16 are already in the irq_2_pin[] map 3849 * IRQs < 16 are already in the irq_2_pin[] map
3724 */ 3850 */
3725 if (irq >= 16) 3851 if (irq >= NR_IRQS_LEGACY) {
3726 add_pin_to_irq(irq, ioapic, pin); 3852 cfg = desc->chip_data;
3853 add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
3854 }
3727 3855
3728 setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); 3856 setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
3729 3857
3730 return 0; 3858 return 0;
3731} 3859}
@@ -3761,7 +3889,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3761void __init setup_ioapic_dest(void) 3889void __init setup_ioapic_dest(void)
3762{ 3890{
3763 int pin, ioapic, irq, irq_entry; 3891 int pin, ioapic, irq, irq_entry;
3892 struct irq_desc *desc;
3764 struct irq_cfg *cfg; 3893 struct irq_cfg *cfg;
3894 cpumask_t mask;
3765 3895
3766 if (skip_ioapic_setup == 1) 3896 if (skip_ioapic_setup == 1)
3767 return; 3897 return;
@@ -3777,17 +3907,31 @@ void __init setup_ioapic_dest(void)
3777 * when you have too many devices, because at that time only boot 3907 * when you have too many devices, because at that time only boot
3778 * cpu is online. 3908 * cpu is online.
3779 */ 3909 */
3780 cfg = irq_cfg(irq); 3910 desc = irq_to_desc(irq);
3781 if (!cfg->vector) 3911 cfg = desc->chip_data;
3782 setup_IO_APIC_irq(ioapic, pin, irq, 3912 if (!cfg->vector) {
3913 setup_IO_APIC_irq(ioapic, pin, irq, desc,
3783 irq_trigger(irq_entry), 3914 irq_trigger(irq_entry),
3784 irq_polarity(irq_entry)); 3915 irq_polarity(irq_entry));
3916 continue;
3917
3918 }
3919
3920 /*
3921 * Honour affinities which have been set in early boot
3922 */
3923 if (desc->status &
3924 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
3925 mask = desc->affinity;
3926 else
3927 mask = TARGET_CPUS;
3928
3785#ifdef CONFIG_INTR_REMAP 3929#ifdef CONFIG_INTR_REMAP
3786 else if (intr_remapping_enabled) 3930 if (intr_remapping_enabled)
3787 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); 3931 set_ir_ioapic_affinity_irq_desc(desc, mask);
3788#endif
3789 else 3932 else
3790 set_ioapic_affinity_irq(irq, TARGET_CPUS); 3933#endif
3934 set_ioapic_affinity_irq_desc(desc, mask);
3791 } 3935 }
3792 3936
3793 } 3937 }
@@ -3836,7 +3980,6 @@ void __init ioapic_init_mappings(void)
3836 struct resource *ioapic_res; 3980 struct resource *ioapic_res;
3837 int i; 3981 int i;
3838 3982
3839 irq_2_pin_init();
3840 ioapic_res = ioapic_setup_resources(); 3983 ioapic_res = ioapic_setup_resources();
3841 for (i = 0; i < nr_ioapics; i++) { 3984 for (i = 0; i < nr_ioapics; i++) {
3842 if (smp_found_config) { 3985 if (smp_found_config) {
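
Two themes run through the io_apic.c changes above: IRQ descriptors and their chip_data (struct irq_cfg) are now allocated per IRQ on a chosen CPU via irq_to_desc_alloc_cpu(), and setup_ioapic_dest() keeps affinities that were pinned during early boot instead of unconditionally re-targeting every IRQ at TARGET_CPUS. The sketch below models only the second, simpler decision; the flag values and the one-word cpumask are simplified stand-ins, not the kernel's types.

/* Sketch only: the affinity decision added to setup_ioapic_dest(). */
#include <stdio.h>

#define IRQ_NO_BALANCING  0x1
#define IRQ_AFFINITY_SET  0x2

struct fake_desc {
        unsigned int status;
        unsigned long affinity;     /* simplified cpumask: one bit per CPU */
};

static unsigned long target_cpus = 0xful;   /* default destination: CPUs 0-3 */

static unsigned long pick_dest_mask(const struct fake_desc *desc)
{
        /* Honour an affinity that was fixed in early boot... */
        if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
                return desc->affinity;
        /* ...otherwise fall back to the default target set. */
        return target_cpus;
}

int main(void)
{
        struct fake_desc pinned = { IRQ_AFFINITY_SET, 0x2ul };  /* pinned to CPU 1 */
        struct fake_desc normal = { 0, 0 };

        printf("pinned -> %#lx, normal -> %#lx\n",
               pick_dest_mask(&pinned), pick_dest_mask(&normal));
        return 0;
}
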
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..3f1d9d18df67 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
118 } 118 }
119 119
120 desc = irq_to_desc(i); 120 desc = irq_to_desc(i);
121 if (!desc)
122 return 0;
123
121 spin_lock_irqsave(&desc->lock, flags); 124 spin_lock_irqsave(&desc->lock, flags);
122#ifndef CONFIG_SMP 125#ifndef CONFIG_SMP
123 any_count = kstat_irqs(i); 126 any_count = kstat_irqs(i);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..119fc9c8ff7f 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map)
242 for_each_irq_desc(irq, desc) { 242 for_each_irq_desc(irq, desc) {
243 cpumask_t mask; 243 cpumask_t mask;
244 244
245 if (!desc)
246 continue;
245 if (irq == 2) 247 if (irq == 2)
246 continue; 248 continue;
247 249
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..900009c70591 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map)
94 int break_affinity = 0; 94 int break_affinity = 0;
95 int set_affinity = 1; 95 int set_affinity = 1;
96 96
97 if (!desc)
98 continue;
97 if (irq == 2) 99 if (irq == 2)
98 continue; 100 continue;
99 101
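
With sparse IRQ descriptors, irq_to_desc() and the for_each_irq_desc iterator can yield NULL for slots that were never allocated, which is why show_interrupts() and both fixup_irqs() variants above gain early-out checks. A small sketch of iterating a sparse table with such holes (a plain C array stands in for the kernel's descriptor storage):

/* Sketch only: a sparse descriptor table where unallocated slots are NULL,
 * illustrating the "if (!desc) continue;" guards added above. */
#include <stdio.h>

struct fake_irq_desc { int irq; };

#define NR 8
static struct fake_irq_desc slot3 = { 3 }, slot5 = { 5 };
static struct fake_irq_desc *descs[NR] = {
        [3] = &slot3, [5] = &slot5,     /* every other entry stays NULL */
};

int main(void)
{
        int irq;

        for (irq = 0; irq < NR; irq++) {
                struct fake_irq_desc *desc = descs[irq];

                if (!desc)              /* skip holes in the sparse table */
                        continue;
                printf("would fix up irq %d\n", desc->irq);
        }
        return 0;
}
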
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..6a92f47c52e7 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -68,8 +68,7 @@ void __init init_ISA_irqs (void)
68 /* 68 /*
69 * 16 old-style INTA-cycle interrupts: 69 * 16 old-style INTA-cycle interrupts:
70 */ 70 */
71 for (i = 0; i < 16; i++) { 71 for (i = 0; i < NR_IRQS_LEGACY; i++) {
72 /* first time call this irq_desc */
73 struct irq_desc *desc = irq_to_desc(i); 72 struct irq_desc *desc = irq_to_desc(i);
74 73
75 desc->status = IRQ_DISABLED; 74 desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..40c1e62ec785 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,8 +142,7 @@ void __init init_ISA_irqs(void)
142 init_bsp_APIC(); 142 init_bsp_APIC();
143 init_8259A(0); 143 init_8259A(0);
144 144
145 for (i = 0; i < 16; i++) { 145 for (i = 0; i < NR_IRQS_LEGACY; i++) {
146 /* first time call this irq_desc */
147 struct irq_desc *desc = irq_to_desc(i); 146 struct irq_desc *desc = irq_to_desc(i);
148 147
149 desc->status = IRQ_DISABLED; 148 desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 774ac4991568..e169ae9b6a62 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
128} 128}
129 129
130#ifdef CONFIG_X86_LOCAL_APIC 130#ifdef CONFIG_X86_LOCAL_APIC
131static void kvm_setup_secondary_clock(void) 131static void __cpuinit kvm_setup_secondary_clock(void)
132{ 132{
133 /* 133 /*
134 * Now that the first cpu already had this clocksource initialized, 134 * Now that the first cpu already had this clocksource initialized,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f98f4e1dba09..45e3b69808ba 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -586,23 +586,23 @@ static void __init __get_smp_config(unsigned int early)
586{ 586{
587 struct intel_mp_floating *mpf = mpf_found; 587 struct intel_mp_floating *mpf = mpf_found;
588 588
589 if (x86_quirks->mach_get_smp_config) { 589 if (!mpf)
590 if (x86_quirks->mach_get_smp_config(early)) 590 return;
591 return; 591
592 }
593 if (acpi_lapic && early) 592 if (acpi_lapic && early)
594 return; 593 return;
594
595 /* 595 /*
596 * ACPI supports both logical (e.g. Hyper-Threading) and physical 596 * MPS doesn't support hyperthreading, aka only have
597 * processors, where MPS only supports physical. 597 * thread 0 apic id in MPS table
598 */ 598 */
599 if (acpi_lapic && acpi_ioapic) { 599 if (acpi_lapic && acpi_ioapic)
600 printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
601 "information\n");
602 return; 600 return;
603 } else if (acpi_lapic) 601
604 printk(KERN_INFO "Using ACPI for processor (LAPIC) " 602 if (x86_quirks->mach_get_smp_config) {
605 "configuration information\n"); 603 if (x86_quirks->mach_get_smp_config(early))
604 return;
605 }
606 606
607 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 607 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
608 mpf->mpf_specification); 608 mpf->mpf_specification);
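
The mpparse hunk reorders the early exits in __get_smp_config(): a missing MP floating pointer and the ACPI-handled cases now return before the x86_quirks callback is consulted, and the informational ACPI printks are dropped. A compressed sketch of the resulting control flow; mpf_found, the acpi_* flags and the quirk hook below are simplified stand-ins:

/* Sketch only: control-flow model of the reordered __get_smp_config(). */
#include <stdio.h>
#include <stdbool.h>

static bool mpf_found = true;
static bool acpi_lapic = false, acpi_ioapic = false;

static int quirk_get_smp_config(bool early)
{
        (void)early;
        return 0;                       /* 0: fall through to MPS parsing */
}

static void get_smp_config(bool early)
{
        if (!mpf_found)                 /* no MP table at all */
                return;
        if (acpi_lapic && early)        /* ACPI will handle it later */
                return;
        if (acpi_lapic && acpi_ioapic)  /* MADT already provided the config */
                return;
        if (quirk_get_smp_config(early))
                return;                 /* platform quirk consumed the config */

        printf("parsing the MPS table (early=%d)\n", early);
}

int main(void)
{
        get_smp_config(false);
        return 0;
}
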
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 4caff39078e0..0deea37a53cf 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,7 +31,7 @@
31#include <asm/numaq.h> 31#include <asm/numaq.h>
32#include <asm/topology.h> 32#include <asm/topology.h>
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/mpspec.h> 34#include <asm/genapic.h>
35#include <asm/e820.h> 35#include <asm/e820.h>
36#include <asm/setup.h> 36#include <asm/setup.h>
37 37
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
235 return 1; 235 return 1;
236} 236}
237 237
238static int __init numaq_update_genapic(void)
239{
240 genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
241
242 return 0;
243}
244
238static struct x86_quirks numaq_x86_quirks __initdata = { 245static struct x86_quirks numaq_x86_quirks __initdata = {
239 .arch_pre_time_init = numaq_pre_time_init, 246 .arch_pre_time_init = numaq_pre_time_init,
240 .arch_time_init = NULL, 247 .arch_time_init = NULL,
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
250 .mpc_oem_pci_bus = mpc_oem_pci_bus, 257 .mpc_oem_pci_bus = mpc_oem_pci_bus,
251 .smp_read_mpc_oem = smp_read_mpc_oem, 258 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids, 259 .setup_ioapic_ids = numaq_setup_ioapic_ids,
260 .update_genapic = numaq_update_genapic,
253}; 261};
254 262
255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 263void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 0e9f1982b1dd..95777b0faa73 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,7 +7,8 @@
7 7
8#include <asm/paravirt.h> 8#include <asm/paravirt.h>
9 9
10static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) 10static inline void
11default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
11{ 12{
12 __raw_spin_lock(lock); 13 __raw_spin_lock(lock);
13} 14}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d78f38..d28bbdc35e4e 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
1567 ++p; 1567 ++p;
1568 if (*p == '\0') 1568 if (*p == '\0')
1569 break; 1569 break;
1570 bridge = simple_strtol(p, &endp, 0); 1570 bridge = simple_strtoul(p, &endp, 0);
1571 if (p == endp) 1571 if (p == endp)
1572 break; 1572 break;
1573 1573
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a42b02b4df68..ba7ad83e20a8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size)
123 123
124 spin_lock_irqsave(&iommu_bitmap_lock, flags); 124 spin_lock_irqsave(&iommu_bitmap_lock, flags);
125 iommu_area_free(iommu_gart_bitmap, offset, size); 125 iommu_area_free(iommu_gart_bitmap, offset, size);
126 if (offset >= next_bit)
127 next_bit = offset + size;
126 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 128 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
127} 129}
128 130
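
The GART change advances the allocator's next_bit search hint past a region that is freed at or beyond it. My reading (hedged) is that this keeps the just-freed aperture range out of immediate reuse until the allocator wraps around, and the wraparound path is where the GART TLB gets flushed. A toy bitmap allocator illustrating only the hint bookkeeping:

/* Sketch only: the hint bookkeeping added to free_iommu(), on a toy bitmap. */
#include <stdio.h>

#define BITS 64
static unsigned long bitmap;        /* bit i set => page i in use */
static unsigned long next_bit;      /* search hint for the next allocation */

static void toy_free(unsigned long offset, int size)
{
        int i;

        for (i = 0; i < size; i++)
                bitmap &= ~(1ul << (offset + i));

        /* Same rule as the hunk above: don't hand the freed range back
         * out before the next wraparound. */
        if (offset >= next_bit)
                next_bit = offset + size;
}

int main(void)
{
        next_bit = 10;
        toy_free(12, 4);
        printf("next_bit = %lu\n", next_bit);   /* 16 */
        return 0;
}
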
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index a4da7c4b3129..95d811a9594f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <linux/ftrace.h>
10#include <asm/system.h> 11#include <asm/system.h>
11#include <asm/apic.h> 12#include <asm/apic.h>
12 13
@@ -101,6 +102,9 @@ static inline int hlt_use_halt(void)
101void default_idle(void) 102void default_idle(void)
102{ 103{
103 if (hlt_use_halt()) { 104 if (hlt_use_halt()) {
105 struct power_trace it;
106
107 trace_power_start(&it, POWER_CSTATE, 1);
104 current_thread_info()->status &= ~TS_POLLING; 108 current_thread_info()->status &= ~TS_POLLING;
105 /* 109 /*
106 * TS_POLLING-cleared state must be visible before we 110 * TS_POLLING-cleared state must be visible before we
@@ -113,6 +117,7 @@ void default_idle(void)
113 else 117 else
114 local_irq_enable(); 118 local_irq_enable();
115 current_thread_info()->status |= TS_POLLING; 119 current_thread_info()->status |= TS_POLLING;
120 trace_power_end(&it);
116 } else { 121 } else {
117 local_irq_enable(); 122 local_irq_enable();
118 /* loop is done by the caller */ 123 /* loop is done by the caller */
@@ -170,24 +175,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
170 */ 175 */
171void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 176void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
172{ 177{
178 struct power_trace it;
179
180 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
173 if (!need_resched()) { 181 if (!need_resched()) {
174 __monitor((void *)&current_thread_info()->flags, 0, 0); 182 __monitor((void *)&current_thread_info()->flags, 0, 0);
175 smp_mb(); 183 smp_mb();
176 if (!need_resched()) 184 if (!need_resched())
177 __mwait(ax, cx); 185 __mwait(ax, cx);
178 } 186 }
187 trace_power_end(&it);
179} 188}
180 189
181/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 190/* Default MONITOR/MWAIT with no hints, used for default C1 state */
182static void mwait_idle(void) 191static void mwait_idle(void)
183{ 192{
193 struct power_trace it;
184 if (!need_resched()) { 194 if (!need_resched()) {
195 trace_power_start(&it, POWER_CSTATE, 1);
185 __monitor((void *)&current_thread_info()->flags, 0, 0); 196 __monitor((void *)&current_thread_info()->flags, 0, 0);
186 smp_mb(); 197 smp_mb();
187 if (!need_resched()) 198 if (!need_resched())
188 __sti_mwait(0, 0); 199 __sti_mwait(0, 0);
189 else 200 else
190 local_irq_enable(); 201 local_irq_enable();
202 trace_power_end(&it);
191 } else 203 } else
192 local_irq_enable(); 204 local_irq_enable();
193} 205}
@@ -199,9 +211,13 @@ static void mwait_idle(void)
199 */ 211 */
200static void poll_idle(void) 212static void poll_idle(void)
201{ 213{
214 struct power_trace it;
215
216 trace_power_start(&it, POWER_CSTATE, 0);
202 local_irq_enable(); 217 local_irq_enable();
203 while (!need_resched()) 218 while (!need_resched())
204 cpu_relax(); 219 cpu_relax();
220 trace_power_end(&it);
205} 221}
206 222
207/* 223/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0a1302fe6d45..24c2276aa453 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/dmi.h> 40#include <linux/dmi.h>
41#include <linux/ftrace.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
@@ -548,7 +549,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
548 * the task-switch, and shows up in ret_from_fork in entry.S, 549 * the task-switch, and shows up in ret_from_fork in entry.S,
549 * for example. 550 * for example.
550 */ 551 */
551struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) 552__notrace_funcgraph struct task_struct *
553__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
552{ 554{
553 struct thread_struct *prev = &prev_p->thread, 555 struct thread_struct *prev = &prev_p->thread,
554 *next = &next_p->thread; 556 *next = &next_p->thread;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c958120fb1b6..fbb321d53d34 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -39,6 +39,7 @@
39#include <linux/prctl.h> 39#include <linux/prctl.h>
40#include <linux/uaccess.h> 40#include <linux/uaccess.h>
41#include <linux/io.h> 41#include <linux/io.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
44#include <asm/system.h> 45#include <asm/system.h>
@@ -551,8 +552,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
551 * - could test fs/gs bitsliced 552 * - could test fs/gs bitsliced
552 * 553 *
553 * Kprobes not supported here. Set the probe on schedule instead. 554 * Kprobes not supported here. Set the probe on schedule instead.
555 * Function graph tracer not supported too.
554 */ 556 */
555struct task_struct * 557__notrace_funcgraph struct task_struct *
556__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 558__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
557{ 559{
558 struct thread_struct *prev = &prev_p->thread; 560 struct thread_struct *prev = &prev_p->thread;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..2c8ec1ba75e6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
668 size_t bts_index, bts_end; 668 size_t bts_index, bts_end;
669 int error; 669 int error;
670 670
671 error = ds_get_bts_end(child, &bts_end); 671 error = ds_get_bts_end(child->bts, &bts_end);
672 if (error < 0) 672 if (error < 0)
673 return error; 673 return error;
674 674
675 if (bts_end <= index) 675 if (bts_end <= index)
676 return -EINVAL; 676 return -EINVAL;
677 677
678 error = ds_get_bts_index(child, &bts_index); 678 error = ds_get_bts_index(child->bts, &bts_index);
679 if (error < 0) 679 if (error < 0)
680 return error; 680 return error;
681 681
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
684 if (bts_end <= bts_index) 684 if (bts_end <= bts_index)
685 bts_index -= bts_end; 685 bts_index -= bts_end;
686 686
687 error = ds_access_bts(child, bts_index, &bts_record); 687 error = ds_access_bts(child->bts, bts_index, &bts_record);
688 if (error < 0) 688 if (error < 0)
689 return error; 689 return error;
690 690
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
705 size_t end, i; 705 size_t end, i;
706 int error; 706 int error;
707 707
708 error = ds_get_bts_index(child, &end); 708 error = ds_get_bts_index(child->bts, &end);
709 if (error < 0) 709 if (error < 0)
710 return error; 710 return error;
711 711
712 if (size < (end * sizeof(struct bts_struct))) 712 if (size < (end * sizeof(struct bts_struct)))
713 return -EIO; 713 return -EIO;
714 714
715 error = ds_access_bts(child, 0, (const void **)&raw); 715 error = ds_access_bts(child->bts, 0, (const void **)&raw);
716 if (error < 0) 716 if (error < 0)
717 return error; 717 return error;
718 718
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
723 return -EFAULT; 723 return -EFAULT;
724 } 724 }
725 725
726 error = ds_clear_bts(child); 726 error = ds_clear_bts(child->bts);
727 if (error < 0) 727 if (error < 0)
728 return error; 728 return error;
729 729
730 return end; 730 return end;
731} 731}
732 732
733static void ptrace_bts_ovfl(struct task_struct *child)
734{
735 send_sig(child->thread.bts_ovfl_signal, child, 0);
736}
737
738static int ptrace_bts_config(struct task_struct *child, 733static int ptrace_bts_config(struct task_struct *child,
739 long cfg_size, 734 long cfg_size,
740 const struct ptrace_bts_config __user *ucfg) 735 const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,45 @@ static int ptrace_bts_config(struct task_struct *child,
760 goto errout; 755 goto errout;
761 756
762 if (cfg.flags & PTRACE_BTS_O_ALLOC) { 757 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
763 ds_ovfl_callback_t ovfl = NULL; 758 bts_ovfl_callback_t ovfl = NULL;
764 unsigned int sig = 0; 759 unsigned int sig = 0;
765 760
766 /* we ignore the error in case we were not tracing child */ 761 error = -EINVAL;
767 (void)ds_release_bts(child); 762 if (cfg.size < (10 * bts_cfg.sizeof_bts))
763 goto errout;
768 764
769 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 765 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
770 if (!cfg.signal) 766 if (!cfg.signal)
771 goto errout; 767 goto errout;
772 768
769 error = -EOPNOTSUPP;
770 goto errout;
771
773 sig = cfg.signal; 772 sig = cfg.signal;
774 ovfl = ptrace_bts_ovfl;
775 } 773 }
776 774
777 error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); 775 if (child->bts) {
778 if (error < 0) 776 (void)ds_release_bts(child->bts);
777 kfree(child->bts_buffer);
778
779 child->bts = NULL;
780 child->bts_buffer = NULL;
781 }
782
783 error = -ENOMEM;
784 child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
785 if (!child->bts_buffer)
786 goto errout;
787
788 child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
789 ovfl, /* th = */ (size_t)-1);
790 if (IS_ERR(child->bts)) {
791 error = PTR_ERR(child->bts);
792 kfree(child->bts_buffer);
793 child->bts = NULL;
794 child->bts_buffer = NULL;
779 goto errout; 795 goto errout;
796 }
780 797
781 child->thread.bts_ovfl_signal = sig; 798 child->thread.bts_ovfl_signal = sig;
782 } 799 }
@@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child,
823 if (cfg_size < sizeof(cfg)) 840 if (cfg_size < sizeof(cfg))
824 return -EIO; 841 return -EIO;
825 842
826 error = ds_get_bts_end(child, &end); 843 error = ds_get_bts_end(child->bts, &end);
827 if (error < 0) 844 if (error < 0)
828 return error; 845 return error;
829 846
830 error = ds_access_bts(child, /* index = */ 0, &base); 847 error = ds_access_bts(child->bts, /* index = */ 0, &base);
831 if (error < 0) 848 if (error < 0)
832 return error; 849 return error;
833 850
834 error = ds_access_bts(child, /* index = */ end, &max); 851 error = ds_access_bts(child->bts, /* index = */ end, &max);
835 if (error < 0) 852 if (error < 0)
836 return error; 853 return error;
837 854
@@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
884 return -EINVAL; 901 return -EINVAL;
885 } 902 }
886 903
887 /* The writing task will be the switched-to task on a context 904 return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
888 * switch. It needs to write into the switched-from task's BTS
889 * buffer. */
890 return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
891} 905}
892 906
893void ptrace_bts_take_timestamp(struct task_struct *tsk, 907void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
929 switch (c->x86) { 943 switch (c->x86) {
930 case 0x6: 944 case 0x6:
931 switch (c->x86_model) { 945 switch (c->x86_model) {
946 case 0 ... 0xC:
947 /* sorry, don't know about them */
948 break;
932 case 0xD: 949 case 0xD:
933 case 0xE: /* Pentium M */ 950 case 0xE: /* Pentium M */
934 bts_configure(&bts_cfg_pentium_m); 951 bts_configure(&bts_cfg_pentium_m);
935 break; 952 break;
936 case 0xF: /* Core2 */ 953 default: /* Core2, Atom, ... */
937 case 0x1C: /* Atom */
938 bts_configure(&bts_cfg_core2); 954 bts_configure(&bts_cfg_core2);
939 break; 955 break;
940 default:
941 /* sorry, don't know about them */
942 break;
943 } 956 }
944 break; 957 break;
945 case 0xF: 958 case 0xF:
@@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child)
973 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 986 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
974#endif 987#endif
975#ifdef CONFIG_X86_PTRACE_BTS 988#ifdef CONFIG_X86_PTRACE_BTS
976 (void)ds_release_bts(child); 989 if (child->bts) {
990 (void)ds_release_bts(child->bts);
991 kfree(child->bts_buffer);
992 child->bts_buffer = NULL;
977 993
978 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; 994 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
979 if (!child->thread.debugctlmsr) 995 if (!child->thread.debugctlmsr)
980 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 996 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
981 997
982 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 998 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
999 }
983#endif /* CONFIG_X86_PTRACE_BTS */ 1000#endif /* CONFIG_X86_PTRACE_BTS */
984} 1001}
985 1002
@@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1111 (child, data, (struct ptrace_bts_config __user *)addr); 1128 (child, data, (struct ptrace_bts_config __user *)addr);
1112 break; 1129 break;
1113 1130
1114 case PTRACE_BTS_SIZE: 1131 case PTRACE_BTS_SIZE: {
1115 ret = ds_get_bts_index(child, /* pos = */ NULL); 1132 size_t size;
1133
1134 ret = ds_get_bts_index(child->bts, &size);
1135 if (ret == 0) {
1136 BUG_ON(size != (int) size);
1137 ret = (int) size;
1138 }
1116 break; 1139 break;
1140 }
1117 1141
1118 case PTRACE_BTS_GET: 1142 case PTRACE_BTS_GET:
1119 ret = ptrace_bts_read_record 1143 ret = ptrace_bts_read_record
@@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1121 break; 1145 break;
1122 1146
1123 case PTRACE_BTS_CLEAR: 1147 case PTRACE_BTS_CLEAR:
1124 ret = ds_clear_bts(child); 1148 ret = ds_clear_bts(child->bts);
1125 break; 1149 break;
1126 1150
1127 case PTRACE_BTS_DRAIN: 1151 case PTRACE_BTS_DRAIN:
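
The BTS rework turns ds_request_bts() into an interface that takes a caller-supplied buffer and returns a tracer handle, so ptrace_bts_config() now allocates child->bts_buffer itself, enforces a minimum size, and tears the handle and the buffer down together on error or detach. A hedged sketch of that allocate-request-or-unwind shape; the record size, the ds_request_bts() signature and the struct names below are simplifications for illustration, not copies of the in-kernel API.

/* Sketch only: allocate a buffer, request tracing, unwind on failure. */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

#define TOY_BTS_RECORD_SIZE 24          /* assumption: stand-in for bts_cfg.sizeof_bts */

struct bts_tracer { void *buffer; size_t size; };

struct tracee {
        void *bts_buffer;
        struct bts_tracer *bts;
};

static struct bts_tracer *ds_request_bts(void *buf, size_t size)
{
        struct bts_tracer *t = malloc(sizeof(*t));

        if (!t)
                return NULL;            /* the kernel version reports ERR_PTR() */
        t->buffer = buf;
        t->size = size;
        return t;
}

static int bts_config(struct tracee *child, size_t size)
{
        if (size < 10 * TOY_BTS_RECORD_SIZE)    /* too small to hold useful traces */
                return -EINVAL;

        /* Drop any previous tracer and buffer before installing new ones. */
        if (child->bts) {
                free(child->bts);
                free(child->bts_buffer);
                child->bts = NULL;
                child->bts_buffer = NULL;
        }

        child->bts_buffer = calloc(1, size);
        if (!child->bts_buffer)
                return -ENOMEM;

        child->bts = ds_request_bts(child->bts_buffer, size);
        if (!child->bts) {
                free(child->bts_buffer);        /* unwind the half-done setup */
                child->bts_buffer = NULL;
                return -ENOMEM;
        }
        return 0;
}

int main(void)
{
        struct tracee child = { NULL, NULL };

        printf("config: %d\n", bts_config(&child, 4096));
        free(child.bts);
        free(child.bts_buffer);
        return 0;
}
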
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index bb387ab0eea8..0e3dbc7b2bdb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -174,6 +174,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
174 DMI_MATCH(DMI_BOARD_NAME, "0KW626"), 174 DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
175 }, 175 },
176 }, 176 },
177 { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
178 .callback = set_bios_reboot,
179 .ident = "Dell OptiPlex 330",
180 .matches = {
181 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
182 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"),
183 DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
184 },
185 },
177 { /* Handle problems with rebooting on Dell 2400's */ 186 { /* Handle problems with rebooting on Dell 2400's */
178 .callback = set_bios_reboot, 187 .callback = set_bios_reboot,
179 .ident = "Dell PowerEdge 2400", 188 .ident = "Dell PowerEdge 2400",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd3..b9018955a04f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -583,7 +583,20 @@ static int __init setup_elfcorehdr(char *arg)
583early_param("elfcorehdr", setup_elfcorehdr); 583early_param("elfcorehdr", setup_elfcorehdr);
584#endif 584#endif
585 585
586static struct x86_quirks default_x86_quirks __initdata; 586static int __init default_update_genapic(void)
587{
588#ifdef CONFIG_X86_SMP
589# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
590 genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
591# endif
592#endif
593
594 return 0;
595}
596
597static struct x86_quirks default_x86_quirks __initdata = {
598 .update_genapic = default_update_genapic,
599};
587 600
588struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; 601struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
589 602
@@ -764,7 +777,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
764 .callback = dmi_low_memory_corruption, 777 .callback = dmi_low_memory_corruption,
765 .ident = "Phoenix BIOS", 778 .ident = "Phoenix BIOS",
766 .matches = { 779 .matches = {
767 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), 780 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
768 }, 781 },
769 }, 782 },
770#endif 783#endif
@@ -1082,7 +1095,7 @@ void __init setup_arch(char **cmdline_p)
1082 ioapic_init_mappings(); 1095 ioapic_init_mappings();
1083 1096
1084 /* need to wait for io_apic is mapped */ 1097 /* need to wait for io_apic is mapped */
1085 nr_irqs = probe_nr_irqs(); 1098 probe_nr_irqs_gsi();
1086 1099
1087 kvm_guest_init(); 1100 kvm_guest_init();
1088 1101
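
setup.c installs a default x86_quirks.update_genapic callback that points genapic->wakeup_cpu at the INIT/SIPI wakeup path; platform code such as the NUMAQ hunk earlier overrides the quirk to wake secondaries via NMI, and mach-generic/probe.c (further below) invokes the hook whenever the APIC driver changes. A toy model of that quirk-table-with-default-callback pattern; every name here is a local stand-in:

/* Sketch only: a quirk table whose callback rewires a genapic method. */
#include <stdio.h>

static int wakeup_via_init(void) { printf("wake CPU via INIT/SIPI\n"); return 0; }
static int wakeup_via_nmi(void)  { printf("wake CPU via NMI\n");       return 0; }

struct genapic { int (*wakeup_cpu)(void); };
struct quirks  { int (*update_genapic)(void); };

static struct genapic genapic = { .wakeup_cpu = wakeup_via_init };

static int default_update_genapic(void)
{
        genapic.wakeup_cpu = wakeup_via_init;
        return 0;
}

static int numaq_update_genapic(void)
{
        genapic.wakeup_cpu = wakeup_via_nmi;    /* platform override */
        return 0;
}

static struct quirks default_quirks = { .update_genapic = default_update_genapic };
static struct quirks *x86_quirks = &default_quirks;

int main(void)
{
        if (x86_quirks->update_genapic)
                x86_quirks->update_genapic();
        genapic.wakeup_cpu();

        /* A subarch such as NUMAQ would have installed its own quirk: */
        default_quirks.update_genapic = numaq_update_genapic;
        x86_quirks->update_genapic();
        genapic.wakeup_cpu();
        return 0;
}
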
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b1093397319..0e9f446269f4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
62#include <asm/mtrr.h> 62#include <asm/mtrr.h>
63#include <asm/vmi.h> 63#include <asm/vmi.h>
64#include <asm/genapic.h> 64#include <asm/genapic.h>
65#include <asm/setup.h>
65#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
66 67
67#include <mach_apic.h> 68#include <mach_apic.h>
@@ -536,7 +537,7 @@ static void impress_friends(void)
536 pr_debug("Before bogocount - setting activated=1.\n"); 537 pr_debug("Before bogocount - setting activated=1.\n");
537} 538}
538 539
539static inline void __inquire_remote_apic(int apicid) 540void __inquire_remote_apic(int apicid)
540{ 541{
541 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 542 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
542 char *names[] = { "ID", "VERSION", "SPIV" }; 543 char *names[] = { "ID", "VERSION", "SPIV" };
@@ -575,14 +576,13 @@ static inline void __inquire_remote_apic(int apicid)
575 } 576 }
576} 577}
577 578
578#ifdef WAKE_SECONDARY_VIA_NMI
579/* 579/*
580 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal 580 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
581 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 581 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
582 * won't ... remember to clear down the APIC, etc later. 582 * won't ... remember to clear down the APIC, etc later.
583 */ 583 */
584static int __devinit 584int __devinit
585wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) 585wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
586{ 586{
587 unsigned long send_status, accept_status = 0; 587 unsigned long send_status, accept_status = 0;
588 int maxlvt; 588 int maxlvt;
@@ -599,7 +599,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
599 * Give the other CPU some time to accept the IPI. 599 * Give the other CPU some time to accept the IPI.
600 */ 600 */
601 udelay(200); 601 udelay(200);
602 if (APIC_INTEGRATED(apic_version[phys_apicid])) { 602 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
603 maxlvt = lapic_get_maxlvt(); 603 maxlvt = lapic_get_maxlvt();
604 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 604 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
605 apic_write(APIC_ESR, 0); 605 apic_write(APIC_ESR, 0);
@@ -614,11 +614,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
614 614
615 return (send_status | accept_status); 615 return (send_status | accept_status);
616} 616}
617#endif /* WAKE_SECONDARY_VIA_NMI */
618 617
619#ifdef WAKE_SECONDARY_VIA_INIT 618int __devinit
620static int __devinit 619wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
621wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
622{ 620{
623 unsigned long send_status, accept_status = 0; 621 unsigned long send_status, accept_status = 0;
624 int maxlvt, num_starts, j; 622 int maxlvt, num_starts, j;
@@ -737,7 +735,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
737 735
738 return (send_status | accept_status); 736 return (send_status | accept_status);
739} 737}
740#endif /* WAKE_SECONDARY_VIA_INIT */
741 738
742struct create_idle { 739struct create_idle {
743 struct work_struct work; 740 struct work_struct work;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/uaccess.h>
9#include <asm/stacktrace.h> 10#include <asm/stacktrace.h>
10 11
11static void save_stack_warning(void *data, char *msg) 12static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
83 trace->entries[trace->nr_entries++] = ULONG_MAX; 84 trace->entries[trace->nr_entries++] = ULONG_MAX;
84} 85}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk); 86EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
87
88/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
89
90struct stack_frame {
91 const void __user *next_fp;
92 unsigned long ret_addr;
93};
94
95static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
96{
97 int ret;
98
99 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
100 return 0;
101
102 ret = 1;
103 pagefault_disable();
104 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
105 ret = 0;
106 pagefault_enable();
107
108 return ret;
109}
110
111static inline void __save_stack_trace_user(struct stack_trace *trace)
112{
113 const struct pt_regs *regs = task_pt_regs(current);
114 const void __user *fp = (const void __user *)regs->bp;
115
116 if (trace->nr_entries < trace->max_entries)
117 trace->entries[trace->nr_entries++] = regs->ip;
118
119 while (trace->nr_entries < trace->max_entries) {
120 struct stack_frame frame;
121
122 frame.next_fp = NULL;
123 frame.ret_addr = 0;
124 if (!copy_stack_frame(fp, &frame))
125 break;
126 if ((unsigned long)fp < regs->sp)
127 break;
128 if (frame.ret_addr) {
129 trace->entries[trace->nr_entries++] =
130 frame.ret_addr;
131 }
132 if (fp == frame.next_fp)
133 break;
134 fp = frame.next_fp;
135 }
136}
137
138void save_stack_trace_user(struct stack_trace *trace)
139{
140 /*
141 * Trace user stack if we are not a kernel thread
142 */
143 if (current->mm) {
144 __save_stack_trace_user(trace);
145 }
146 if (trace->nr_entries < trace->max_entries)
147 trace->entries[trace->nr_entries++] = ULONG_MAX;
148}
149
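
save_stack_trace_user() walks the task's user-space frame-pointer chain: record the faulting IP, then repeatedly copy a two-word stack_frame, stopping when the copy fails, the frame pointer falls below the stack pointer, or the chain points at itself. The sketch below performs the same walk over a hand-built chain in ordinary memory; access_ok() and pagefault_disable() are omitted because they only make sense in kernel context, and the lower-bound check is disabled by passing sp = 0.

/* Sketch only: the frame-pointer walk of __save_stack_trace_user(). */
#include <stdio.h>
#include <string.h>

struct stack_frame {
        const void *next_fp;
        unsigned long ret_addr;
};

static int copy_stack_frame(const void *fp, struct stack_frame *frame)
{
        /* In the kernel this is __copy_from_user_inatomic() guarded by
         * access_ok() and pagefault_disable(). */
        if (!fp)
                return 0;
        memcpy(frame, fp, sizeof(*frame));
        return 1;
}

static void walk(const void *fp, unsigned long sp)
{
        while (fp) {
                struct stack_frame frame;

                if (!copy_stack_frame(fp, &frame))
                        break;
                if ((unsigned long)fp < sp)     /* frame below the stack pointer: bogus */
                        break;
                if (frame.ret_addr)
                        printf("  ret %#lx\n", frame.ret_addr);
                if (fp == frame.next_fp)        /* self-referencing frame: stop */
                        break;
                fp = frame.next_fp;
        }
}

int main(void)
{
        struct stack_frame f2 = { NULL, 0x4002 };
        struct stack_frame f1 = { &f2,  0x4001 };

        walk(&f1, 0);                   /* prints 0x4001 then 0x4002 */
        return 0;
}
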
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9ffb01c31c40..1c0dfbca87c1 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void)
46 cycles_t start, now, prev, end; 46 cycles_t start, now, prev, end;
47 int i; 47 int i;
48 48
49 rdtsc_barrier();
49 start = get_cycles(); 50 start = get_cycles();
51 rdtsc_barrier();
50 /* 52 /*
51 * The measurement runs for 20 msecs: 53 * The measurement runs for 20 msecs:
52 */ 54 */
@@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void)
61 */ 63 */
62 __raw_spin_lock(&sync_lock); 64 __raw_spin_lock(&sync_lock);
63 prev = last_tsc; 65 prev = last_tsc;
66 rdtsc_barrier();
64 now = get_cycles(); 67 now = get_cycles();
68 rdtsc_barrier();
65 last_tsc = now; 69 last_tsc = now;
66 __raw_spin_unlock(&sync_lock); 70 __raw_spin_unlock(&sync_lock);
67 71
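
check_tsc_warp() now fences the TSC reads so out-of-order execution cannot move RDTSC relative to the surrounding loads and stores, which matters when timestamps taken on different CPUs are compared under sync_lock. A hedged sketch of a fenced TSC read; it assumes GCC/Clang inline asm on x86 and uses LFENCE, which is one of the barriers rdtsc_barrier() can expand to.

/* Sketch only: a TSC read bracketed by barriers, in the spirit of
 * rdtsc_barrier(); get_cycles(); rdtsc_barrier(); above. */
#include <stdio.h>
#include <stdint.h>

static inline uint64_t rdtsc_serialized(void)
{
        uint32_t lo, hi;

        asm volatile("lfence" ::: "memory");    /* don't let RDTSC start early */
        asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
        asm volatile("lfence" ::: "memory");    /* don't let later work pass it */
        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        uint64_t a = rdtsc_serialized();
        uint64_t b = rdtsc_serialized();

        printf("delta = %llu cycles\n", (unsigned long long)(b - a));
        return 0;
}
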
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..6f3d3d4cd973 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
17 * want per guest time just set the kernel.vsyscall64 sysctl to 0. 17 * want per guest time just set the kernel.vsyscall64 sysctl to 0.
18 */ 18 */
19 19
20/* Disable profiling for userspace code: */
21#define DISABLE_BRANCH_PROFILING
22
20#include <linux/time.h> 23#include <linux/time.h>
21#include <linux/init.h> 24#include <linux/init.h>
22#include <linux/kernel.h> 25#include <linux/kernel.h>
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb75e822..15c3e6999182 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
310/* 310/*
311 * Enable and initialize the xsave feature. 311 * Enable and initialize the xsave feature.
312 */ 312 */
313void __init xsave_cntxt_init(void) 313void __ref xsave_cntxt_init(void)
314{ 314{
315 unsigned int eax, ebx, ecx, edx; 315 unsigned int eax, ebx, ecx, edx;
316 316
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ce3251ce5504..b81125f0bdee 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,8 @@ if VIRTUALIZATION
20config KVM 20config KVM
21 tristate "Kernel-based Virtual Machine (KVM) support" 21 tristate "Kernel-based Virtual Machine (KVM) support"
22 depends on HAVE_KVM 22 depends on HAVE_KVM
23 # for device assignment:
24 depends on PCI
23 select PREEMPT_NOTIFIERS 25 select PREEMPT_NOTIFIERS
24 select MMU_NOTIFIER 26 select MMU_NOTIFIER
25 select ANON_INODES 27 select ANON_INODES
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 8772dc946823..59ebd37ad79e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -548,8 +548,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
548 mutex_lock(&kvm->lock); 548 mutex_lock(&kvm->lock);
549 pit->irq_source_id = kvm_request_irq_source_id(kvm); 549 pit->irq_source_id = kvm_request_irq_source_id(kvm);
550 mutex_unlock(&kvm->lock); 550 mutex_unlock(&kvm->lock);
551 if (pit->irq_source_id < 0) 551 if (pit->irq_source_id < 0) {
552 kfree(pit);
552 return NULL; 553 return NULL;
554 }
553 555
554 mutex_init(&pit->pit_state.lock); 556 mutex_init(&pit->pit_state.lock);
555 mutex_lock(&pit->pit_state.lock); 557 mutex_lock(&pit->pit_state.lock);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2a5e64881d9b..410ddbc1aa2e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
314 if (r) 314 if (r)
315 goto out; 315 goto out;
316 r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, 316 r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
317 rmap_desc_cache, 1); 317 rmap_desc_cache, 4);
318 if (r) 318 if (r)
319 goto out; 319 goto out;
320 r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); 320 r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1038 } 1038 }
1039 1039
1040 rmap_write_protect(vcpu->kvm, sp->gfn); 1040 rmap_write_protect(vcpu->kvm, sp->gfn);
1041 kvm_unlink_unsync_page(vcpu->kvm, sp);
1041 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1042 if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
1042 kvm_mmu_zap_page(vcpu->kvm, sp); 1043 kvm_mmu_zap_page(vcpu->kvm, sp);
1043 return 1; 1044 return 1;
1044 } 1045 }
1045 1046
1046 kvm_mmu_flush_tlb(vcpu); 1047 kvm_mmu_flush_tlb(vcpu);
1047 kvm_unlink_unsync_page(vcpu->kvm, sp);
1048 return 0; 1048 return 0;
1049} 1049}
1050 1050
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 613ec9aa674a..84eee43bbe74 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
331 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], 331 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
332 &curr_pte, sizeof(curr_pte)); 332 &curr_pte, sizeof(curr_pte));
333 if (r || curr_pte != gw->ptes[level - 2]) { 333 if (r || curr_pte != gw->ptes[level - 2]) {
334 kvm_mmu_put_page(shadow_page, sptep);
334 kvm_release_pfn_clean(sw->pfn); 335 kvm_release_pfn_clean(sw->pfn);
335 sw->sptep = NULL; 336 sw->sptep = NULL;
336 return 1; 337 return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2643b430d83a..a4018b01e1f9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3149 3149
3150 if (cpu_has_virtual_nmis()) { 3150 if (cpu_has_virtual_nmis()) {
3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3151 if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
3152 if (vmx_nmi_enabled(vcpu)) { 3152 if (vcpu->arch.interrupt.pending) {
3153 enable_nmi_window(vcpu);
3154 } else if (vmx_nmi_enabled(vcpu)) {
3153 vcpu->arch.nmi_pending = false; 3155 vcpu->arch.nmi_pending = false;
3154 vcpu->arch.nmi_injected = true; 3156 vcpu->arch.nmi_injected = true;
3155 } else { 3157 } else {
@@ -3564,7 +3566,8 @@ static int __init vmx_init(void)
3564 bypass_guest_pf = 0; 3566 bypass_guest_pf = 0;
3565 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3567 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3566 VMX_EPT_WRITABLE_MASK | 3568 VMX_EPT_WRITABLE_MASK |
3567 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); 3569 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
3570 VMX_EPT_IGMT_BIT);
3568 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3571 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3569 VMX_EPT_EXECUTABLE_MASK); 3572 VMX_EPT_EXECUTABLE_MASK);
3570 kvm_enable_tdp(); 3573 kvm_enable_tdp();
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 3e010d21fdd7..ec5edc339da6 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
352#define VMX_EPT_READABLE_MASK 0x1ull 352#define VMX_EPT_READABLE_MASK 0x1ull
353#define VMX_EPT_WRITABLE_MASK 0x2ull 353#define VMX_EPT_WRITABLE_MASK 0x2ull
354#define VMX_EPT_EXECUTABLE_MASK 0x4ull 354#define VMX_EPT_EXECUTABLE_MASK 0x4ull
355#define VMX_EPT_IGMT_BIT (1ull << 6)
355 356
356#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 357#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
357 358
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9e68075544f6..4a20b2f9a381 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
39#define __do_strncpy_from_user(dst, src, count, res) \ 39#define __do_strncpy_from_user(dst, src, count, res) \
40do { \ 40do { \
41 int __d0, __d1, __d2; \ 41 int __d0, __d1, __d2; \
42 might_sleep(); \ 42 might_fault(); \
43 __asm__ __volatile__( \ 43 __asm__ __volatile__( \
44 " testl %1,%1\n" \ 44 " testl %1,%1\n" \
45 " jz 2f\n" \ 45 " jz 2f\n" \
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
126#define __do_clear_user(addr,size) \ 126#define __do_clear_user(addr,size) \
127do { \ 127do { \
128 int __d0; \ 128 int __d0; \
129 might_sleep(); \ 129 might_fault(); \
130 __asm__ __volatile__( \ 130 __asm__ __volatile__( \
131 "0: rep; stosl\n" \ 131 "0: rep; stosl\n" \
132 " movl %2,%0\n" \ 132 " movl %2,%0\n" \
@@ -155,7 +155,7 @@ do { \
155unsigned long 155unsigned long
156clear_user(void __user *to, unsigned long n) 156clear_user(void __user *to, unsigned long n)
157{ 157{
158 might_sleep(); 158 might_fault();
159 if (access_ok(VERIFY_WRITE, to, n)) 159 if (access_ok(VERIFY_WRITE, to, n))
160 __do_clear_user(to, n); 160 __do_clear_user(to, n);
161 return n; 161 return n;
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
197 unsigned long mask = -__addr_ok(s); 197 unsigned long mask = -__addr_ok(s);
198 unsigned long res, tmp; 198 unsigned long res, tmp;
199 199
200 might_sleep(); 200 might_fault();
201 201
202 __asm__ __volatile__( 202 __asm__ __volatile__(
203 " testl %0, %0\n" 203 " testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718b..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,7 +15,7 @@
15#define __do_strncpy_from_user(dst,src,count,res) \ 15#define __do_strncpy_from_user(dst,src,count,res) \
16do { \ 16do { \
17 long __d0, __d1, __d2; \ 17 long __d0, __d1, __d2; \
18 might_sleep(); \ 18 might_fault(); \
19 __asm__ __volatile__( \ 19 __asm__ __volatile__( \
20 " testq %1,%1\n" \ 20 " testq %1,%1\n" \
21 " jz 2f\n" \ 21 " jz 2f\n" \
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
64unsigned long __clear_user(void __user *addr, unsigned long size) 64unsigned long __clear_user(void __user *addr, unsigned long size)
65{ 65{
66 long __d0; 66 long __d0;
67 might_sleep(); 67 might_fault();
68 /* no memory constraint because it doesn't change any memory gcc knows 68 /* no memory constraint because it doesn't change any memory gcc knows
69 about */ 69 about */
70 asm volatile( 70 asm volatile(
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3c3b471ea496..3624a364b7f3 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -17,6 +17,7 @@
17#include <asm/bigsmp/apic.h> 17#include <asm/bigsmp/apic.h>
18#include <asm/bigsmp/ipi.h> 18#include <asm/bigsmp/ipi.h>
19#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
20#include <asm/mach-default/mach_wakecpu.h>
20 21
21static int dmi_bigsmp; /* can be set by dmi scanners */ 22static int dmi_bigsmp; /* can be set by dmi scanners */
22 23
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
index 9e835a11a13a..e63a4a76d8cd 100644
--- a/arch/x86/mach-generic/default.c
+++ b/arch/x86/mach-generic/default.c
@@ -16,6 +16,7 @@
16#include <asm/mach-default/mach_apic.h> 16#include <asm/mach-default/mach_apic.h>
17#include <asm/mach-default/mach_ipi.h> 17#include <asm/mach-default/mach_ipi.h>
18#include <asm/mach-default/mach_mpparse.h> 18#include <asm/mach-default/mach_mpparse.h>
19#include <asm/mach-default/mach_wakecpu.h>
19 20
20/* should be called last. */ 21/* should be called last. */
21static int probe_default(void) 22static int probe_default(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 28459cab3ddb..7b4e6d0d1690 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -16,7 +16,19 @@
16#include <asm/es7000/apic.h> 16#include <asm/es7000/apic.h>
17#include <asm/es7000/ipi.h> 17#include <asm/es7000/ipi.h>
18#include <asm/es7000/mpparse.h> 18#include <asm/es7000/mpparse.h>
19#include <asm/es7000/wakecpu.h> 19#include <asm/mach-default/mach_wakecpu.h>
20
21void __init es7000_update_genapic_to_cluster(void)
22{
23 genapic->target_cpus = target_cpus_cluster;
24 genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
25 genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
26 genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;
27
28 genapic->init_apic_ldr = init_apic_ldr_cluster;
29
30 genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
31}
20 32
21static int probe_es7000(void) 33static int probe_es7000(void)
22{ 34{
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index 5a7e4619e1c4..c346d9d0226f 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -15,6 +15,7 @@
15#include <asm/mpspec.h> 15#include <asm/mpspec.h>
16#include <asm/apicdef.h> 16#include <asm/apicdef.h>
17#include <asm/genapic.h> 17#include <asm/genapic.h>
18#include <asm/setup.h>
18 19
19extern struct genapic apic_numaq; 20extern struct genapic apic_numaq;
20extern struct genapic apic_summit; 21extern struct genapic apic_summit;
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg)
57 } 58 }
58 } 59 }
59 60
61 if (x86_quirks->update_genapic)
62 x86_quirks->update_genapic();
63
60 /* Parsed again by __setup for debug/verbose */ 64 /* Parsed again by __setup for debug/verbose */
61 return 0; 65 return 0;
62} 66}
@@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void)
72 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support 76 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
73 */ 77 */
74 78
75 if (!cmdline_apic && genapic == &apic_default) 79 if (!cmdline_apic && genapic == &apic_default) {
76 if (apic_bigsmp.probe()) { 80 if (apic_bigsmp.probe()) {
77 genapic = &apic_bigsmp; 81 genapic = &apic_bigsmp;
82 if (x86_quirks->update_genapic)
83 x86_quirks->update_genapic();
78 printk(KERN_INFO "Overriding APIC driver with %s\n", 84 printk(KERN_INFO "Overriding APIC driver with %s\n",
79 genapic->name); 85 genapic->name);
80 } 86 }
87 }
81#endif 88#endif
82} 89}
83 90
@@ -94,6 +101,9 @@ void __init generic_apic_probe(void)
94 /* Not visible without early console */ 101 /* Not visible without early console */
95 if (!apic_probe[i]) 102 if (!apic_probe[i])
96 panic("Didn't find an APIC driver"); 103 panic("Didn't find an APIC driver");
104
105 if (x86_quirks->update_genapic)
106 x86_quirks->update_genapic();
97 } 107 }
98 printk(KERN_INFO "Using APIC driver %s\n", genapic->name); 108 printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
99} 109}
@@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
108 if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { 118 if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
109 if (!cmdline_apic) { 119 if (!cmdline_apic) {
110 genapic = apic_probe[i]; 120 genapic = apic_probe[i];
121 if (x86_quirks->update_genapic)
122 x86_quirks->update_genapic();
111 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 123 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
112 genapic->name); 124 genapic->name);
113 } 125 }
@@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
124 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { 136 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
125 if (!cmdline_apic) { 137 if (!cmdline_apic) {
126 genapic = apic_probe[i]; 138 genapic = apic_probe[i];
139 if (x86_quirks->update_genapic)
140 x86_quirks->update_genapic();
127 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 141 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
128 genapic->name); 142 genapic->name);
129 } 143 }
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6272b5e69da6..2c6d234e0009 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -16,6 +16,7 @@
16#include <asm/summit/apic.h> 16#include <asm/summit/apic.h>
17#include <asm/summit/ipi.h> 17#include <asm/summit/ipi.h>
18#include <asm/summit/mpparse.h> 18#include <asm/summit/mpparse.h>
19#include <asm/mach-default/mach_wakecpu.h>
19 20
20static int probe_summit(void) 21static int probe_summit(void)
21{ 22{
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 0e331652681e..52145007bd7e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -7,6 +7,7 @@
7 * This file provides all the same external entries as smp.c but uses 7 * This file provides all the same external entries as smp.c but uses
8 * the voyager hal to provide the functionality 8 * the voyager hal to provide the functionality
9 */ 9 */
10#include <linux/cpu.h>
10#include <linux/module.h> 11#include <linux/module.h>
11#include <linux/mm.h> 12#include <linux/mm.h>
12#include <linux/kernel_stat.h> 13#include <linux/kernel_stat.h>
@@ -1790,6 +1791,17 @@ void __init smp_setup_processor_id(void)
1790 x86_write_percpu(cpu_number, hard_smp_processor_id()); 1791 x86_write_percpu(cpu_number, hard_smp_processor_id());
1791} 1792}
1792 1793
1794static void voyager_send_call_func(cpumask_t callmask)
1795{
1796 __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id());
1797 send_CPI(mask, VIC_CALL_FUNCTION_CPI);
1798}
1799
1800static void voyager_send_call_func_single(int cpu)
1801{
1802 send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI);
1803}
1804
1793struct smp_ops smp_ops = { 1805struct smp_ops smp_ops = {
1794 .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, 1806 .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
1795 .smp_prepare_cpus = voyager_smp_prepare_cpus, 1807 .smp_prepare_cpus = voyager_smp_prepare_cpus,
@@ -1799,6 +1811,6 @@ struct smp_ops smp_ops = {
1799 .smp_send_stop = voyager_smp_send_stop, 1811 .smp_send_stop = voyager_smp_send_stop,
1800 .smp_send_reschedule = voyager_smp_send_reschedule, 1812 .smp_send_reschedule = voyager_smp_send_reschedule,
1801 1813
1802 .send_call_func_ipi = native_send_call_func_ipi, 1814 .send_call_func_ipi = voyager_send_call_func,
1803 .send_call_func_single_ipi = native_send_call_func_single_ipi, 1815 .send_call_func_single_ipi = voyager_send_call_func_single,
1804}; 1816};
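
Voyager gains its own cross-call IPI helpers: the broadcast variant clears the sender's bit from the target mask before raising VIC_CALL_FUNCTION_CPI, and the single-CPU variant targets exactly one bit. A tiny sketch of the mask-out-self computation on a toy 32-bit CPU mask (the CPU number is hard-coded for illustration):

/* Sketch only: "everyone in the mask except me", as in voyager_send_call_func(). */
#include <stdio.h>
#include <stdint.h>

static int smp_processor_id(void) { return 2; }   /* pretend we run on CPU 2 */

static uint32_t call_func_mask(uint32_t callmask)
{
        return callmask & ~(1u << smp_processor_id());
}

int main(void)
{
        uint32_t callmask = 0x0f;               /* CPUs 0-3 */

        printf("send CPI to %#x\n", call_func_mask(callmask));  /* 0xb */
        return 0;
}
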
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o 11obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o 12mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 13obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15 14
16obj-$(CONFIG_NUMA) += numa_$(BITS).o 15obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..21e996a70d68 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
53 53
54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
55{ 55{
56#ifdef CONFIG_MMIOTRACE_HOOKS 56#ifdef CONFIG_MMIOTRACE
57 if (unlikely(is_kmmio_active())) 57 if (unlikely(is_kmmio_active()))
58 if (kmmio_handler(regs, addr) == 1) 58 if (kmmio_handler(regs, addr) == 1)
59 return -1; 59 return -1;
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
413 unsigned long error_code) 413 unsigned long error_code)
414{ 414{
415 unsigned long flags = oops_begin(); 415 unsigned long flags = oops_begin();
416 int sig = SIGKILL;
416 struct task_struct *tsk; 417 struct task_struct *tsk;
417 418
418 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
423 tsk->thread.trap_no = 14; 424 tsk->thread.trap_no = 14;
424 tsk->thread.error_code = error_code; 425 tsk->thread.error_code = error_code;
425 if (__die("Bad pagetable", regs, error_code)) 426 if (__die("Bad pagetable", regs, error_code))
426 regs = NULL; 427 sig = 0;
427 oops_end(flags, regs, SIGKILL); 428 oops_end(flags, regs, sig);
428} 429}
429#endif 430#endif
430 431
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
590 int fault; 591 int fault;
591#ifdef CONFIG_X86_64 592#ifdef CONFIG_X86_64
592 unsigned long flags; 593 unsigned long flags;
594 int sig;
593#endif 595#endif
594 596
595 tsk = current; 597 tsk = current;
@@ -849,11 +851,12 @@ no_context:
849 bust_spinlocks(0); 851 bust_spinlocks(0);
850 do_exit(SIGKILL); 852 do_exit(SIGKILL);
851#else 853#else
854 sig = SIGKILL;
852 if (__die("Oops", regs, error_code)) 855 if (__die("Oops", regs, error_code))
853 regs = NULL; 856 sig = 0;
854 /* Executive summary in case the body of the oops scrolled away */ 857 /* Executive summary in case the body of the oops scrolled away */
855 printk(KERN_EMERG "CR2: %016lx\n", address); 858 printk(KERN_EMERG "CR2: %016lx\n", address);
856 oops_end(flags, regs, SIGKILL); 859 oops_end(flags, regs, sig);
857#endif 860#endif
858 861
859/* 862/*
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 847c164725f4..8518c678d83f 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
222 } 222 }
223} 223}
224 224
225#ifdef CONFIG_HIBERNATION
226/**
227 * resume_map_numa_kva - add KVA mapping to the temporary page tables created
228 * during resume from hibernation
229 * @pgd_base - temporary resume page directory
230 */
231void resume_map_numa_kva(pgd_t *pgd_base)
232{
233 int node;
234
235 for_each_online_node(node) {
236 unsigned long start_va, start_pfn, size, pfn;
237
238 start_va = (unsigned long)node_remap_start_vaddr[node];
239 start_pfn = node_remap_start_pfn[node];
240 size = node_remap_size[node];
241
242 printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
243
244 for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
245 unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
246 pgd_t *pgd = pgd_base + pgd_index(vaddr);
247 pud_t *pud = pud_offset(pgd, vaddr);
248 pmd_t *pmd = pmd_offset(pud, vaddr);
249
250 set_pmd(pmd, pfn_pmd(start_pfn + pfn,
251 PAGE_KERNEL_LARGE_EXEC));
252
253 printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
254 __FUNCTION__, vaddr, start_pfn + pfn);
255 }
256 }
257}
258#endif
259
225static unsigned long calculate_numa_remap_pages(void) 260static unsigned long calculate_numa_remap_pages(void)
226{ 261{
227 int nid; 262 int nid;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 022cd41ea9b4..202864ad49a7 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type)
401 *cpu_type = "i386/pii"; 401 *cpu_type = "i386/pii";
402 break; 402 break;
403 case 6 ... 8: 403 case 6 ... 8:
404 case 10 ... 11:
404 *cpu_type = "i386/piii"; 405 *cpu_type = "i386/piii";
405 break; 406 break;
406 case 9: 407 case 9:
408 case 13:
407 *cpu_type = "i386/p6_mobile"; 409 *cpu_type = "i386/p6_mobile";
408 break; 410 break;
409 case 10 ... 13:
410 *cpu_type = "i386/p6";
411 break;
412 case 14: 411 case 14:
413 *cpu_type = "i386/core"; 412 *cpu_type = "i386/core";
414 break; 413 break;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3f1b81a83e2e..e9f80c744cf3 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
69 int i; 69 int i;
70 70
71 if (!reset_value) { 71 if (!reset_value) {
72 reset_value = kmalloc(sizeof(unsigned) * num_counters, 72 reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
73 GFP_ATOMIC); 73 GFP_ATOMIC);
74 if (!reset_value) 74 if (!reset_value)
75 return; 75 return;
@@ -156,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs)
         unsigned int low, high;
         int i;
 
+        if (!reset_value)
+                return;
         for (i = 0; i < num_counters; ++i) {
                 if (reset_value[i]) {
                         CTRL_READ(low, high, msrs, i);
@@ -171,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs)
         unsigned int low, high;
         int i;
 
+        if (!reset_value)
+                return;
         for (i = 0; i < num_counters; ++i) {
                 if (!reset_value[i])
                         continue;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 3c27a809393b..2051dc96b8e9 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
                         pci_siemens_interrupt_controller);
 
 /*
- * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config
- * have 4096 bytes. Even if the device is capable, that doesn't mean we can
- * access it. Maybe we don't have a way to generate extended config space
- * accesses. So check it
+ * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
+ * 4096 bytes configuration space for each function of their processor
+ * configuration space.
  */
-static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
+static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
 {
         dev->cfg_size = pci_cfg_space_size_ext(dev);
 }
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
 
 /*
  * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index f2b6e3f11bfc..81197c62d5b3 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -12,6 +12,7 @@
 #include <asm/system.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/mmzone.h>
 
 /* Defined in hibernate_asm_32.S */
 extern int restore_image(void);
@@ -127,6 +128,9 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
                         }
                 }
         }
+
+        resume_map_numa_kva(pgd_base);
+
         return 0;
 }
 
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
  * Also alternative() doesn't work.
  */
 
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
 #include <linux/kernel.h>
 #include <linux/posix-timers.h>
 #include <linux/time.h>
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 688936044dc9..636ef4caa52d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
  * For 64-bit, we must skip the Xen hole in the middle of the address
  * space, just after the big x86-64 virtual hole.
  */
-static int xen_pgd_walk(struct mm_struct *mm,
+static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
                         int (*func)(struct mm_struct *mm, struct page *,
                                     enum pt_level),
                         unsigned long limit)
 {
-        pgd_t *pgd = mm->pgd;
         int flush = 0;
         unsigned hole_low, hole_high;
         unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -753,6 +752,14 @@ out:
         return flush;
 }
 
+static int xen_pgd_walk(struct mm_struct *mm,
+                        int (*func)(struct mm_struct *mm, struct page *,
+                                    enum pt_level),
+                        unsigned long limit)
+{
+        return __xen_pgd_walk(mm, mm->pgd, func, limit);
+}
+
 /* If we're using split pte locks, then take the page's lock and
    return a pointer to it. Otherwise return NULL. */
 static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 
         xen_mc_batch();
 
-        if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
+        if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
                 /* re-enable interrupts for flushing */
                 xen_mc_issue(0);
 
@@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
                            PT_PMD);
 #endif
 
-        xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
+        __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
 
         xen_mc_issue(0);
 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d77da613b1d2..acd9b6705e02 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu)
         alternatives_smp_switch(0);
 }
 
-static void xen_play_dead(void)
+static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */
 {
         play_dead_common();
         HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d7422dc2a55c..9e1afae8461f 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu);
 
 void xen_mark_init_mm_pinned(void);
 
-void __init xen_setup_vcpu_info_placement(void);
+void xen_setup_vcpu_info_placement(void);
 
 #ifdef CONFIG_SMP
 void xen_smp_init(void);