author	Ingo Molnar <mingo@elte.hu>	2008-07-18 16:50:34 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-18 16:50:34 -0400
commit	a208f37a465e222218974ab20a31b42b7b4893b2 (patch)
tree	77c6acdd4be32024330a14f2618b814126ce7a20 /arch/x86
parent	511d9d34183662aada3890883e860b151d707e22 (diff)
parent	5b664cb235e97afbf34db9c4d77f08ebd725335e (diff)
Merge branch 'linus' into x86/x2apic
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/Kconfig	25
-rw-r--r--	arch/x86/Kconfig.debug	27
-rw-r--r--	arch/x86/kernel/Makefile	7
-rw-r--r--	arch/x86/kernel/acpi/boot.c	1
-rw-r--r--	arch/x86/kernel/acpi/processor.c	6
-rw-r--r--	arch/x86/kernel/acpi/sleep.c	32
-rw-r--r--	arch/x86/kernel/alternative.c	22
-rw-r--r--	arch/x86/kernel/apic_32.c	4
-rw-r--r--	arch/x86/kernel/apm_32.c	12
-rw-r--r--	arch/x86/kernel/cpu/amd_64.c	1
-rw-r--r--	arch/x86/kernel/cpu/centaur_64.c	10
-rw-r--r--	arch/x86/kernel/cpu/common_64.c	14
-rw-r--r--	arch/x86/kernel/cpu/intel.c	4
-rw-r--r--	arch/x86/kernel/cpu/intel_64.c	10
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce_64.c	10
-rw-r--r--	arch/x86/kernel/cpu/mcheck/non-fatal.c	2
-rw-r--r--	arch/x86/kernel/cpu/mtrr/main.c	4
-rw-r--r--	arch/x86/kernel/cpu/perfctr-watchdog.c	4
-rw-r--r--	arch/x86/kernel/cpuid.c	27
-rw-r--r--	arch/x86/kernel/e820.c	19
-rw-r--r--	arch/x86/kernel/early-quirks.c	26
-rw-r--r--	arch/x86/kernel/early_printk.c	2
-rw-r--r--	arch/x86/kernel/entry_32.S	72
-rw-r--r--	arch/x86/kernel/entry_64.S	109
-rw-r--r--	arch/x86/kernel/ftrace.c	141
-rw-r--r--	arch/x86/kernel/hpet.c	20
-rw-r--r--	arch/x86/kernel/i386_ksyms_32.c	9
-rw-r--r--	arch/x86/kernel/io_apic_32.c	12
-rw-r--r--	arch/x86/kernel/io_apic_64.c	12
-rw-r--r--	arch/x86/kernel/irqinit_64.c	4
-rw-r--r--	arch/x86/kernel/ldt.c	2
-rw-r--r--	arch/x86/kernel/machine_kexec_32.c	4
-rw-r--r--	arch/x86/kernel/machine_kexec_64.c	4
-rw-r--r--	arch/x86/kernel/microcode.c	6
-rw-r--r--	arch/x86/kernel/msr.c	16
-rw-r--r--	arch/x86/kernel/nmi.c	6
-rw-r--r--	arch/x86/kernel/numaq_32.c	7
-rw-r--r--	arch/x86/kernel/pci-gart_64.c	8
-rw-r--r--	arch/x86/kernel/process.c	30
-rw-r--r--	arch/x86/kernel/process_32.c	3
-rw-r--r--	arch/x86/kernel/process_64.c	3
-rw-r--r--	arch/x86/kernel/quirks.c	2
-rw-r--r--	arch/x86/kernel/setup.c	13
-rw-r--r--	arch/x86/kernel/setup_percpu.c	8
-rw-r--r--	arch/x86/kernel/smp.c	158
-rw-r--r--	arch/x86/kernel/smpboot.c	4
-rw-r--r--	arch/x86/kernel/smpcommon.c	56
-rw-r--r--	arch/x86/kernel/stacktrace.c	2
-rw-r--r--	arch/x86/kernel/tlb_32.c	2
-rw-r--r--	arch/x86/kernel/tlb_64.c	2
-rw-r--r--	arch/x86/kernel/traps_64.c	25
-rw-r--r--	arch/x86/kernel/tsc.c	2
-rw-r--r--	arch/x86/kernel/vmlinux_32.lds.S	8
-rw-r--r--	arch/x86/kernel/vmlinux_64.lds.S	10
-rw-r--r--	arch/x86/kernel/vsyscall_64.c	7
-rw-r--r--	arch/x86/kernel/x8664_ksyms_64.c	11
-rw-r--r--	arch/x86/kvm/vmx.c	4
-rw-r--r--	arch/x86/kvm/x86.c	2
-rw-r--r--	arch/x86/lib/Makefile	1
-rw-r--r--	arch/x86/lib/msr-on-cpu.c	8
-rw-r--r--	arch/x86/lib/thunk_32.S	47
-rw-r--r--	arch/x86/lib/thunk_64.S	19
-rw-r--r--	arch/x86/mach-voyager/voyager_smp.c	98
-rw-r--r--	arch/x86/mm/Makefile	5
-rw-r--r--	arch/x86/mm/fault.c	13
-rw-r--r--	arch/x86/mm/init_32.c	4
-rw-r--r--	arch/x86/mm/init_64.c	26
-rw-r--r--	arch/x86/mm/ioremap.c	11
-rw-r--r--	arch/x86/mm/kmmio.c	510
-rw-r--r--	arch/x86/mm/mmio-mod.c	515
-rw-r--r--	arch/x86/mm/pageattr.c	9
-rw-r--r--	arch/x86/mm/pat.c	4
-rw-r--r--	arch/x86/mm/pf_in.c	489
-rw-r--r--	arch/x86/mm/pf_in.h	39
-rw-r--r--	arch/x86/mm/srat_32.c	3
-rw-r--r--	arch/x86/mm/testmmiotrace.c	71
-rw-r--r--	arch/x86/oprofile/nmi_int.c	10
-rw-r--r--	arch/x86/pci/amd_bus.c	2
-rw-r--r--	arch/x86/pci/common.c	30
-rw-r--r--	arch/x86/pci/early.c	60
-rw-r--r--	arch/x86/pci/i386.c	4
-rw-r--r--	arch/x86/pci/irq.c	120
-rw-r--r--	arch/x86/pci/pci.h	1
-rw-r--r--	arch/x86/vdso/vclock_gettime.c	15
-rw-r--r--	arch/x86/vdso/vgetcpu.c	3
-rw-r--r--	arch/x86/xen/enlighten.c	4
-rw-r--r--	arch/x86/xen/mmu.c	2
-rw-r--r--	arch/x86/xen/smp.c	135
-rw-r--r--	arch/x86/xen/xen-ops.h	9
89 files changed, 2669 insertions, 651 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0bf8391a7f2d..baca55455005 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_KPROBES 24 select HAVE_KPROBES
25 select HAVE_KRETPROBES 25 select HAVE_KRETPROBES
26 select HAVE_DYNAMIC_FTRACE
27 select HAVE_FTRACE
26 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 28 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
27 select HAVE_ARCH_KGDB if !X86_VOYAGER 29 select HAVE_ARCH_KGDB if !X86_VOYAGER
28 30
@@ -168,6 +170,7 @@ config GENERIC_PENDING_IRQ
168config X86_SMP 170config X86_SMP
169 bool 171 bool
170 depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) 172 depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
173 select USE_GENERIC_SMP_HELPERS
171 default y 174 default y
172 175
173config X86_32_SMP 176config X86_32_SMP
@@ -445,7 +448,6 @@ config PARAVIRT_DEBUG
445config MEMTEST 448config MEMTEST
446 bool "Memtest" 449 bool "Memtest"
447 depends on X86_64 450 depends on X86_64
448 default y
449 help 451 help
450 This option adds a kernel parameter 'memtest', which allows memtest 452 This option adds a kernel parameter 'memtest', which allows memtest
451 to be set. 453 to be set.
@@ -453,7 +455,7 @@ config MEMTEST
453 memtest=1, mean do 1 test pattern; 455 memtest=1, mean do 1 test pattern;
454 ... 456 ...
455 memtest=4, mean do 4 test patterns. 457 memtest=4, mean do 4 test patterns.
456 If you are unsure how to answer this question, answer Y. 458 If you are unsure how to answer this question, answer N.
457 459
458config X86_SUMMIT_NUMA 460config X86_SUMMIT_NUMA
459 def_bool y 461 def_bool y
@@ -1133,21 +1135,18 @@ config MTRR
1133 See <file:Documentation/mtrr.txt> for more information. 1135 See <file:Documentation/mtrr.txt> for more information.
1134 1136
1135config MTRR_SANITIZER 1137config MTRR_SANITIZER
1136 def_bool y 1138 bool
1137 prompt "MTRR cleanup support" 1139 prompt "MTRR cleanup support"
1138 depends on MTRR 1140 depends on MTRR
1139 help 1141 help
1140 Convert MTRR layout from continuous to discrete, so some X driver 1142 Convert MTRR layout from continuous to discrete, so X drivers can
1141 could add WB entries. 1143 add writeback entries.
1142 1144
1143 Say N here if you see bootup problems (boot crash, boot hang, 1145 Can be disabled with disable_mtrr_cleanup on the kernel command line.
1144 spontaneous reboots). 1146 The largest mtrr entry size for a continous block can be set with
1147 mtrr_chunk_size.
1145 1148
1146 Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size 1149 If unsure, say N.
1147 could be used to send largest mtrr entry size for continuous block
1148 to hold holes (aka. UC entries)
1149
1150 If unsure, say Y.
1151 1150
1152config MTRR_SANITIZER_ENABLE_DEFAULT 1151config MTRR_SANITIZER_ENABLE_DEFAULT
1153 int "MTRR cleanup enable value (0-1)" 1152 int "MTRR cleanup enable value (0-1)"
@@ -1164,7 +1163,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
1164 depends on MTRR_SANITIZER 1163 depends on MTRR_SANITIZER
1165 help 1164 help
1166 mtrr cleanup spare entries default, it can be changed via 1165 mtrr cleanup spare entries default, it can be changed via
1167 mtrr_spare_reg_nr= 1166 mtrr_spare_reg_nr=N on the kernel command line.
1168 1167
1169config X86_PAT 1168config X86_PAT
1170 bool 1169 bool
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index acc0271920f2..ae36bfa814e5 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -171,6 +171,33 @@ config IOMMU_LEAK
171 Add a simple leak tracer to the IOMMU code. This is useful when you 171 Add a simple leak tracer to the IOMMU code. This is useful when you
172 are debugging a buggy device driver that leaks IOMMU mappings. 172 are debugging a buggy device driver that leaks IOMMU mappings.
173 173
174config MMIOTRACE_HOOKS
175 bool
176
177config MMIOTRACE
178 bool "Memory mapped IO tracing"
179 depends on DEBUG_KERNEL && PCI
180 select TRACING
181 select MMIOTRACE_HOOKS
182 help
183 Mmiotrace traces Memory Mapped I/O access and is meant for
184 debugging and reverse engineering. It is called from the ioremap
185 implementation and works via page faults. Tracing is disabled by
186 default and can be enabled at run-time.
187
188 See Documentation/tracers/mmiotrace.txt.
189 If you are not helping to develop drivers, say N.
190
191config MMIOTRACE_TEST
192 tristate "Test module for mmiotrace"
193 depends on MMIOTRACE && m
194 help
195 This is a dumb module for testing mmiotrace. It is very dangerous
196 as it will write garbage to IO memory starting at a given address.
197 However, it should be safe to use on e.g. unused portion of VRAM.
198
199 Say N, unless you absolutely know what you are doing.
200
174# 201#
175# IO delay types: 202# IO delay types:
176# 203#
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 81280e93e792..673f1d12b420 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,12 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
6 6
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
9ifdef CONFIG_FTRACE
10# Do not profile debug utilities
11CFLAGS_REMOVE_tsc.o = -pg
12CFLAGS_REMOVE_rtc.o = -pg
13endif
14
9# 15#
10# vsyscalls (which work on the user stack) should have 16# vsyscalls (which work on the user stack) should have
11# no stack-protector checks: 17# no stack-protector checks:
@@ -57,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
57obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o 63obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
58obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o 64obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
59obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 65obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
66obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
60obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 67obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
61obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 68obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
62obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 69obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b314bcd08406..b41b27af33e6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1411,7 +1411,6 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
1411{ 1411{
1412 pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident); 1412 pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
1413 acpi_skip_timer_override = 1; 1413 acpi_skip_timer_override = 1;
1414 force_mask_ioapic_irq_2();
1415 return 0; 1414 return 0;
1416} 1415}
1417 1416
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index de2d2e4ebad9..7c074eec39fb 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
56 if (cpu_has(c, X86_FEATURE_ACPI)) 56 if (cpu_has(c, X86_FEATURE_ACPI))
57 buf[2] |= ACPI_PDC_T_FFH; 57 buf[2] |= ACPI_PDC_T_FFH;
58 58
59 /*
60 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
61 */
62 if (!cpu_has(c, X86_FEATURE_MWAIT))
63 buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
64
59 obj->type = ACPI_TYPE_BUFFER; 65 obj->type = ACPI_TYPE_BUFFER;
60 obj->buffer.length = 12; 66 obj->buffer.length = 12;
61 obj->buffer.pointer = (u8 *) buf; 67 obj->buffer.pointer = (u8 *) buf;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e6a4b564ccaa..868de3d5c39d 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -23,6 +23,15 @@ static unsigned long acpi_realmode;
23static char temp_stack[10240]; 23static char temp_stack[10240];
24#endif 24#endif
25 25
26/* XXX: this macro should move to asm-x86/segment.h and be shared with the
27 boot code... */
28#define GDT_ENTRY(flags, base, limit) \
29 (((u64)(base & 0xff000000) << 32) | \
30 ((u64)flags << 40) | \
31 ((u64)(limit & 0x00ff0000) << 32) | \
32 ((u64)(base & 0x00ffffff) << 16) | \
33 ((u64)(limit & 0x0000ffff)))
34
26/** 35/**
27 * acpi_save_state_mem - save kernel state 36 * acpi_save_state_mem - save kernel state
28 * 37 *
@@ -51,18 +60,27 @@ int acpi_save_state_mem(void)
51 header->video_mode = saved_video_mode; 60 header->video_mode = saved_video_mode;
52 61
53 header->wakeup_jmp_seg = acpi_wakeup_address >> 4; 62 header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
63
64 /*
65 * Set up the wakeup GDT. We set these up as Big Real Mode,
66 * that is, with limits set to 4 GB. At least the Lenovo
67 * Thinkpad X61 is known to need this for the video BIOS
68 * initialization quirk to work; this is likely to also
69 * be the case for other laptops or integrated video devices.
70 */
71
54 /* GDT[0]: GDT self-pointer */ 72 /* GDT[0]: GDT self-pointer */
55 header->wakeup_gdt[0] = 73 header->wakeup_gdt[0] =
56 (u64)(sizeof(header->wakeup_gdt) - 1) + 74 (u64)(sizeof(header->wakeup_gdt) - 1) +
57 ((u64)(acpi_wakeup_address + 75 ((u64)(acpi_wakeup_address +
58 ((char *)&header->wakeup_gdt - (char *)acpi_realmode)) 76 ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
59 << 16); 77 << 16);
60 /* GDT[1]: real-mode-like code segment */ 78 /* GDT[1]: big real mode-like code segment */
61 header->wakeup_gdt[1] = (0x009bULL << 40) + 79 header->wakeup_gdt[1] =
62 ((u64)acpi_wakeup_address << 16) + 0xffff; 80 GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
63 /* GDT[2]: real-mode-like data segment */ 81 /* GDT[2]: big real mode-like data segment */
64 header->wakeup_gdt[2] = (0x0093ULL << 40) + 82 header->wakeup_gdt[2] =
65 ((u64)acpi_wakeup_address << 16) + 0xffff; 83 GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
66 84
67#ifndef CONFIG_64BIT 85#ifndef CONFIG_64BIT
68 store_gdt((struct desc_ptr *)&header->pmode_gdt); 86 store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -140,6 +158,8 @@ static int __init acpi_sleep_setup(char *str)
140 acpi_realmode_flags |= 2; 158 acpi_realmode_flags |= 2;
141 if (strncmp(str, "s3_beep", 7) == 0) 159 if (strncmp(str, "s3_beep", 7) == 0)
142 acpi_realmode_flags |= 4; 160 acpi_realmode_flags |= 4;
161 if (strncmp(str, "old_ordering", 12) == 0)
162 acpi_old_suspend_ordering();
143 str = strchr(str, ','); 163 str = strchr(str, ',');
144 if (str != NULL) 164 if (str != NULL)
145 str += strspn(str, ", \t"); 165 str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90dd..2763cb37b553 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/spinlock.h> 3#include <linux/mutex.h>
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/kprobes.h> 5#include <linux/kprobes.h>
6#include <linux/mm.h> 6#include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
143#ifdef CONFIG_X86_64 143#ifdef CONFIG_X86_64
144 144
145extern char __vsyscall_0; 145extern char __vsyscall_0;
146static inline const unsigned char*const * find_nop_table(void) 146const unsigned char *const *find_nop_table(void)
147{ 147{
148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; 149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
162 { -1, NULL } 162 { -1, NULL }
163}; 163};
164 164
165static const unsigned char*const * find_nop_table(void) 165const unsigned char *const *find_nop_table(void)
166{ 166{
167 const unsigned char *const *noptable = intel_nops; 167 const unsigned char *const *noptable = intel_nops;
168 int i; 168 int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
279 struct list_head next; 279 struct list_head next;
280}; 280};
281static LIST_HEAD(smp_alt_modules); 281static LIST_HEAD(smp_alt_modules);
282static DEFINE_SPINLOCK(smp_alt); 282static DEFINE_MUTEX(smp_alt);
283static int smp_mode = 1; /* protected by smp_alt */ 283static int smp_mode = 1; /* protected by smp_alt */
284 284
285void alternatives_smp_module_add(struct module *mod, char *name, 285void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
312 __func__, smp->locks, smp->locks_end, 312 __func__, smp->locks, smp->locks_end,
313 smp->text, smp->text_end, smp->name); 313 smp->text, smp->text_end, smp->name);
314 314
315 spin_lock(&smp_alt); 315 mutex_lock(&smp_alt);
316 list_add_tail(&smp->next, &smp_alt_modules); 316 list_add_tail(&smp->next, &smp_alt_modules);
317 if (boot_cpu_has(X86_FEATURE_UP)) 317 if (boot_cpu_has(X86_FEATURE_UP))
318 alternatives_smp_unlock(smp->locks, smp->locks_end, 318 alternatives_smp_unlock(smp->locks, smp->locks_end,
319 smp->text, smp->text_end); 319 smp->text, smp->text_end);
320 spin_unlock(&smp_alt); 320 mutex_unlock(&smp_alt);
321} 321}
322 322
323void alternatives_smp_module_del(struct module *mod) 323void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
327 if (smp_alt_once || noreplace_smp) 327 if (smp_alt_once || noreplace_smp)
328 return; 328 return;
329 329
330 spin_lock(&smp_alt); 330 mutex_lock(&smp_alt);
331 list_for_each_entry(item, &smp_alt_modules, next) { 331 list_for_each_entry(item, &smp_alt_modules, next) {
332 if (mod != item->mod) 332 if (mod != item->mod)
333 continue; 333 continue;
334 list_del(&item->next); 334 list_del(&item->next);
335 spin_unlock(&smp_alt); 335 mutex_unlock(&smp_alt);
336 DPRINTK("%s: %s\n", __func__, item->name); 336 DPRINTK("%s: %s\n", __func__, item->name);
337 kfree(item); 337 kfree(item);
338 return; 338 return;
339 } 339 }
340 spin_unlock(&smp_alt); 340 mutex_unlock(&smp_alt);
341} 341}
342 342
343void alternatives_smp_switch(int smp) 343void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
359 return; 359 return;
360 BUG_ON(!smp && (num_online_cpus() > 1)); 360 BUG_ON(!smp && (num_online_cpus() > 1));
361 361
362 spin_lock(&smp_alt); 362 mutex_lock(&smp_alt);
363 363
364 /* 364 /*
365 * Avoid unnecessary switches because it forces JIT based VMs to 365 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
383 mod->text, mod->text_end); 383 mod->text, mod->text_end);
384 } 384 }
385 smp_mode = smp; 385 smp_mode = smp;
386 spin_unlock(&smp_alt); 386 mutex_unlock(&smp_alt);
387} 387}
388 388
389#endif 389#endif
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index cb54d9e20f94..34101962fb0e 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1374,6 +1374,10 @@ void __init smp_intr_init(void)
1374 1374
1375 /* IPI for generic function call */ 1375 /* IPI for generic function call */
1376 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 1376 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1377
1378 /* IPI for single call function */
1379 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
1380 call_function_single_interrupt);
1377} 1381}
1378#endif 1382#endif
1379 1383
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 00e6d1370954..bf9b441331e9 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -204,6 +204,7 @@
204#include <linux/module.h> 204#include <linux/module.h>
205 205
206#include <linux/poll.h> 206#include <linux/poll.h>
207#include <linux/smp_lock.h>
207#include <linux/types.h> 208#include <linux/types.h>
208#include <linux/stddef.h> 209#include <linux/stddef.h>
209#include <linux/timer.h> 210#include <linux/timer.h>
@@ -1212,9 +1213,9 @@ static int suspend(int vetoable)
1212 if (err != APM_SUCCESS) 1213 if (err != APM_SUCCESS)
1213 apm_error("suspend", err); 1214 apm_error("suspend", err);
1214 err = (err == APM_SUCCESS) ? 0 : -EIO; 1215 err = (err == APM_SUCCESS) ? 0 : -EIO;
1215 device_power_up(); 1216 device_power_up(PMSG_RESUME);
1216 local_irq_enable(); 1217 local_irq_enable();
1217 device_resume(); 1218 device_resume(PMSG_RESUME);
1218 queue_event(APM_NORMAL_RESUME, NULL); 1219 queue_event(APM_NORMAL_RESUME, NULL);
1219 spin_lock(&user_list_lock); 1220 spin_lock(&user_list_lock);
1220 for (as = user_list; as != NULL; as = as->next) { 1221 for (as = user_list; as != NULL; as = as->next) {
@@ -1239,7 +1240,7 @@ static void standby(void)
1239 apm_error("standby", err); 1240 apm_error("standby", err);
1240 1241
1241 local_irq_disable(); 1242 local_irq_disable();
1242 device_power_up(); 1243 device_power_up(PMSG_RESUME);
1243 local_irq_enable(); 1244 local_irq_enable();
1244} 1245}
1245 1246
@@ -1325,7 +1326,7 @@ static void check_events(void)
1325 ignore_bounce = 1; 1326 ignore_bounce = 1;
1326 if ((event != APM_NORMAL_RESUME) 1327 if ((event != APM_NORMAL_RESUME)
1327 || (ignore_normal_resume == 0)) { 1328 || (ignore_normal_resume == 0)) {
1328 device_resume(); 1329 device_resume(PMSG_RESUME);
1329 queue_event(event, NULL); 1330 queue_event(event, NULL);
1330 } 1331 }
1331 ignore_normal_resume = 0; 1332 ignore_normal_resume = 0;
@@ -1549,10 +1550,12 @@ static int do_open(struct inode *inode, struct file *filp)
1549{ 1550{
1550 struct apm_user *as; 1551 struct apm_user *as;
1551 1552
1553 lock_kernel();
1552 as = kmalloc(sizeof(*as), GFP_KERNEL); 1554 as = kmalloc(sizeof(*as), GFP_KERNEL);
1553 if (as == NULL) { 1555 if (as == NULL) {
1554 printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", 1556 printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
1555 sizeof(*as)); 1557 sizeof(*as));
1558 unlock_kernel();
1556 return -ENOMEM; 1559 return -ENOMEM;
1557 } 1560 }
1558 as->magic = APM_BIOS_MAGIC; 1561 as->magic = APM_BIOS_MAGIC;
@@ -1574,6 +1577,7 @@ static int do_open(struct inode *inode, struct file *filp)
1574 user_list = as; 1577 user_list = as;
1575 spin_unlock(&user_list_lock); 1578 spin_unlock(&user_list_lock);
1576 filp->private_data = as; 1579 filp->private_data = as;
1580 unlock_kernel();
1577 return 0; 1581 return 0;
1578} 1582}
1579 1583
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index bd182b7616ee..7c36fb8a28d4 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -200,6 +200,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
200 * benefit in doing so. 200 * benefit in doing so.
201 */ 201 */
202 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { 202 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
203 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
203 if ((tseg>>PMD_SHIFT) < 204 if ((tseg>>PMD_SHIFT) <
204 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || 205 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
205 ((tseg>>PMD_SHIFT) < 206 ((tseg>>PMD_SHIFT) <
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 2026d2119cdb..1d181c40e2e1 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -16,16 +16,6 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
16 16
17static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
18{ 18{
19 /* Cache sizes */
20 unsigned n;
21
22 n = c->extended_cpuid_level;
23 if (n >= 0x80000008) {
24 unsigned eax = cpuid_eax(0x80000008);
25 c->x86_virt_bits = (eax >> 8) & 0xff;
26 c->x86_phys_bits = eax & 0xff;
27 }
28
29 if (c->x86 == 0x6 && c->x86_model >= 0xf) { 19 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
30 c->x86_cache_alignment = c->x86_clflush_size * 2; 20 c->x86_cache_alignment = c->x86_clflush_size * 2;
31 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 21 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index e7bf3c2dc5fe..c6bee77ca9e6 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -98,7 +98,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
98 98
99void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) 99void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
100{ 100{
101 unsigned int n, dummy, eax, ebx, ecx, edx; 101 unsigned int n, dummy, ebx, ecx, edx;
102 102
103 n = c->extended_cpuid_level; 103 n = c->extended_cpuid_level;
104 104
@@ -121,11 +121,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
121 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", 121 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
122 c->x86_cache_size, ecx & 0xFF); 122 c->x86_cache_size, ecx & 0xFF);
123 } 123 }
124 if (n >= 0x80000008) {
125 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
126 c->x86_virt_bits = (eax >> 8) & 0xff;
127 c->x86_phys_bits = eax & 0xff;
128 }
129} 124}
130 125
131void __cpuinit detect_ht(struct cpuinfo_x86 *c) 126void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -314,6 +309,13 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
314 if (c->extended_cpuid_level >= 0x80000007) 309 if (c->extended_cpuid_level >= 0x80000007)
315 c->x86_power = cpuid_edx(0x80000007); 310 c->x86_power = cpuid_edx(0x80000007);
316 311
312 if (c->extended_cpuid_level >= 0x80000008) {
313 u32 eax = cpuid_eax(0x80000008);
314
315 c->x86_virt_bits = (eax >> 8) & 0xff;
316 c->x86_phys_bits = eax & 0xff;
317 }
318
317 /* Assume all 64-bit CPUs support 32-bit syscall */ 319 /* Assume all 64-bit CPUs support 32-bit syscall */
318 set_cpu_cap(c, X86_FEATURE_SYSCALL32); 320 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
319 321
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fe9224c51d37..70609efdf1da 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
226 226
227 if (cpu_has_bts) 227 if (cpu_has_bts)
228 ds_init_intel(c); 228 ds_init_intel(c);
229
230#ifdef CONFIG_X86_NUMAQ
231 numaq_tsc_disable();
232#endif
229} 233}
230 234
231static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 235static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
index 02f773399e39..1019c58d39f0 100644
--- a/arch/x86/kernel/cpu/intel_64.c
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -54,9 +54,6 @@ static void __cpuinit srat_detect_node(void)
54 54
55static void __cpuinit init_intel(struct cpuinfo_x86 *c) 55static void __cpuinit init_intel(struct cpuinfo_x86 *c)
56{ 56{
57 /* Cache sizes */
58 unsigned n;
59
60 init_intel_cacheinfo(c); 57 init_intel_cacheinfo(c);
61 if (c->cpuid_level > 9) { 58 if (c->cpuid_level > 9) {
62 unsigned eax = cpuid_eax(10); 59 unsigned eax = cpuid_eax(10);
@@ -78,13 +75,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
78 if (cpu_has_bts) 75 if (cpu_has_bts)
79 ds_init_intel(c); 76 ds_init_intel(c);
80 77
81 n = c->extended_cpuid_level;
82 if (n >= 0x80000008) {
83 unsigned eax = cpuid_eax(0x80000008);
84 c->x86_virt_bits = (eax >> 8) & 0xff;
85 c->x86_phys_bits = eax & 0xff;
86 }
87
88 if (c->x86 == 15) 78 if (c->x86 == 15)
89 c->x86_cache_alignment = c->x86_clflush_size * 2; 79 c->x86_cache_alignment = c->x86_clflush_size * 2;
90 if (c->x86 == 6) 80 if (c->x86 == 6)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 501ca1cea27d..c4a7ec31394c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -9,6 +9,7 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/smp_lock.h>
12#include <linux/string.h> 13#include <linux/string.h>
13#include <linux/rcupdate.h> 14#include <linux/rcupdate.h>
14#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
@@ -363,7 +364,7 @@ static void mcheck_check_cpu(void *info)
363 364
364static void mcheck_timer(struct work_struct *work) 365static void mcheck_timer(struct work_struct *work)
365{ 366{
366 on_each_cpu(mcheck_check_cpu, NULL, 1, 1); 367 on_each_cpu(mcheck_check_cpu, NULL, 1);
367 368
368 /* 369 /*
369 * Alert userspace if needed. If we logged an MCE, reduce the 370 * Alert userspace if needed. If we logged an MCE, reduce the
@@ -532,10 +533,12 @@ static int open_exclu; /* already open exclusive? */
532 533
533static int mce_open(struct inode *inode, struct file *file) 534static int mce_open(struct inode *inode, struct file *file)
534{ 535{
536 lock_kernel();
535 spin_lock(&mce_state_lock); 537 spin_lock(&mce_state_lock);
536 538
537 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { 539 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
538 spin_unlock(&mce_state_lock); 540 spin_unlock(&mce_state_lock);
541 unlock_kernel();
539 return -EBUSY; 542 return -EBUSY;
540 } 543 }
541 544
@@ -544,6 +547,7 @@ static int mce_open(struct inode *inode, struct file *file)
544 open_count++; 547 open_count++;
545 548
546 spin_unlock(&mce_state_lock); 549 spin_unlock(&mce_state_lock);
550 unlock_kernel();
547 551
548 return nonseekable_open(inode, file); 552 return nonseekable_open(inode, file);
549} 553}
@@ -617,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
617 * Collect entries that were still getting written before the 621 * Collect entries that were still getting written before the
618 * synchronize. 622 * synchronize.
619 */ 623 */
620 on_each_cpu(collect_tscs, cpu_tsc, 1, 1); 624 on_each_cpu(collect_tscs, cpu_tsc, 1);
621 for (i = next; i < MCE_LOG_LEN; i++) { 625 for (i = next; i < MCE_LOG_LEN; i++) {
622 if (mcelog.entry[i].finished && 626 if (mcelog.entry[i].finished &&
623 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { 627 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -742,7 +746,7 @@ static void mce_restart(void)
742 if (next_interval) 746 if (next_interval)
743 cancel_delayed_work(&mcheck_work); 747 cancel_delayed_work(&mcheck_work);
744 /* Timer race is harmless here */ 748 /* Timer race is harmless here */
745 on_each_cpu(mce_init, NULL, 1, 1); 749 on_each_cpu(mce_init, NULL, 1);
746 next_interval = check_interval * HZ; 750 next_interval = check_interval * HZ;
747 if (next_interval) 751 if (next_interval)
748 schedule_delayed_work(&mcheck_work, 752 schedule_delayed_work(&mcheck_work,
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 00ccb6c14ec2..cc1fccdd31e0 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
59 59
60static void mce_work_fn(struct work_struct *work) 60static void mce_work_fn(struct work_struct *work)
61{ 61{
62 on_each_cpu(mce_checkregs, NULL, 1, 1); 62 on_each_cpu(mce_checkregs, NULL, 1);
63 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); 63 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
64} 64}
65 65
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 105afe12beb0..6f23969c8faf 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -223,7 +223,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
223 atomic_set(&data.gate,0); 223 atomic_set(&data.gate,0);
224 224
225 /* Start the ball rolling on other CPUs */ 225 /* Start the ball rolling on other CPUs */
226 if (smp_call_function(ipi_handler, &data, 1, 0) != 0) 226 if (smp_call_function(ipi_handler, &data, 0) != 0)
227 panic("mtrr: timed out waiting for other CPUs\n"); 227 panic("mtrr: timed out waiting for other CPUs\n");
228 228
229 local_irq_save(flags); 229 local_irq_save(flags);
@@ -1682,7 +1682,7 @@ void mtrr_ap_init(void)
1682 */ 1682 */
1683void mtrr_save_state(void) 1683void mtrr_save_state(void)
1684{ 1684{
1685 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); 1685 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
1686} 1686}
1687 1687
1688static int __init mtrr_init_finialize(void) 1688static int __init mtrr_init_finialize(void)
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 2e9bef6e3aa3..6d4bdc02388a 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -189,7 +189,7 @@ void disable_lapic_nmi_watchdog(void)
189 if (atomic_read(&nmi_active) <= 0) 189 if (atomic_read(&nmi_active) <= 0)
190 return; 190 return;
191 191
192 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); 192 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
193 193
194 if (wd_ops) 194 if (wd_ops)
195 wd_ops->unreserve(); 195 wd_ops->unreserve();
@@ -213,7 +213,7 @@ void enable_lapic_nmi_watchdog(void)
213 return; 213 return;
214 } 214 }
215 215
216 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); 216 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
217 touch_nmi_watchdog(); 217 touch_nmi_watchdog();
218} 218}
219 219
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index daff52a62248..2de5fa2bbf77 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -33,6 +33,7 @@
33#include <linux/init.h> 33#include <linux/init.h>
34#include <linux/poll.h> 34#include <linux/poll.h>
35#include <linux/smp.h> 35#include <linux/smp.h>
36#include <linux/smp_lock.h>
36#include <linux/major.h> 37#include <linux/major.h>
37#include <linux/fs.h> 38#include <linux/fs.h>
38#include <linux/smp_lock.h> 39#include <linux/smp_lock.h>
@@ -95,7 +96,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
95 for (; count; count -= 16) { 96 for (; count; count -= 16) {
96 cmd.eax = pos; 97 cmd.eax = pos;
97 cmd.ecx = pos >> 32; 98 cmd.ecx = pos >> 32;
98 smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); 99 smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
99 if (copy_to_user(tmp, &cmd, 16)) 100 if (copy_to_user(tmp, &cmd, 16))
100 return -EFAULT; 101 return -EFAULT;
101 tmp += 16; 102 tmp += 16;
@@ -107,15 +108,23 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
107 108
108static int cpuid_open(struct inode *inode, struct file *file) 109static int cpuid_open(struct inode *inode, struct file *file)
109{ 110{
110 unsigned int cpu = iminor(file->f_path.dentry->d_inode); 111 unsigned int cpu;
111 struct cpuinfo_x86 *c = &cpu_data(cpu); 112 struct cpuinfo_x86 *c;
112 113 int ret = 0;
113 if (cpu >= NR_CPUS || !cpu_online(cpu)) 114
114 return -ENXIO; /* No such CPU */ 115 lock_kernel();
116
117 cpu = iminor(file->f_path.dentry->d_inode);
118 if (cpu >= NR_CPUS || !cpu_online(cpu)) {
119 ret = -ENXIO; /* No such CPU */
120 goto out;
121 }
122 c = &cpu_data(cpu);
115 if (c->cpuid_level < 0) 123 if (c->cpuid_level < 0)
116 return -EIO; /* CPUID not supported */ 124 ret = -EIO; /* CPUID not supported */
117 125out:
118 return 0; 126 unlock_kernel();
127 return ret;
119} 128}
120 129
121/* 130/*
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a5383ae2cbe3..28c29180b380 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1049,11 +1049,6 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
1049#endif 1049#endif
1050 1050
1051/* 1051/*
1052 * Last pfn which the user wants to use.
1053 */
1054unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
1055
1056/*
1057 * Find the highest page frame number we have available 1052 * Find the highest page frame number we have available
1058 */ 1053 */
1059static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) 1054static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
@@ -1085,8 +1080,6 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
1085 1080
1086 if (last_pfn > max_arch_pfn) 1081 if (last_pfn > max_arch_pfn)
1087 last_pfn = max_arch_pfn; 1082 last_pfn = max_arch_pfn;
1088 if (last_pfn > end_user_pfn)
1089 last_pfn = end_user_pfn;
1090 1083
1091 printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", 1084 printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
1092 last_pfn, max_arch_pfn); 1085 last_pfn, max_arch_pfn);
@@ -1131,12 +1124,6 @@ int __init e820_find_active_region(const struct e820entry *ei,
1131 if (*ei_endpfn > last_pfn) 1124 if (*ei_endpfn > last_pfn)
1132 *ei_endpfn = last_pfn; 1125 *ei_endpfn = last_pfn;
1133 1126
1134 /* Obey end_user_pfn to save on memmap */
1135 if (*ei_startpfn >= end_user_pfn)
1136 return 0;
1137 if (*ei_endpfn > end_user_pfn)
1138 *ei_endpfn = end_user_pfn;
1139
1140 return 1; 1127 return 1;
1141} 1128}
1142 1129
@@ -1201,7 +1188,6 @@ static int __init parse_memopt(char *p)
1201 1188
1202 userdef = 1; 1189 userdef = 1;
1203 mem_size = memparse(p, &p); 1190 mem_size = memparse(p, &p);
1204 end_user_pfn = mem_size>>PAGE_SHIFT;
1205 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); 1191 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
1206 1192
1207 return 0; 1193 return 0;
@@ -1245,10 +1231,9 @@ static int __init parse_memmap_opt(char *p)
1245 } else if (*p == '$') { 1231 } else if (*p == '$') {
1246 start_at = memparse(p+1, &p); 1232 start_at = memparse(p+1, &p);
1247 e820_add_region(start_at, mem_size, E820_RESERVED); 1233 e820_add_region(start_at, mem_size, E820_RESERVED);
1248 } else { 1234 } else
1249 end_user_pfn = (mem_size >> PAGE_SHIFT);
1250 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); 1235 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
1251 } 1236
1252 return *p == '\0' ? 0 : -EINVAL; 1237 return *p == '\0' ? 0 : -EINVAL;
1253} 1238}
1254early_param("memmap", parse_memmap_opt); 1239early_param("memmap", parse_memmap_opt);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a4665f37cfc5..a0e11c0cc872 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -120,7 +120,18 @@ static struct chipset early_qrk[] __initdata = {
120 {} 120 {}
121}; 121};
122 122
123static void __init check_dev_quirk(int num, int slot, int func) 123/**
124 * check_dev_quirk - apply early quirks to a given PCI device
125 * @num: bus number
126 * @slot: slot number
127 * @func: PCI function
128 *
129 * Check the vendor & device ID against the early quirks table.
130 *
131 * If the device is single function, let early_quirks() know so we don't
132 * poke at this device again.
133 */
134static int __init check_dev_quirk(int num, int slot, int func)
124{ 135{
125 u16 class; 136 u16 class;
126 u16 vendor; 137 u16 vendor;
@@ -131,7 +142,7 @@ static void __init check_dev_quirk(int num, int slot, int func)
131 class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); 142 class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
132 143
133 if (class == 0xffff) 144 if (class == 0xffff)
134 return; 145 return -1; /* no class, treat as single function */
135 146
136 vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); 147 vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
137 148
@@ -154,7 +165,9 @@ static void __init check_dev_quirk(int num, int slot, int func)
154 type = read_pci_config_byte(num, slot, func, 165 type = read_pci_config_byte(num, slot, func,
155 PCI_HEADER_TYPE); 166 PCI_HEADER_TYPE);
156 if (!(type & 0x80)) 167 if (!(type & 0x80))
157 return; 168 return -1;
169
170 return 0;
158} 171}
159 172
160void __init early_quirks(void) 173void __init early_quirks(void)
@@ -167,6 +180,9 @@ void __init early_quirks(void)
167 /* Poor man's PCI discovery */ 180 /* Poor man's PCI discovery */
168 for (num = 0; num < 32; num++) 181 for (num = 0; num < 32; num++)
169 for (slot = 0; slot < 32; slot++) 182 for (slot = 0; slot < 32; slot++)
170 for (func = 0; func < 8; func++) 183 for (func = 0; func < 8; func++) {
171 check_dev_quirk(num, slot, func); 184 /* Only probe function 0 on single fn devices */
185 if (check_dev_quirk(num, slot, func))
186 break;
187 }
172} 188}
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 643fd861b724..ff9e7350da54 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -196,7 +196,7 @@ static struct console simnow_console = {
196static struct console *early_console = &early_vga_console; 196static struct console *early_console = &early_vga_console;
197static int early_console_initialized; 197static int early_console_initialized;
198 198
199void early_printk(const char *fmt, ...) 199asmlinkage void early_printk(const char *fmt, ...)
200{ 200{
201 char buf[512]; 201 char buf[512];
202 int n; 202 int n;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cfe28a715434..6bc07f0f1202 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
51#include <asm/percpu.h> 51#include <asm/percpu.h>
52#include <asm/dwarf2.h> 52#include <asm/dwarf2.h>
53#include <asm/processor-flags.h> 53#include <asm/processor-flags.h>
54#include <asm/ftrace.h>
54#include <asm/irq_vectors.h> 55#include <asm/irq_vectors.h>
55 56
56/* 57/*
@@ -1111,6 +1112,77 @@ ENDPROC(xen_failsafe_callback)
1111 1112
1112#endif /* CONFIG_XEN */ 1113#endif /* CONFIG_XEN */
1113 1114
1115#ifdef CONFIG_FTRACE
1116#ifdef CONFIG_DYNAMIC_FTRACE
1117
1118ENTRY(mcount)
1119 pushl %eax
1120 pushl %ecx
1121 pushl %edx
1122 movl 0xc(%esp), %eax
1123 subl $MCOUNT_INSN_SIZE, %eax
1124
1125.globl mcount_call
1126mcount_call:
1127 call ftrace_stub
1128
1129 popl %edx
1130 popl %ecx
1131 popl %eax
1132
1133 ret
1134END(mcount)
1135
1136ENTRY(ftrace_caller)
1137 pushl %eax
1138 pushl %ecx
1139 pushl %edx
1140 movl 0xc(%esp), %eax
1141 movl 0x4(%ebp), %edx
1142 subl $MCOUNT_INSN_SIZE, %eax
1143
1144.globl ftrace_call
1145ftrace_call:
1146 call ftrace_stub
1147
1148 popl %edx
1149 popl %ecx
1150 popl %eax
1151
1152.globl ftrace_stub
1153ftrace_stub:
1154 ret
1155END(ftrace_caller)
1156
1157#else /* ! CONFIG_DYNAMIC_FTRACE */
1158
1159ENTRY(mcount)
1160 cmpl $ftrace_stub, ftrace_trace_function
1161 jnz trace
1162.globl ftrace_stub
1163ftrace_stub:
1164 ret
1165
1166 /* taken from glibc */
1167trace:
1168 pushl %eax
1169 pushl %ecx
1170 pushl %edx
1171 movl 0xc(%esp), %eax
1172 movl 0x4(%ebp), %edx
1173 subl $MCOUNT_INSN_SIZE, %eax
1174
1175 call *ftrace_trace_function
1176
1177 popl %edx
1178 popl %ecx
1179 popl %eax
1180
1181 jmp ftrace_stub
1182END(mcount)
1183#endif /* CONFIG_DYNAMIC_FTRACE */
1184#endif /* CONFIG_FTRACE */
1185
1114.section .rodata,"a" 1186.section .rodata,"a"
1115#include "syscall_table_32.S" 1187#include "syscall_table_32.S"
1116 1188
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bb4e22f4892f..ae63e584c340 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,9 +51,115 @@
51#include <asm/page.h> 51#include <asm/page.h>
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h>
54 55
55 .code64 56 .code64
56 57
58#ifdef CONFIG_FTRACE
59#ifdef CONFIG_DYNAMIC_FTRACE
60ENTRY(mcount)
61
62 subq $0x38, %rsp
63 movq %rax, (%rsp)
64 movq %rcx, 8(%rsp)
65 movq %rdx, 16(%rsp)
66 movq %rsi, 24(%rsp)
67 movq %rdi, 32(%rsp)
68 movq %r8, 40(%rsp)
69 movq %r9, 48(%rsp)
70
71 movq 0x38(%rsp), %rdi
72 subq $MCOUNT_INSN_SIZE, %rdi
73
74.globl mcount_call
75mcount_call:
76 call ftrace_stub
77
78 movq 48(%rsp), %r9
79 movq 40(%rsp), %r8
80 movq 32(%rsp), %rdi
81 movq 24(%rsp), %rsi
82 movq 16(%rsp), %rdx
83 movq 8(%rsp), %rcx
84 movq (%rsp), %rax
85 addq $0x38, %rsp
86
87 retq
88END(mcount)
89
90ENTRY(ftrace_caller)
91
92 /* taken from glibc */
93 subq $0x38, %rsp
94 movq %rax, (%rsp)
95 movq %rcx, 8(%rsp)
96 movq %rdx, 16(%rsp)
97 movq %rsi, 24(%rsp)
98 movq %rdi, 32(%rsp)
99 movq %r8, 40(%rsp)
100 movq %r9, 48(%rsp)
101
102 movq 0x38(%rsp), %rdi
103 movq 8(%rbp), %rsi
104 subq $MCOUNT_INSN_SIZE, %rdi
105
106.globl ftrace_call
107ftrace_call:
108 call ftrace_stub
109
110 movq 48(%rsp), %r9
111 movq 40(%rsp), %r8
112 movq 32(%rsp), %rdi
113 movq 24(%rsp), %rsi
114 movq 16(%rsp), %rdx
115 movq 8(%rsp), %rcx
116 movq (%rsp), %rax
117 addq $0x38, %rsp
118
119.globl ftrace_stub
120ftrace_stub:
121 retq
122END(ftrace_caller)
123
124#else /* ! CONFIG_DYNAMIC_FTRACE */
125ENTRY(mcount)
126 cmpq $ftrace_stub, ftrace_trace_function
127 jnz trace
128.globl ftrace_stub
129ftrace_stub:
130 retq
131
132trace:
133 /* taken from glibc */
134 subq $0x38, %rsp
135 movq %rax, (%rsp)
136 movq %rcx, 8(%rsp)
137 movq %rdx, 16(%rsp)
138 movq %rsi, 24(%rsp)
139 movq %rdi, 32(%rsp)
140 movq %r8, 40(%rsp)
141 movq %r9, 48(%rsp)
142
143 movq 0x38(%rsp), %rdi
144 movq 8(%rbp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rdi
146
147 call *ftrace_trace_function
148
149 movq 48(%rsp), %r9
150 movq 40(%rsp), %r8
151 movq 32(%rsp), %rdi
152 movq 24(%rsp), %rsi
153 movq 16(%rsp), %rdx
154 movq 8(%rsp), %rcx
155 movq (%rsp), %rax
156 addq $0x38, %rsp
157
158 jmp ftrace_stub
159END(mcount)
160#endif /* CONFIG_DYNAMIC_FTRACE */
161#endif /* CONFIG_FTRACE */
162
57#ifndef CONFIG_PREEMPT 163#ifndef CONFIG_PREEMPT
58#define retint_kernel retint_restore_args 164#define retint_kernel retint_restore_args
59#endif 165#endif
@@ -710,6 +816,9 @@ END(invalidate_interrupt\num)
710ENTRY(call_function_interrupt) 816ENTRY(call_function_interrupt)
711 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt 817 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
712END(call_function_interrupt) 818END(call_function_interrupt)
819ENTRY(call_function_single_interrupt)
820 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
821END(call_function_single_interrupt)
713ENTRY(irq_move_cleanup_interrupt) 822ENTRY(irq_move_cleanup_interrupt)
714 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt 823 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
715END(irq_move_cleanup_interrupt) 824END(irq_move_cleanup_interrupt)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..ab115cd15fdf
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,141 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks goes to Ingo Molnar, for suggesting the idea.
7 * Mathieu Desnoyers, for suggesting postponing the modifications.
8 * Arjan van de Ven, for keeping me straight, and explaining to me
9 * the dangers of modifying code on the run.
10 */
11
12#include <linux/spinlock.h>
13#include <linux/hardirq.h>
14#include <linux/ftrace.h>
15#include <linux/percpu.h>
16#include <linux/init.h>
17#include <linux/list.h>
18
19#include <asm/alternative.h>
20#include <asm/ftrace.h>
21
22
23/* Long is fine, even if it is only 4 bytes ;-) */
24static long *ftrace_nop;
25
26union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE];
28 struct {
29 char e8;
30 int offset;
31 } __attribute__((packed));
32};
33
34
35static int notrace ftrace_calc_offset(long ip, long addr)
36{
37 return (int)(addr - ip);
38}
39
40notrace unsigned char *ftrace_nop_replace(void)
41{
42 return (char *)ftrace_nop;
43}
44
45notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{
47 static union ftrace_code_union calc;
48
49 calc.e8 = 0xe8;
50 calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
51
52 /*
53 * No locking needed, this must be called via kstop_machine
54 * which in essence is like running on a uniprocessor machine.
55 */
56 return calc.code;
57}
58
59notrace int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code)
62{
63 unsigned replaced;
64 unsigned old = *(unsigned *)old_code; /* 4 bytes */
65 unsigned new = *(unsigned *)new_code; /* 4 bytes */
66 unsigned char newch = new_code[4];
67 int faulted = 0;
68
69 /*
70 * Note: Due to modules and __init, code can
71 * disappear and change, we need to protect against faulting
72 * as well as code changing.
73 *
74 * No real locking needed, this code is run through
75 * kstop_machine.
76 */
77 asm volatile (
78 "1: lock\n"
79 " cmpxchg %3, (%2)\n"
80 " jnz 2f\n"
81 " movb %b4, 4(%2)\n"
82 "2:\n"
83 ".section .fixup, \"ax\"\n"
84 "3: movl $1, %0\n"
85 " jmp 2b\n"
86 ".previous\n"
87 _ASM_EXTABLE(1b, 3b)
88 : "=r"(faulted), "=a"(replaced)
89 : "r"(ip), "r"(new), "c"(newch),
90 "0"(faulted), "a"(old)
91 : "memory");
92 sync_core();
93
94 if (replaced != old && replaced != new)
95 faulted = 2;
96
97 return faulted;
98}
99
100notrace int ftrace_update_ftrace_func(ftrace_func_t func)
101{
102 unsigned long ip = (unsigned long)(&ftrace_call);
103 unsigned char old[MCOUNT_INSN_SIZE], *new;
104 int ret;
105
106 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
107 new = ftrace_call_replace(ip, (unsigned long)func);
108 ret = ftrace_modify_code(ip, old, new);
109
110 return ret;
111}
112
113notrace int ftrace_mcount_set(unsigned long *data)
114{
115 unsigned long ip = (long)(&mcount_call);
116 unsigned long *addr = data;
117 unsigned char old[MCOUNT_INSN_SIZE], *new;
118
119 /*
120 * Replace the mcount stub with a pointer to the
121 * ip recorder function.
122 */
123 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
124 new = ftrace_call_replace(ip, *addr);
125 *addr = ftrace_modify_code(ip, old, new);
126
127 return 0;
128}
129
130int __init ftrace_dyn_arch_init(void *data)
131{
132 const unsigned char *const *noptable = find_nop_table();
133
134 /* This is running in kstop_machine */
135
136 ftrace_mcount_set(data);
137
138 ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
139
140 return 0;
141}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ea230ec69057..0ea6a19bfdfe 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned long d, unsigned long a)
36} 36}
37 37
38#ifdef CONFIG_X86_64 38#ifdef CONFIG_X86_64
39
40#include <asm/pgtable.h> 39#include <asm/pgtable.h>
41 40#endif
42static inline void hpet_set_mapping(void)
43{
44 set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
45 __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
46 hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
47}
48
49static inline void hpet_clear_mapping(void)
50{
51 hpet_virt_address = NULL;
52}
53
54#else
55 41
56static inline void hpet_set_mapping(void) 42static inline void hpet_set_mapping(void)
57{ 43{
58 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); 44 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
45#ifdef CONFIG_X86_64
46 __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
47#endif
59} 48}
60 49
61static inline void hpet_clear_mapping(void) 50static inline void hpet_clear_mapping(void)
@@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(void)
63 iounmap(hpet_virt_address); 52 iounmap(hpet_virt_address);
64 hpet_virt_address = NULL; 53 hpet_virt_address = NULL;
65} 54}
66#endif
67 55
68/* 56/*
69 * HPET command line enable / disable 57 * HPET command line enable / disable
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e923..dd7ebee446af 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
1#include <linux/module.h> 1#include <linux/module.h>
2
2#include <asm/checksum.h> 3#include <asm/checksum.h>
3#include <asm/desc.h>
4#include <asm/pgtable.h> 4#include <asm/pgtable.h>
5#include <asm/desc.h>
6#include <asm/ftrace.h>
7
8#ifdef CONFIG_FTRACE
9/* mcount is defined in assembly */
10EXPORT_SYMBOL(mcount);
11#endif
5 12
6/* Networking helper routines. */ 13/* Networking helper routines. */
7EXPORT_SYMBOL(csum_partial_copy_generic); 14EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 382208d11f8d..a82065b0699e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -59,13 +59,6 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
59static DEFINE_SPINLOCK(ioapic_lock); 59static DEFINE_SPINLOCK(ioapic_lock);
60static DEFINE_SPINLOCK(vector_lock); 60static DEFINE_SPINLOCK(vector_lock);
61 61
62static bool mask_ioapic_irq_2 __initdata;
63
64void __init force_mask_ioapic_irq_2(void)
65{
66 mask_ioapic_irq_2 = true;
67}
68
69int timer_through_8259 __initdata; 62int timer_through_8259 __initdata;
70 63
71/* 64/*
@@ -1576,7 +1569,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1576 1569
1577void print_all_local_APICs(void) 1570void print_all_local_APICs(void)
1578{ 1571{
1579 on_each_cpu(print_local_APIC, NULL, 1, 1); 1572 on_each_cpu(print_local_APIC, NULL, 1);
1580} 1573}
1581 1574
1582void /*__init*/ print_PIC(void) 1575void /*__init*/ print_PIC(void)
@@ -2186,9 +2179,6 @@ static inline void __init check_timer(void)
2186 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 2179 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
2187 vector, apic1, pin1, apic2, pin2); 2180 vector, apic1, pin1, apic2, pin2);
2188 2181
2189 if (mask_ioapic_irq_2)
2190 mask_IO_APIC_irq(2);
2191
2192 /* 2182 /*
2193 * Some BIOS writers are clueless and report the ExtINTA 2183 * Some BIOS writers are clueless and report the ExtINTA
2194 * I/O APIC input from the cascaded 8259A as the timer 2184 * I/O APIC input from the cascaded 8259A as the timer
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 2db0f98e2af5..39f0be37e9a1 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -96,13 +96,6 @@ static int no_timer_check;
96 96
97static int disable_timer_pin_1 __initdata; 97static int disable_timer_pin_1 __initdata;
98 98
99static bool mask_ioapic_irq_2 __initdata;
100
101void __init force_mask_ioapic_irq_2(void)
102{
103 mask_ioapic_irq_2 = true;
104}
105
106int timer_through_8259 __initdata; 99int timer_through_8259 __initdata;
107 100
108/* Where if anywhere is the i8259 connect in external int mode */ 101/* Where if anywhere is the i8259 connect in external int mode */
@@ -1314,7 +1307,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1314 1307
1315void print_all_local_APICs (void) 1308void print_all_local_APICs (void)
1316{ 1309{
1317 on_each_cpu(print_local_APIC, NULL, 1, 1); 1310 on_each_cpu(print_local_APIC, NULL, 1);
1318} 1311}
1319 1312
1320void __apicdebuginit print_PIC(void) 1313void __apicdebuginit print_PIC(void)
@@ -2020,9 +2013,6 @@ static inline void __init check_timer(void)
2020 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 2013 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
2021 cfg->vector, apic1, pin1, apic2, pin2); 2014 cfg->vector, apic1, pin1, apic2, pin2);
2022 2015
2023 if (mask_ioapic_irq_2)
2024 mask_IO_APIC_irq(2);
2025
2026 /* 2016 /*
2027 * Some BIOS writers are clueless and report the ExtINTA 2017 * Some BIOS writers are clueless and report the ExtINTA
2028 * I/O APIC input from the cascaded 8259A as the timer 2018 * I/O APIC input from the cascaded 8259A as the timer
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 31f49e8f46a7..0373e88de95a 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -199,6 +199,10 @@ void __init native_init_IRQ(void)
199 /* IPI for generic function call */ 199 /* IPI for generic function call */
200 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 200 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
201 201
202 /* IPI for generic single function call */
203 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
204 call_function_single_interrupt);
205
202 /* Low priority IPI to cleanup after moving an irq */ 206 /* Low priority IPI to cleanup after moving an irq */
203 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 207 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
204#endif 208#endif
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 21f2bae98c15..a8449571858a 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
68 load_LDT(pc); 68 load_LDT(pc);
69 mask = cpumask_of_cpu(smp_processor_id()); 69 mask = cpumask_of_cpu(smp_processor_id());
70 if (!cpus_equal(current->mm->cpu_vm_mask, mask)) 70 if (!cpus_equal(current->mm->cpu_vm_mask, mask))
71 smp_call_function(flush_ldt, current->mm, 1, 1); 71 smp_call_function(flush_ldt, current->mm, 1);
72 preempt_enable(); 72 preempt_enable();
73#else 73#else
74 load_LDT(pc); 74 load_LDT(pc);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f4960171bc66..8864230d55af 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
11#include <linux/delay.h> 11#include <linux/delay.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
16#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
107 unsigned long page_list[PAGES_NR]; 109 unsigned long page_list[PAGES_NR];
108 void *control_page; 110 void *control_page;
109 111
112 tracer_disable();
113
110 /* Interrupts aren't acceptable while we reboot */ 114 /* Interrupts aren't acceptable while we reboot */
111 local_irq_disable(); 115 local_irq_disable();
112 116
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7830dc4a8380..9dd9262693a3 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
16#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
184 unsigned long page_list[PAGES_NR]; 186 unsigned long page_list[PAGES_NR];
185 void *control_page; 187 void *control_page;
186 188
189 tracer_disable();
190
187 /* Interrupts aren't acceptable while we reboot */ 191 /* Interrupts aren't acceptable while we reboot */
188 local_irq_disable(); 192 local_irq_disable();
189 193
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 9758fea87c5b..56b933119a04 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -76,6 +76,7 @@
76#include <linux/kernel.h> 76#include <linux/kernel.h>
77#include <linux/init.h> 77#include <linux/init.h>
78#include <linux/sched.h> 78#include <linux/sched.h>
79#include <linux/smp_lock.h>
79#include <linux/cpumask.h> 80#include <linux/cpumask.h>
80#include <linux/module.h> 81#include <linux/module.h>
81#include <linux/slab.h> 82#include <linux/slab.h>
@@ -423,6 +424,7 @@ out:
423 424
424static int microcode_open (struct inode *unused1, struct file *unused2) 425static int microcode_open (struct inode *unused1, struct file *unused2)
425{ 426{
427 cycle_kernel_lock();
426 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 428 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
427} 429}
428 430
@@ -489,7 +491,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
489#define microcode_dev_exit() do { } while(0) 491#define microcode_dev_exit() do { } while(0)
490#endif 492#endif
491 493
492static long get_next_ucode_from_buffer(void **mc, void *buf, 494static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
493 unsigned long size, long offset) 495 unsigned long size, long offset)
494{ 496{
495 microcode_header_t *mc_header; 497 microcode_header_t *mc_header;
@@ -523,7 +525,7 @@ static int cpu_request_microcode(int cpu)
523 char name[30]; 525 char name[30];
524 struct cpuinfo_x86 *c = &cpu_data(cpu); 526 struct cpuinfo_x86 *c = &cpu_data(cpu);
525 const struct firmware *firmware; 527 const struct firmware *firmware;
526 void *buf; 528 const u8 *buf;
527 unsigned long size; 529 unsigned long size;
528 long offset = 0; 530 long offset = 0;
529 int error; 531 int error;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 1f3abe048e93..a153b3905f60 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -117,12 +117,20 @@ static int msr_open(struct inode *inode, struct file *file)
117{ 117{
118 unsigned int cpu = iminor(file->f_path.dentry->d_inode); 118 unsigned int cpu = iminor(file->f_path.dentry->d_inode);
119 struct cpuinfo_x86 *c = &cpu_data(cpu); 119 struct cpuinfo_x86 *c = &cpu_data(cpu);
120 int ret = 0;
120 121
121 if (cpu >= NR_CPUS || !cpu_online(cpu)) 122 lock_kernel();
122 return -ENXIO; /* No such CPU */ 123 cpu = iminor(file->f_path.dentry->d_inode);
123 if (!cpu_has(c, X86_FEATURE_MSR))
124 return -EIO; /* MSR not supported */
125 124
125 if (cpu >= NR_CPUS || !cpu_online(cpu)) {
126 ret = -ENXIO; /* No such CPU */
127 goto out;
128 }
129 c = &cpu_data(cpu);
130 if (!cpu_has(c, X86_FEATURE_MSR))
131 ret = -EIO; /* MSR not supported */
132out:
133 unlock_kernel();
126 return 0; 134 return 0;
127} 135}
128 136
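The microcode and msr chardev changes above belong to the BKL pushdown: instead of the VFS taking the big kernel lock around every open(), drivers that still depend on it take it themselves, either with lock_kernel()/unlock_kernel() around the open body or with cycle_kernel_lock() when they only need to serialize against a currently running BKL holder. A hedged sketch of the resulting open() pattern; example_open() is an invented name, not a function from this patch:

#include <linux/fs.h>
#include <linux/smp_lock.h>

/* Illustrative only: a character-device open() after the BKL pushdown. */
static int example_open(struct inode *inode, struct file *file)
{
	int ret = 0;

	lock_kernel();	/* the VFS used to take this for us before the pushdown */
	/* ... device-number checks and setup that still assume the BKL ... */
	unlock_kernel();
	return ret;
}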
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 716b89284be0..ec024b3baad0 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -130,7 +130,7 @@ int __init check_nmi_watchdog(void)
130 130
131#ifdef CONFIG_SMP 131#ifdef CONFIG_SMP
132 if (nmi_watchdog == NMI_LOCAL_APIC) 132 if (nmi_watchdog == NMI_LOCAL_APIC)
133 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); 133 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
134#endif 134#endif
135 135
136 for_each_possible_cpu(cpu) 136 for_each_possible_cpu(cpu)
@@ -272,7 +272,7 @@ static void __acpi_nmi_enable(void *__unused)
272void acpi_nmi_enable(void) 272void acpi_nmi_enable(void)
273{ 273{
274 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) 274 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
275 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); 275 on_each_cpu(__acpi_nmi_enable, NULL, 1);
276} 276}
277 277
278static void __acpi_nmi_disable(void *__unused) 278static void __acpi_nmi_disable(void *__unused)
@@ -286,7 +286,7 @@ static void __acpi_nmi_disable(void *__unused)
286void acpi_nmi_disable(void) 286void acpi_nmi_disable(void)
287{ 287{
288 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) 288 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
289 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); 289 on_each_cpu(__acpi_nmi_disable, NULL, 1);
290} 290}
291 291
292void setup_apic_nmi_watchdog(void *unused) 292void setup_apic_nmi_watchdog(void *unused)
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index f0f1de1c4a1d..a23e8233b9ac 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -93,12 +93,13 @@ int __init get_memcfg_numaq(void)
93 return 1; 93 return 1;
94} 94}
95 95
96static int __init numaq_tsc_disable(void) 96void __init numaq_tsc_disable(void)
97{ 97{
98 if (!found_numaq)
99 return;
100
98 if (num_online_nodes() > 1) { 101 if (num_online_nodes() > 1) {
99 printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); 102 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
100 setup_clear_cpu_cap(X86_FEATURE_TSC); 103 setup_clear_cpu_cap(X86_FEATURE_TSC);
101 } 104 }
102 return 0;
103} 105}
104arch_initcall(numaq_tsc_disable);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d0d18db5d2a4..c3fe78406d18 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -630,6 +630,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
630 struct pci_dev *dev; 630 struct pci_dev *dev;
631 void *gatt; 631 void *gatt;
632 int i, error; 632 int i, error;
633 unsigned long start_pfn, end_pfn;
633 634
634 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 635 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
635 aper_size = aper_base = info->aper_size = 0; 636 aper_size = aper_base = info->aper_size = 0;
@@ -674,6 +675,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
674 675
675 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", 676 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
676 aper_base, aper_size>>10); 677 aper_base, aper_size>>10);
678
679 /* need to map that range */
680 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
681 if (end_pfn > max_low_pfn_mapped) {
682 start_pfn = (aper_base>>PAGE_SHIFT);
683 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
684 }
677 return 0; 685 return 0;
678 686
679 nommu: 687 nommu:
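The GART hunk makes sure the aperture's physical range is covered by the kernel direct mapping before the GATT is written: on 64-bit, pages above max_low_pfn_mapped are no longer mapped by default, so the aperture is converted to page frame numbers and, if needed, passed to init_memory_mapping(). A small sketch of the same address-to-pfn arithmetic with made-up values:

#include <asm/page.h>

/* Illustrative values: a 64 MB aperture at 3 GB physical. */
static void example_aperture_pfns(void)
{
	unsigned long aper_base = 0xc0000000UL;	/* 3 GB  */
	unsigned long aper_size = 64UL << 20;	/* 64 MB */

	unsigned long start_pfn = aper_base >> PAGE_SHIFT;		/* 0xc0000 */
	unsigned long end_pfn = (aper_base + aper_size) >> PAGE_SHIFT;	/* 0xc4000 */

	/*
	 * Only needed when the range lies above what is already mapped:
	 * if (end_pfn > max_low_pfn_mapped)
	 *         init_memory_mapping(start_pfn << PAGE_SHIFT,
	 *                             end_pfn << PAGE_SHIFT);
	 */
	(void)start_pfn;
	(void)end_pfn;
}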
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4061d63aabe7..4d629c62f4f8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,12 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <asm/system.h>
11
12unsigned long idle_halt;
13EXPORT_SYMBOL(idle_halt);
14unsigned long idle_nomwait;
15EXPORT_SYMBOL(idle_nomwait);
10 16
11struct kmem_cache *task_xstate_cachep; 17struct kmem_cache *task_xstate_cachep;
12 18
@@ -132,7 +138,7 @@ void cpu_idle_wait(void)
132{ 138{
133 smp_mb(); 139 smp_mb();
134 /* kick all the CPUs so that they exit out of pm_idle */ 140 /* kick all the CPUs so that they exit out of pm_idle */
135 smp_call_function(do_nothing, NULL, 0, 1); 141 smp_call_function(do_nothing, NULL, 1);
136} 142}
137EXPORT_SYMBOL_GPL(cpu_idle_wait); 143EXPORT_SYMBOL_GPL(cpu_idle_wait);
138 144
@@ -325,7 +331,27 @@ static int __init idle_setup(char *str)
325 pm_idle = poll_idle; 331 pm_idle = poll_idle;
326 } else if (!strcmp(str, "mwait")) 332 } else if (!strcmp(str, "mwait"))
327 force_mwait = 1; 333 force_mwait = 1;
328 else 334 else if (!strcmp(str, "halt")) {
335 /*
336 * When the boot option of idle=halt is added, halt is
337 * forced to be used for CPU idle. In such case CPU C2/C3
338 * won't be used again.
339 * To continue to load the CPU idle driver, don't touch
340 * the boot_option_idle_override.
341 */
342 pm_idle = default_idle;
343 idle_halt = 1;
344 return 0;
345 } else if (!strcmp(str, "nomwait")) {
346 /*
347 * If the boot option of "idle=nomwait" is added,
348 * it means that mwait will be disabled for CPU C2/C3
349 * states. In such case it won't touch the variable
350 * of boot_option_idle_override.
351 */
352 idle_nomwait = 1;
353 return 0;
354 } else
329 return -1; 355 return -1;
330 356
331 boot_option_idle_override = 1; 357 boot_option_idle_override = 1;
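The two new branches add boot parameters rather than code paths: "idle=halt" forces default_idle() (plain HLT) and sets idle_halt so the ACPI idle driver stops offering C2/C3, while "idle=nomwait" sets idle_nomwait so MWAIT is not used for those states; both are passed on the kernel command line. A hedged sketch of how other code can key off the exported flags, assuming the extern declarations land in <asm/processor.h> as in this series; the helper name is invented:

#include <linux/kernel.h>
#include <asm/processor.h>	/* idle_halt, idle_nomwait (added by this series) */

/* Illustrative helper: may C2/C3 entry via MWAIT still be offered? */
static int example_mwait_cstates_allowed(void)
{
	/* Both flags stay 0 unless "idle=halt" or "idle=nomwait" was booted. */
	return !idle_halt && !idle_nomwait;
}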
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9a139f6c9df3..0c3927accb00 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -142,7 +142,10 @@ void cpu_idle(void)
142 142
143 local_irq_disable(); 143 local_irq_disable();
144 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 144 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
145 /* Don't trace irqs off for idle */
146 stop_critical_timings();
145 pm_idle(); 147 pm_idle();
148 start_critical_timings();
146 } 149 }
147 tick_nohz_restart_sched_tick(); 150 tick_nohz_restart_sched_tick();
148 preempt_enable_no_resched(); 151 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index db5eb963e4df..a8e53626ac9a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -134,7 +134,10 @@ void cpu_idle(void)
134 */ 134 */
135 local_irq_disable(); 135 local_irq_disable();
136 enter_idle(); 136 enter_idle();
137 /* Don't trace irqs off for idle */
138 stop_critical_timings();
137 pm_idle(); 139 pm_idle();
140 start_critical_timings();
138 /* In many cases the interrupt that ended idle 141 /* In many cases the interrupt that ended idle
139 has already called exit_idle. But some idle 142 has already called exit_idle. But some idle
140 loops can be woken up without interrupt. */ 143 loops can be woken up without interrupt. */
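Both idle loops now bracket pm_idle() with stop_critical_timings()/start_critical_timings() so the irqsoff latency tracer does not account the arbitrarily long idle wait as an interrupts-off critical section. The same pattern applies anywhere interrupts are deliberately left off while waiting; a hedged sketch (example_idle_step() is an invented name):

#include <linux/ftrace.h>	/* stop_critical_timings(), start_critical_timings() */
#include <linux/irqflags.h>

/* Illustrative: an idle-style wait that must not count as irqs-off latency. */
static void example_idle_step(void (*arch_idle)(void))
{
	local_irq_disable();
	stop_critical_timings();	/* tell the irqsoff tracer to look away */
	arch_idle();			/* e.g. pm_idle(); re-enables interrupts */
	start_critical_timings();
}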
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 79bdcd11c66e..d13858818100 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -266,6 +266,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
266 hpet_print_force_info(); 266 hpet_print_force_info();
267} 267}
268 268
269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
270 old_ich_force_enable_hpet_user);
269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, 271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
270 old_ich_force_enable_hpet_user); 272 old_ich_force_enable_hpet_user);
271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, 273DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2e78a143dec3..6121ffd46b9e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -684,6 +684,11 @@ void __init setup_arch(char **cmdline_p)
684 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 684 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
685 } 685 }
686 686
687#ifdef CONFIG_PCI
688 if (pci_early_dump_regs)
689 early_dump_pci_devices();
690#endif
691
687 finish_e820_parsing(); 692 finish_e820_parsing();
688 693
689#ifdef CONFIG_X86_32 694#ifdef CONFIG_X86_32
@@ -851,6 +856,14 @@ void __init setup_arch(char **cmdline_p)
851 init_cpu_to_node(); 856 init_cpu_to_node();
852#endif 857#endif
853 858
859#ifdef CONFIG_X86_NUMAQ
860 /*
861 * need to check online nodes num, call it
862 * here before time_init/tsc_init
863 */
864 numaq_tsc_disable();
865#endif
866
854 init_apic_mappings(); 867 init_apic_mappings();
855 ioapic_init_mappings(); 868 ioapic_init_mappings();
856 869
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5fc310f746fc..cac68430d31f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -343,23 +343,23 @@ static const cpumask_t cpu_mask_none;
343/* 343/*
344 * Returns a pointer to the bitmask of CPUs on Node 'node'. 344 * Returns a pointer to the bitmask of CPUs on Node 'node'.
345 */ 345 */
346cpumask_t *_node_to_cpumask_ptr(int node) 346const cpumask_t *_node_to_cpumask_ptr(int node)
347{ 347{
348 if (node_to_cpumask_map == NULL) { 348 if (node_to_cpumask_map == NULL) {
349 printk(KERN_WARNING 349 printk(KERN_WARNING
350 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", 350 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
351 node); 351 node);
352 dump_stack(); 352 dump_stack();
353 return &cpu_online_map; 353 return (const cpumask_t *)&cpu_online_map;
354 } 354 }
355 if (node >= nr_node_ids) { 355 if (node >= nr_node_ids) {
356 printk(KERN_WARNING 356 printk(KERN_WARNING
357 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", 357 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
358 node, nr_node_ids); 358 node, nr_node_ids);
359 dump_stack(); 359 dump_stack();
360 return (cpumask_t *)&cpu_mask_none; 360 return &cpu_mask_none;
361 } 361 }
362 return (cpumask_t *)&node_to_cpumask_map[node]; 362 return &node_to_cpumask_map[node];
363} 363}
364EXPORT_SYMBOL(_node_to_cpumask_ptr); 364EXPORT_SYMBOL(_node_to_cpumask_ptr);
365 365
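With this change _node_to_cpumask_ptr() hands out a const pointer, so callers can no longer scribble on the shared masks it may return (cpu_online_map, cpu_mask_none, or an entry of node_to_cpumask_map). A hedged sketch of read-only use through the node_to_cpumask_ptr() helper macro, assuming the macro form from <asm/topology.h> in this kernel; the counting function is invented:

#include <linux/cpumask.h>
#include <asm/topology.h>

static int example_count_cpus_on_node(int node)
{
	int cpu, count = 0;

	/* Declares 'mask' as a const cpumask_t * pointing at the node's mask. */
	node_to_cpumask_ptr(mask, node);

	for_each_cpu_mask(cpu, *mask)
		count++;
	return count;
}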
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 0cb7aadc87cd..361b7a4c640c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -121,132 +121,23 @@ static void native_smp_send_reschedule(int cpu)
121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
122} 122}
123 123
124/* 124void native_send_call_func_single_ipi(int cpu)
125 * Structure and data for smp_call_function(). This is designed to minimise
126 * static memory requirements. It also looks cleaner.
127 */
128static DEFINE_SPINLOCK(call_lock);
129
130struct call_data_struct {
131 void (*func) (void *info);
132 void *info;
133 atomic_t started;
134 atomic_t finished;
135 int wait;
136};
137
138void lock_ipi_call_lock(void)
139{ 125{
140 spin_lock_irq(&call_lock); 126 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
141}
142
143void unlock_ipi_call_lock(void)
144{
145 spin_unlock_irq(&call_lock);
146}
147
148static struct call_data_struct *call_data;
149
150static void __smp_call_function(void (*func) (void *info), void *info,
151 int nonatomic, int wait)
152{
153 struct call_data_struct data;
154 int cpus = num_online_cpus() - 1;
155
156 if (!cpus)
157 return;
158
159 data.func = func;
160 data.info = info;
161 atomic_set(&data.started, 0);
162 data.wait = wait;
163 if (wait)
164 atomic_set(&data.finished, 0);
165
166 call_data = &data;
167 mb();
168
169 /* Send a message to all other CPUs and wait for them to respond */
170 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
171
172 /* Wait for response */
173 while (atomic_read(&data.started) != cpus)
174 cpu_relax();
175
176 if (wait)
177 while (atomic_read(&data.finished) != cpus)
178 cpu_relax();
179} 127}
180 128
181 129void native_send_call_func_ipi(cpumask_t mask)
182/**
183 * smp_call_function_mask(): Run a function on a set of other CPUs.
184 * @mask: The set of cpus to run on. Must not include the current cpu.
185 * @func: The function to run. This must be fast and non-blocking.
186 * @info: An arbitrary pointer to pass to the function.
187 * @wait: If true, wait (atomically) until function has completed on other CPUs.
188 *
189 * Returns 0 on success, else a negative status code.
190 *
191 * If @wait is true, then returns once @func has returned; otherwise
192 * it returns just before the target cpu calls @func.
193 *
194 * You must not call this function with disabled interrupts or from a
195 * hardware interrupt handler or from a bottom half handler.
196 */
197static int
198native_smp_call_function_mask(cpumask_t mask,
199 void (*func)(void *), void *info,
200 int wait)
201{ 130{
202 struct call_data_struct data;
203 cpumask_t allbutself; 131 cpumask_t allbutself;
204 int cpus;
205
206 /* Can deadlock when called with interrupts disabled */
207 WARN_ON(irqs_disabled());
208
209 /* Holding any lock stops cpus from going down. */
210 spin_lock(&call_lock);
211 132
212 allbutself = cpu_online_map; 133 allbutself = cpu_online_map;
213 cpu_clear(smp_processor_id(), allbutself); 134 cpu_clear(smp_processor_id(), allbutself);
214 135
215 cpus_and(mask, mask, allbutself);
216 cpus = cpus_weight(mask);
217
218 if (!cpus) {
219 spin_unlock(&call_lock);
220 return 0;
221 }
222
223 data.func = func;
224 data.info = info;
225 atomic_set(&data.started, 0);
226 data.wait = wait;
227 if (wait)
228 atomic_set(&data.finished, 0);
229
230 call_data = &data;
231 wmb();
232
233 /* Send a message to other CPUs */
234 if (cpus_equal(mask, allbutself) && 136 if (cpus_equal(mask, allbutself) &&
235 cpus_equal(cpu_online_map, cpu_callout_map)) 137 cpus_equal(cpu_online_map, cpu_callout_map))
236 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 138 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
237 else 139 else
238 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 140 send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
239
240 /* Wait for response */
241 while (atomic_read(&data.started) != cpus)
242 cpu_relax();
243
244 if (wait)
245 while (atomic_read(&data.finished) != cpus)
246 cpu_relax();
247 spin_unlock(&call_lock);
248
249 return 0;
250} 141}
251 142
252static void stop_this_cpu(void *dummy) 143static void stop_this_cpu(void *dummy)
@@ -268,18 +159,13 @@ static void stop_this_cpu(void *dummy)
268 159
269static void native_smp_send_stop(void) 160static void native_smp_send_stop(void)
270{ 161{
271 int nolock;
272 unsigned long flags; 162 unsigned long flags;
273 163
274 if (reboot_force) 164 if (reboot_force)
275 return; 165 return;
276 166
277 /* Don't deadlock on the call lock in panic */ 167 smp_call_function(stop_this_cpu, NULL, 0);
278 nolock = !spin_trylock(&call_lock);
279 local_irq_save(flags); 168 local_irq_save(flags);
280 __smp_call_function(stop_this_cpu, NULL, 0, 0);
281 if (!nolock)
282 spin_unlock(&call_lock);
283 disable_local_APIC(); 169 disable_local_APIC();
284 local_irq_restore(flags); 170 local_irq_restore(flags);
285} 171}
@@ -301,33 +187,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
301 187
302void smp_call_function_interrupt(struct pt_regs *regs) 188void smp_call_function_interrupt(struct pt_regs *regs)
303{ 189{
304 void (*func) (void *info) = call_data->func;
305 void *info = call_data->info;
306 int wait = call_data->wait;
307
308 ack_APIC_irq(); 190 ack_APIC_irq();
309 /*
310 * Notify initiating CPU that I've grabbed the data and am
311 * about to execute the function
312 */
313 mb();
314 atomic_inc(&call_data->started);
315 /*
316 * At this point the info structure may be out of scope unless wait==1
317 */
318 irq_enter(); 191 irq_enter();
319 (*func)(info); 192 generic_smp_call_function_interrupt();
320#ifdef CONFIG_X86_32 193#ifdef CONFIG_X86_32
321 __get_cpu_var(irq_stat).irq_call_count++; 194 __get_cpu_var(irq_stat).irq_call_count++;
322#else 195#else
323 add_pda(irq_call_count, 1); 196 add_pda(irq_call_count, 1);
324#endif 197#endif
325 irq_exit(); 198 irq_exit();
199}
326 200
327 if (wait) { 201void smp_call_function_single_interrupt(struct pt_regs *regs)
328 mb(); 202{
329 atomic_inc(&call_data->finished); 203 ack_APIC_irq();
330 } 204 irq_enter();
205 generic_smp_call_function_single_interrupt();
206#ifdef CONFIG_X86_32
207 __get_cpu_var(irq_stat).irq_call_count++;
208#else
209 add_pda(irq_call_count, 1);
210#endif
211 irq_exit();
331} 212}
332 213
333struct smp_ops smp_ops = { 214struct smp_ops smp_ops = {
@@ -338,7 +219,8 @@ struct smp_ops smp_ops = {
338 219
339 .smp_send_stop = native_smp_send_stop, 220 .smp_send_stop = native_smp_send_stop,
340 .smp_send_reschedule = native_smp_send_reschedule, 221 .smp_send_reschedule = native_smp_send_reschedule,
341 .smp_call_function_mask = native_smp_call_function_mask, 222
223 .send_call_func_ipi = native_send_call_func_ipi,
224 .send_call_func_single_ipi = native_send_call_func_single_ipi,
342}; 225};
343EXPORT_SYMBOL_GPL(smp_ops); 226EXPORT_SYMBOL_GPL(smp_ops);
344
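This rewrite moves the cross-CPU call machinery out of the architecture: arch code now only supplies the two IPI senders in smp_ops and acknowledges the vectors, while queueing, waiting and the old call_data bookkeeping live in the generic kernel/smp.c helpers reached via generic_smp_call_function_interrupt() and generic_smp_call_function_single_interrupt(). One practical consequence for callers is that the wait flag decides whether a stack-based argument is safe; a hedged sketch (the names are invented):

#include <linux/smp.h>

struct remote_result {
	int value;
};

/* Runs in hardirq context on the target CPU. */
static void read_remote_cpu(void *info)
{
	struct remote_result *res = info;

	res->value = smp_processor_id();
}

static int example_query_cpu(int cpu)
{
	struct remote_result res = { .value = -1 };

	/*
	 * 'res' lives on our stack, so wait=1 is mandatory here: the generic
	 * helper spins until read_remote_cpu() has completed on 'cpu'.
	 * With wait=0 the call is only queued and may run after we return.
	 */
	smp_call_function_single(cpu, read_remote_cpu, &res, 1);
	return res.value;
}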
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6cd002f3e20e..23c3b3d1f4cc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -328,12 +328,12 @@ static void __cpuinit start_secondary(void *unused)
328 * lock helps us to not include this cpu in a currently in progress 328 * lock helps us to not include this cpu in a currently in progress
329 * smp_call_function(). 329 * smp_call_function().
330 */ 330 */
331 lock_ipi_call_lock(); 331 ipi_call_lock_irq();
332#ifdef CONFIG_X86_IO_APIC 332#ifdef CONFIG_X86_IO_APIC
333 setup_vector_irq(smp_processor_id()); 333 setup_vector_irq(smp_processor_id());
334#endif 334#endif
335 cpu_set(smp_processor_id(), cpu_online_map); 335 cpu_set(smp_processor_id(), cpu_online_map);
336 unlock_ipi_call_lock(); 336 ipi_call_unlock_irq();
337 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 337 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
338 338
339 setup_secondary_clock(); 339 setup_secondary_clock();
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 3449064d141a..99941b37eca0 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu)
25 per_cpu(cpu_number, cpu) = cpu; 25 per_cpu(cpu_number, cpu) = cpu;
26} 26}
27#endif 27#endif
28
29/**
30 * smp_call_function(): Run a function on all other CPUs.
31 * @func: The function to run. This must be fast and non-blocking.
32 * @info: An arbitrary pointer to pass to the function.
33 * @nonatomic: Unused.
34 * @wait: If true, wait (atomically) until function has completed on other CPUs.
35 *
36 * Returns 0 on success, else a negative status code.
37 *
38 * If @wait is true, then returns once @func has returned; otherwise
39 * it returns just before the target cpu calls @func.
40 *
41 * You must not call this function with disabled interrupts or from a
42 * hardware interrupt handler or from a bottom half handler.
43 */
44int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
45 int wait)
46{
47 return smp_call_function_mask(cpu_online_map, func, info, wait);
48}
49EXPORT_SYMBOL(smp_call_function);
50
51/**
52 * smp_call_function_single - Run a function on a specific CPU
53 * @cpu: The target CPU. Cannot be the calling CPU.
54 * @func: The function to run. This must be fast and non-blocking.
55 * @info: An arbitrary pointer to pass to the function.
56 * @nonatomic: Unused.
57 * @wait: If true, wait until function has completed on other CPUs.
58 *
59 * Returns 0 on success, else a negative status code.
60 *
61 * If @wait is true, then returns once @func has returned; otherwise
62 * it returns just before the target cpu calls @func.
63 */
64int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
65 int nonatomic, int wait)
66{
67 /* prevent preemption and reschedule on another processor */
68 int ret;
69 int me = get_cpu();
70 if (cpu == me) {
71 local_irq_disable();
72 func(info);
73 local_irq_enable();
74 put_cpu();
75 return 0;
76 }
77
78 ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
79
80 put_cpu();
81 return ret;
82}
83EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c28c342c162f..a03e7f6d90c3 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -74,6 +74,7 @@ void save_stack_trace(struct stack_trace *trace)
74 if (trace->nr_entries < trace->max_entries) 74 if (trace->nr_entries < trace->max_entries)
75 trace->entries[trace->nr_entries++] = ULONG_MAX; 75 trace->entries[trace->nr_entries++] = ULONG_MAX;
76} 76}
77EXPORT_SYMBOL_GPL(save_stack_trace);
77 78
78void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 79void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
79{ 80{
@@ -81,3 +82,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
81 if (trace->nr_entries < trace->max_entries) 82 if (trace->nr_entries < trace->max_entries)
82 trace->entries[trace->nr_entries++] = ULONG_MAX; 83 trace->entries[trace->nr_entries++] = ULONG_MAX;
83} 84}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
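save_stack_trace() and save_stack_trace_tsk() are now exported (GPL-only) so modular users such as the new tracers can capture backtraces. A hedged sketch of a module grabbing and printing its own stack; the function name is invented and the %pS format relies on the vsprintf extension also used by the traps_64.c change further down:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stacktrace.h>

static void example_dump_my_stack(void)
{
	unsigned long entries[16];
	struct stack_trace trace = {
		.max_entries	= ARRAY_SIZE(entries),
		.entries	= entries,
		.skip		= 0,
	};
	unsigned int i;

	save_stack_trace(&trace);	/* fills entries[], sets nr_entries */
	for (i = 0; i < trace.nr_entries; i++)
		printk(KERN_DEBUG " %pS\n", (void *)entries[i]);
}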
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 9bb2363851af..fec1ecedc9b7 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -238,6 +238,6 @@ static void do_flush_tlb_all(void *info)
238 238
239void flush_tlb_all(void) 239void flush_tlb_all(void)
240{ 240{
241 on_each_cpu(do_flush_tlb_all, NULL, 1, 1); 241 on_each_cpu(do_flush_tlb_all, NULL, 1);
242} 242}
243 243
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 5039d0f097a2..dcbf7a1159ea 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -275,5 +275,5 @@ static void do_flush_tlb_all(void *info)
275 275
276void flush_tlb_all(void) 276void flush_tlb_all(void)
277{ 277{
278 on_each_cpu(do_flush_tlb_all, NULL, 1, 1); 278 on_each_cpu(do_flush_tlb_all, NULL, 1);
279} 279}
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 74e992957ff6..2696a6837782 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -105,30 +105,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
105 105
106void printk_address(unsigned long address, int reliable) 106void printk_address(unsigned long address, int reliable)
107{ 107{
108#ifdef CONFIG_KALLSYMS 108 printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
109 unsigned long offset = 0, symsize;
110 const char *symname;
111 char *modname;
112 char *delim = ":";
113 char namebuf[KSYM_NAME_LEN];
114 char reliab[4] = "";
115
116 symname = kallsyms_lookup(address, &symsize, &offset,
117 &modname, namebuf);
118 if (!symname) {
119 printk(" [<%016lx>]\n", address);
120 return;
121 }
122 if (!reliable)
123 strcpy(reliab, "? ");
124
125 if (!modname)
126 modname = delim = "";
127 printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
128 address, reliab, delim, modname, delim, symname, offset, symsize);
129#else
130 printk(" [<%016lx>]\n", address);
131#endif
132} 109}
133 110
134static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 111static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
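printk_address() shrinks to a single printk because vsprintf now understands the %pS pointer extension, which performs the kallsyms lookup (symbol+offset/size, module name) that the removed open-coded block used to do by hand. A hedged example of the two related extensions; the function name is invented:

#include <linux/kernel.h>

static void example_print_symbols(void)
{
	/* %pS: resolve a text address, e.g. "schedule+0x5a/0x4b0" */
	printk(KERN_INFO "ip = %pS\n", (void *)_RET_IP_);

	/*
	 * %pF: same, but dereferences a function descriptor first
	 * (only matters on ia64/ppc64; identical to %pS on x86).
	 */
	printk(KERN_INFO "fn = %pF\n", example_print_symbols);
}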
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3c36f92160c9..7603c0553909 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -358,6 +358,7 @@ static cycle_t read_tsc(void)
358 ret : clocksource_tsc.cycle_last; 358 ret : clocksource_tsc.cycle_last;
359} 359}
360 360
361#ifdef CONFIG_X86_64
361static cycle_t __vsyscall_fn vread_tsc(void) 362static cycle_t __vsyscall_fn vread_tsc(void)
362{ 363{
363 cycle_t ret = (cycle_t)vget_cycles(); 364 cycle_t ret = (cycle_t)vget_cycles();
@@ -365,6 +366,7 @@ static cycle_t __vsyscall_fn vread_tsc(void)
365 return ret >= __vsyscall_gtod_data.clock.cycle_last ? 366 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
366 ret : __vsyscall_gtod_data.clock.cycle_last; 367 ret : __vsyscall_gtod_data.clock.cycle_last;
367} 368}
369#endif
368 370
369static struct clocksource clocksource_tsc = { 371static struct clocksource clocksource_tsc = {
370 .name = "tsc", 372 .name = "tsc",
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 2674f5796275..cdb2363697d2 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -49,16 +49,14 @@ SECTIONS
49 _etext = .; /* End of text section */ 49 _etext = .; /* End of text section */
50 } :text = 0x9090 50 } :text = 0x9090
51 51
52 NOTES :text :note
53
52 . = ALIGN(16); /* Exception table */ 54 . = ALIGN(16); /* Exception table */
53 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { 55 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
54 __start___ex_table = .; 56 __start___ex_table = .;
55 *(__ex_table) 57 *(__ex_table)
56 __stop___ex_table = .; 58 __stop___ex_table = .;
57 } 59 } :text = 0x9090
58
59 NOTES :text :note
60
61 BUG_TABLE :text
62 60
63 RODATA 61 RODATA
64 62
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fd246e22fe6b..63e5c1a22e88 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -19,7 +19,7 @@ PHDRS {
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22 note PT_NOTE FLAGS(4); /* R__ */ 22 note PT_NOTE FLAGS(0); /* ___ */
23} 23}
24SECTIONS 24SECTIONS
25{ 25{
@@ -40,16 +40,14 @@ SECTIONS
40 _etext = .; /* End of text section */ 40 _etext = .; /* End of text section */
41 } :text = 0x9090 41 } :text = 0x9090
42 42
43 NOTES :text :note
44
43 . = ALIGN(16); /* Exception table */ 45 . = ALIGN(16); /* Exception table */
44 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { 46 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
45 __start___ex_table = .; 47 __start___ex_table = .;
46 *(__ex_table) 48 *(__ex_table)
47 __stop___ex_table = .; 49 __stop___ex_table = .;
48 } 50 } :text = 0x9090
49
50 NOTES :text :note
51
52 BUG_TABLE :text
53 51
54 RODATA 52 RODATA
55 53
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index c87cbd84c3e5..0b8b6690a86d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
42#include <asm/topology.h> 42#include <asm/topology.h>
43#include <asm/vgtod.h> 43#include <asm/vgtod.h>
44 44
45#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 45#define __vsyscall(nr) \
46 __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
46#define __syscall_clobber "r11","cx","memory" 47#define __syscall_clobber "r11","cx","memory"
47 48
48/* 49/*
@@ -278,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
278{ 279{
279 long cpu = (long)arg; 280 long cpu = (long)arg;
280 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) 281 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
281 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); 282 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
282 return NOTIFY_DONE; 283 return NOTIFY_DONE;
283} 284}
284 285
@@ -301,7 +302,7 @@ static int __init vsyscall_init(void)
301#ifdef CONFIG_SYSCTL 302#ifdef CONFIG_SYSCTL
302 register_sysctl_table(kernel_root_table2); 303 register_sysctl_table(kernel_root_table2);
303#endif 304#endif
304 on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); 305 on_each_cpu(cpu_vsyscall_init, NULL, 1);
305 hotcpu_notifier(cpu_vsyscall_notifier, 0); 306 hotcpu_notifier(cpu_vsyscall_notifier, 0);
306 return 0; 307 return 0;
307} 308}
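The __vsyscall() attribute now also carries notrace: vsyscall functions execute from a user-visible fixmap page, so the compiler must not emit profiling (mcount) calls into them when CONFIG_FTRACE builds the kernel with -pg. notrace expands to the no_instrument_function attribute from <linux/linkage.h>; a hedged sketch of marking a function that must never enter the tracer (the helper itself is invented):

#include <linux/types.h>
#include <linux/linkage.h>	/* notrace */

/* Never instrumented with an mcount call, even in an ftrace (-pg) build. */
static notrace u64 example_read_tsc(void)
{
	unsigned int lo, hi;

	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((u64)hi << 32) | lo;
}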
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 2f306a826897..b545f371b5f5 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,13 +2,20 @@
2 All C exports should go in the respective C files. */ 2 All C exports should go in the respective C files. */
3 3
4#include <linux/module.h> 4#include <linux/module.h>
5#include <net/checksum.h>
6#include <linux/smp.h> 5#include <linux/smp.h>
7 6
7#include <net/checksum.h>
8
8#include <asm/processor.h> 9#include <asm/processor.h>
9#include <asm/uaccess.h>
10#include <asm/pgtable.h> 10#include <asm/pgtable.h>
11#include <asm/uaccess.h>
11#include <asm/desc.h> 12#include <asm/desc.h>
13#include <asm/ftrace.h>
14
15#ifdef CONFIG_FTRACE
16/* mcount is defined in assembly */
17EXPORT_SYMBOL(mcount);
18#endif
12 19
13EXPORT_SYMBOL(kernel_thread); 20EXPORT_SYMBOL(kernel_thread);
14 21
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 540e95179074..10ce6ee4c491 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -335,7 +335,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
335{ 335{
336 if (vmx->vcpu.cpu == -1) 336 if (vmx->vcpu.cpu == -1)
337 return; 337 return;
338 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1); 338 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
339 vmx->launched = 0; 339 vmx->launched = 0;
340} 340}
341 341
@@ -2968,7 +2968,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
2968 struct vcpu_vmx *vmx = to_vmx(vcpu); 2968 struct vcpu_vmx *vmx = to_vmx(vcpu);
2969 2969
2970 if (vmx->vmcs) { 2970 if (vmx->vmcs) {
2971 on_each_cpu(__vcpu_clear, vmx, 0, 1); 2971 on_each_cpu(__vcpu_clear, vmx, 1);
2972 free_vmcs(vmx->vmcs); 2972 free_vmcs(vmx->vmcs);
2973 vmx->vmcs = NULL; 2973 vmx->vmcs = NULL;
2974 } 2974 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63a77caa59f1..0faa2546b1cd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4044,6 +4044,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
4044 * So need not to call smp_call_function_single() in that case. 4044 * So need not to call smp_call_function_single() in that case.
4045 */ 4045 */
4046 if (vcpu->guest_mode && vcpu->cpu != cpu) 4046 if (vcpu->guest_mode && vcpu->cpu != cpu)
4047 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); 4047 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
4048 put_cpu(); 4048 put_cpu();
4049} 4049}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 83226e0a7ce4..aa3fa4119424 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@
5obj-$(CONFIG_SMP) := msr-on-cpu.o 5obj-$(CONFIG_SMP) := msr-on-cpu.o
6 6
7lib-y := delay.o 7lib-y := delay.o
8lib-y += thunk_$(BITS).o
8lib-y += usercopy_$(BITS).o getuser.o putuser.o 9lib-y += usercopy_$(BITS).o getuser.o putuser.o
9lib-y += memcpy_$(BITS).o 10lib-y += memcpy_$(BITS).o
10 11
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index 57d043fa893e..d5a2b39f882b 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c
@@ -30,10 +30,10 @@ static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
30 30
31 rv.msr_no = msr_no; 31 rv.msr_no = msr_no;
32 if (safe) { 32 if (safe) {
33 smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 0, 1); 33 smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
34 err = rv.err; 34 err = rv.err;
35 } else { 35 } else {
36 smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); 36 smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
37 } 37 }
38 *l = rv.l; 38 *l = rv.l;
39 *h = rv.h; 39 *h = rv.h;
@@ -64,10 +64,10 @@ static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
64 rv.l = l; 64 rv.l = l;
65 rv.h = h; 65 rv.h = h;
66 if (safe) { 66 if (safe) {
67 smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 0, 1); 67 smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
68 err = rv.err; 68 err = rv.err;
69 } else { 69 } else {
70 smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); 70 smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
71 } 71 }
72 72
73 return err; 73 return err;
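msr-on-cpu.c only had to drop the extra argument; the exported helpers themselves are unchanged. A hedged usage sketch that reads an MSR on another CPU through the safe variant; the function name is invented and MSR_IA32_APICBASE is just a convenient, architecturally defined register to use as an example:

#include <linux/kernel.h>
#include <asm/msr.h>

static void example_peek_apicbase(unsigned int cpu)
{
	u32 lo, hi;

	/* Runs the rdmsr on 'cpu' via smp_call_function_single(..., 1). */
	if (rdmsr_safe_on_cpu(cpu, MSR_IA32_APICBASE, &lo, &hi) == 0)
		printk(KERN_INFO "CPU%u: IA32_APICBASE = %08x%08x\n",
		       cpu, hi, lo);
}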
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 000000000000..650b11e00ecc
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
1/*
2 * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
3 * Copyright 2008 by Steven Rostedt, Red Hat, Inc
4 * (inspired by Andi Kleen's thunk_64.S)
5 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */
7
8 #include <linux/linkage.h>
9
10#define ARCH_TRACE_IRQS_ON \
11 pushl %eax; \
12 pushl %ecx; \
13 pushl %edx; \
14 call trace_hardirqs_on; \
15 popl %edx; \
16 popl %ecx; \
17 popl %eax;
18
19#define ARCH_TRACE_IRQS_OFF \
20 pushl %eax; \
21 pushl %ecx; \
22 pushl %edx; \
23 call trace_hardirqs_off; \
24 popl %edx; \
25 popl %ecx; \
26 popl %eax;
27
28#ifdef CONFIG_TRACE_IRQFLAGS
29 /* put return address in eax (arg1) */
30 .macro thunk_ra name,func
31 .globl \name
32\name:
33 pushl %eax
34 pushl %ecx
35 pushl %edx
36 /* Place EIP in the arg1 */
37 movl 3*4(%esp), %eax
38 call \func
39 popl %edx
40 popl %ecx
41 popl %eax
42 ret
43 .endm
44
45 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
46 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9f..bf9a7d5a5428 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
2 * Save registers before calling assembly functions. This avoids 2 * Save registers before calling assembly functions. This avoids
3 * disturbance of register allocation in some inline assembly constructs. 3 * disturbance of register allocation in some inline assembly constructs.
4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs. 4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
5 * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
5 * Subject to the GNU public license, v.2. No warranty of any kind. 6 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */ 7 */
7 8
@@ -42,8 +43,22 @@
42#endif 43#endif
43 44
44#ifdef CONFIG_TRACE_IRQFLAGS 45#ifdef CONFIG_TRACE_IRQFLAGS
45 thunk trace_hardirqs_on_thunk,trace_hardirqs_on 46 /* put return address in rdi (arg1) */
46 thunk trace_hardirqs_off_thunk,trace_hardirqs_off 47 .macro thunk_ra name,func
48 .globl \name
49\name:
50 CFI_STARTPROC
51 SAVE_ARGS
52 /* SAVE_ARGS pushs 9 elements */
53 /* the next element would be the rip */
54 movq 9*8(%rsp), %rdi
55 call \func
56 jmp restore
57 CFI_ENDPROC
58 .endm
59
60 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
61 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif 62#endif
48 63
49#ifdef CONFIG_DEBUG_LOCK_ALLOC 64#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8dedd01e909f..ee0fba092157 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -950,94 +950,24 @@ static void smp_stop_cpu_function(void *dummy)
950 halt(); 950 halt();
951} 951}
952 952
953static DEFINE_SPINLOCK(call_lock);
954
955struct call_data_struct {
956 void (*func) (void *info);
957 void *info;
958 volatile unsigned long started;
959 volatile unsigned long finished;
960 int wait;
961};
962
963static struct call_data_struct *call_data;
964
965/* execute a thread on a new CPU. The function to be called must be 953/* execute a thread on a new CPU. The function to be called must be
966 * previously set up. This is used to schedule a function for 954 * previously set up. This is used to schedule a function for
967 * execution on all CPUs - set up the function then broadcast a 955 * execution on all CPUs - set up the function then broadcast a
968 * function_interrupt CPI to come here on each CPU */ 956 * function_interrupt CPI to come here on each CPU */
969static void smp_call_function_interrupt(void) 957static void smp_call_function_interrupt(void)
970{ 958{
971 void (*func) (void *info) = call_data->func;
972 void *info = call_data->info;
973 /* must take copy of wait because call_data may be replaced
974 * unless the function is waiting for us to finish */
975 int wait = call_data->wait;
976 __u8 cpu = smp_processor_id();
977
978 /*
979 * Notify initiating CPU that I've grabbed the data and am
980 * about to execute the function
981 */
982 mb();
983 if (!test_and_clear_bit(cpu, &call_data->started)) {
984 /* If the bit wasn't set, this could be a replay */
985 printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
986 " with no call pending\n", cpu);
987 return;
988 }
989 /*
990 * At this point the info structure may be out of scope unless wait==1
991 */
992 irq_enter(); 959 irq_enter();
993 (*func) (info); 960 generic_smp_call_function_interrupt();
994 __get_cpu_var(irq_stat).irq_call_count++; 961 __get_cpu_var(irq_stat).irq_call_count++;
995 irq_exit(); 962 irq_exit();
996 if (wait) {
997 mb();
998 clear_bit(cpu, &call_data->finished);
999 }
1000} 963}
1001 964
1002static int 965static void smp_call_function_single_interrupt(void)
1003voyager_smp_call_function_mask(cpumask_t cpumask,
1004 void (*func) (void *info), void *info, int wait)
1005{ 966{
1006 struct call_data_struct data; 967 irq_enter();
1007 u32 mask = cpus_addr(cpumask)[0]; 968 generic_smp_call_function_single_interrupt();
1008 969 __get_cpu_var(irq_stat).irq_call_count++;
1009 mask &= ~(1 << smp_processor_id()); 970 irq_exit();
1010
1011 if (!mask)
1012 return 0;
1013
1014 /* Can deadlock when called with interrupts disabled */
1015 WARN_ON(irqs_disabled());
1016
1017 data.func = func;
1018 data.info = info;
1019 data.started = mask;
1020 data.wait = wait;
1021 if (wait)
1022 data.finished = mask;
1023
1024 spin_lock(&call_lock);
1025 call_data = &data;
1026 wmb();
1027 /* Send a message to all other CPUs and wait for them to respond */
1028 send_CPI(mask, VIC_CALL_FUNCTION_CPI);
1029
1030 /* Wait for response */
1031 while (data.started)
1032 barrier();
1033
1034 if (wait)
1035 while (data.finished)
1036 barrier();
1037
1038 spin_unlock(&call_lock);
1039
1040 return 0;
1041} 971}
1042 972
1043/* Sorry about the name. In an APIC based system, the APICs 973/* Sorry about the name. In an APIC based system, the APICs
@@ -1094,6 +1024,12 @@ void smp_qic_call_function_interrupt(struct pt_regs *regs)
1094 smp_call_function_interrupt(); 1024 smp_call_function_interrupt();
1095} 1025}
1096 1026
1027void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
1028{
1029 ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
1030 smp_call_function_single_interrupt();
1031}
1032
1097void smp_vic_cpi_interrupt(struct pt_regs *regs) 1033void smp_vic_cpi_interrupt(struct pt_regs *regs)
1098{ 1034{
1099 struct pt_regs *old_regs = set_irq_regs(regs); 1035 struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1114,6 +1050,8 @@ void smp_vic_cpi_interrupt(struct pt_regs *regs)
1114 smp_enable_irq_interrupt(); 1050 smp_enable_irq_interrupt();
1115 if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu])) 1051 if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
1116 smp_call_function_interrupt(); 1052 smp_call_function_interrupt();
1053 if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
1054 smp_call_function_single_interrupt();
1117 set_irq_regs(old_regs); 1055 set_irq_regs(old_regs);
1118} 1056}
1119 1057
@@ -1129,7 +1067,7 @@ static void do_flush_tlb_all(void *info)
1129/* flush the TLB of every active CPU in the system */ 1067/* flush the TLB of every active CPU in the system */
1130void flush_tlb_all(void) 1068void flush_tlb_all(void)
1131{ 1069{
1132 on_each_cpu(do_flush_tlb_all, 0, 1, 1); 1070 on_each_cpu(do_flush_tlb_all, 0, 1);
1133} 1071}
1134 1072
1135/* send a reschedule CPI to one CPU by physical CPU number*/ 1073/* send a reschedule CPI to one CPU by physical CPU number*/
@@ -1161,7 +1099,7 @@ int safe_smp_processor_id(void)
1161/* broadcast a halt to all other CPUs */ 1099/* broadcast a halt to all other CPUs */
1162static void voyager_smp_send_stop(void) 1100static void voyager_smp_send_stop(void)
1163{ 1101{
1164 smp_call_function(smp_stop_cpu_function, NULL, 1, 1); 1102 smp_call_function(smp_stop_cpu_function, NULL, 1);
1165} 1103}
1166 1104
1167/* this function is triggered in time.c when a clock tick fires 1105/* this function is triggered in time.c when a clock tick fires
@@ -1848,5 +1786,7 @@ struct smp_ops smp_ops = {
1848 1786
1849 .smp_send_stop = voyager_smp_send_stop, 1787 .smp_send_stop = voyager_smp_send_stop,
1850 .smp_send_reschedule = voyager_smp_send_reschedule, 1788 .smp_send_reschedule = voyager_smp_send_reschedule,
1851 .smp_call_function_mask = voyager_smp_call_function_mask, 1789
1790 .send_call_func_ipi = native_send_call_func_ipi,
1791 .send_call_func_single_ipi = native_send_call_func_single_ipi,
1852}; 1792};
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c107641cd39b..9873716e9f76 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15
11ifeq ($(CONFIG_X86_32),y) 16ifeq ($(CONFIG_X86_32),y)
12obj-$(CONFIG_NUMA) += discontig_32.o 17obj-$(CONFIG_NUMA) += discontig_32.o
13else 18else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d0f5fce77d95..455f3fe67b42 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
10#include <linux/string.h> 10#include <linux/string.h>
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/ptrace.h> 12#include <linux/ptrace.h>
13#include <linux/mmiotrace.h>
13#include <linux/mman.h> 14#include <linux/mman.h>
14#include <linux/mm.h> 15#include <linux/mm.h>
15#include <linux/smp.h> 16#include <linux/smp.h>
@@ -49,6 +50,16 @@
49#define PF_RSVD (1<<3) 50#define PF_RSVD (1<<3)
50#define PF_INSTR (1<<4) 51#define PF_INSTR (1<<4)
51 52
53static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
54{
55#ifdef CONFIG_MMIOTRACE_HOOKS
56 if (unlikely(is_kmmio_active()))
57 if (kmmio_handler(regs, addr) == 1)
58 return -1;
59#endif
60 return 0;
61}
62
52static inline int notify_page_fault(struct pt_regs *regs) 63static inline int notify_page_fault(struct pt_regs *regs)
53{ 64{
54#ifdef CONFIG_KPROBES 65#ifdef CONFIG_KPROBES
@@ -598,6 +609,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
598 609
599 if (notify_page_fault(regs)) 610 if (notify_page_fault(regs))
600 return; 611 return;
612 if (unlikely(kmmio_fault(regs, address)))
613 return;
601 614
602 /* 615 /*
603 * We fault-in kernel-space virtual memory on-demand. The 616 * We fault-in kernel-space virtual memory on-demand. The
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 029e8cffca9e..9689a5138e64 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1035,6 +1035,8 @@ void mark_rodata_ro(void)
1035 unsigned long start = PFN_ALIGN(_text); 1035 unsigned long start = PFN_ALIGN(_text);
1036 unsigned long size = PFN_ALIGN(_etext) - start; 1036 unsigned long size = PFN_ALIGN(_etext) - start;
1037 1037
1038#ifndef CONFIG_DYNAMIC_FTRACE
1039 /* Dynamic tracing modifies the kernel text section */
1038 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 1040 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
1039 printk(KERN_INFO "Write protecting the kernel text: %luk\n", 1041 printk(KERN_INFO "Write protecting the kernel text: %luk\n",
1040 size >> 10); 1042 size >> 10);
@@ -1047,6 +1049,8 @@ void mark_rodata_ro(void)
1047 printk(KERN_INFO "Testing CPA: write protecting again\n"); 1049 printk(KERN_INFO "Testing CPA: write protecting again\n");
1048 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); 1050 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
1049#endif 1051#endif
1052#endif /* CONFIG_DYNAMIC_FTRACE */
1053
1050 start += size; 1054 start += size;
1051 size = (unsigned long)__end_rodata - start; 1055 size = (unsigned long)__end_rodata - start;
1052 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 1056 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 122bcef222fc..306049edd553 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -644,7 +644,7 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
644 unsigned long pud_phys; 644 unsigned long pud_phys;
645 pud_t *pud; 645 pud_t *pud;
646 646
647 next = start + PGDIR_SIZE; 647 next = (start + PGDIR_SIZE) & PGDIR_MASK;
648 if (next > end) 648 if (next > end)
649 next = end; 649 next = end;
650 650
@@ -763,6 +763,20 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
763 end_pfn = end>>PAGE_SHIFT; 763 end_pfn = end>>PAGE_SHIFT;
764 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); 764 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
765 765
766 /* try to merge same page size and continuous */
767 for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
768 unsigned long old_start;
769 if (mr[i].end != mr[i+1].start ||
770 mr[i].page_size_mask != mr[i+1].page_size_mask)
771 continue;
772 /* move it */
773 old_start = mr[i].start;
774 memmove(&mr[i], &mr[i+1],
775 (nr_range - 1 - i) * sizeof (struct map_range));
776 mr[i].start = old_start;
777 nr_range--;
778 }
779
766 for (i = 0; i < nr_range; i++) 780 for (i = 0; i < nr_range; i++)
767 printk(KERN_DEBUG " %010lx - %010lx page %s\n", 781 printk(KERN_DEBUG " %010lx - %010lx page %s\n",
768 mr[i].start, mr[i].end, 782 mr[i].start, mr[i].end,
@@ -977,6 +991,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
977void mark_rodata_ro(void) 991void mark_rodata_ro(void)
978{ 992{
979 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); 993 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
994 unsigned long rodata_start =
995 ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
996
997#ifdef CONFIG_DYNAMIC_FTRACE
998 /* Dynamic tracing modifies the kernel text section */
999 start = rodata_start;
1000#endif
980 1001
981 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 1002 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
982 (end - start) >> 10); 1003 (end - start) >> 10);
@@ -986,8 +1007,7 @@ void mark_rodata_ro(void)
986 * The rodata section (but not the kernel text!) should also be 1007 * The rodata section (but not the kernel text!) should also be
987 * not-executable. 1008 * not-executable.
988 */ 1009 */
989 start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; 1010 set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
990 set_memory_nx(start, (end - start) >> PAGE_SHIFT);
991 1011
992 rodata_test(); 1012 rodata_test();
993 1013
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 115f13ee40c9..24c1d3c30186 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/vmalloc.h> 14#include <linux/vmalloc.h>
15#include <linux/mmiotrace.h>
15 16
16#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
17#include <asm/e820.h> 18#include <asm/e820.h>
@@ -122,10 +123,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
122{ 123{
123 unsigned long pfn, offset, vaddr; 124 unsigned long pfn, offset, vaddr;
124 resource_size_t last_addr; 125 resource_size_t last_addr;
126 const resource_size_t unaligned_phys_addr = phys_addr;
127 const unsigned long unaligned_size = size;
125 struct vm_struct *area; 128 struct vm_struct *area;
126 unsigned long new_prot_val; 129 unsigned long new_prot_val;
127 pgprot_t prot; 130 pgprot_t prot;
128 int retval; 131 int retval;
132 void __iomem *ret_addr;
129 133
130 /* Don't allow wraparound or zero size */ 134 /* Don't allow wraparound or zero size */
131 last_addr = phys_addr + size - 1; 135 last_addr = phys_addr + size - 1;
@@ -233,7 +237,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
233 return NULL; 237 return NULL;
234 } 238 }
235 239
236 return (void __iomem *) (vaddr + offset); 240 ret_addr = (void __iomem *) (vaddr + offset);
241 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
242
243 return ret_addr;
237} 244}
238 245
239/** 246/**
@@ -348,6 +355,8 @@ void iounmap(volatile void __iomem *addr)
348 addr = (volatile void __iomem *) 355 addr = (volatile void __iomem *)
349 (PAGE_MASK & (unsigned long __force)addr); 356 (PAGE_MASK & (unsigned long __force)addr);
350 357
358 mmiotrace_iounmap(addr);
359
351 /* Use the vm area unlocked, assuming the caller 360 /* Use the vm area unlocked, assuming the caller
352 ensures there isn't another iounmap for the same address 361 ensures there isn't another iounmap for the same address
353 in parallel. Reuse of the virtual address is prevented by 362 in parallel. Reuse of the virtual address is prevented by
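The ioremap()/iounmap() hooks are what let mmiotrace see MMIO windows: the original (unaligned) physical address and size are recorded so later reads and writes can be attributed to the right mapping. A hedged sketch of a driver-style mapping whose accesses would be logged while mmiotrace is armed; the function name, BAR address/size parameters and register offset are invented:

#include <linux/io.h>
#include <linux/kernel.h>

static void __iomem *example_map_and_poke(resource_size_t bar, unsigned long len)
{
	void __iomem *regs;

	regs = ioremap_nocache(bar, len);	/* mmiotrace_ioremap() records this */
	if (!regs)
		return NULL;

	/*
	 * With mmiotrace armed, this writel/readl pair shows up in the
	 * trace, attributed to the mapping created above.
	 */
	writel(0, regs + 0x10);
	(void)readl(regs + 0x10);

	return regs;	/* caller later does iounmap(regs) -> mmiotrace_iounmap() */
}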
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
new file mode 100644
index 000000000000..93d82038af4b
--- /dev/null
+++ b/arch/x86/mm/kmmio.c
@@ -0,0 +1,510 @@
1/* Support for MMIO probes.
 2 * Borrows much code from kprobes
3 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
4 * 2007 Alexander Eichner
5 * 2008 Pekka Paalanen <pq@iki.fi>
6 */
7
8#include <linux/list.h>
9#include <linux/rculist.h>
10#include <linux/spinlock.h>
11#include <linux/hash.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/uaccess.h>
16#include <linux/ptrace.h>
17#include <linux/preempt.h>
18#include <linux/percpu.h>
19#include <linux/kdebug.h>
20#include <linux/mutex.h>
21#include <linux/io.h>
22#include <asm/cacheflush.h>
23#include <asm/tlbflush.h>
24#include <linux/errno.h>
25#include <asm/debugreg.h>
26#include <linux/mmiotrace.h>
27
28#define KMMIO_PAGE_HASH_BITS 4
29#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
30
31struct kmmio_fault_page {
32 struct list_head list;
33 struct kmmio_fault_page *release_next;
34 unsigned long page; /* location of the fault page */
35
36 /*
37 * Number of times this page has been registered as a part
38 * of a probe. If zero, page is disarmed and this may be freed.
39 * Used only by writers (RCU).
40 */
41 int count;
42};
43
44struct kmmio_delayed_release {
45 struct rcu_head rcu;
46 struct kmmio_fault_page *release_list;
47};
48
49struct kmmio_context {
50 struct kmmio_fault_page *fpage;
51 struct kmmio_probe *probe;
52 unsigned long saved_flags;
53 unsigned long addr;
54 int active;
55};
56
57static DEFINE_SPINLOCK(kmmio_lock);
58
59/* Protected by kmmio_lock */
60unsigned int kmmio_count;
61
62/* Read-protected by RCU, write-protected by kmmio_lock. */
63static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
64static LIST_HEAD(kmmio_probes);
65
66static struct list_head *kmmio_page_list(unsigned long page)
67{
68 return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
69}
70
71/* Accessed per-cpu */
72static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
73
74/*
75 * this is basically a dynamic stabbing problem:
76 * Could use the existing prio tree code or
77 * Possible better implementations:
78 * The Interval Skip List: A Data Structure for Finding All Intervals That
79 * Overlap a Point (might be simple)
80 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
81 */
82/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
83static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
84{
85 struct kmmio_probe *p;
86 list_for_each_entry_rcu(p, &kmmio_probes, list) {
87 if (addr >= p->addr && addr <= (p->addr + p->len))
88 return p;
89 }
90 return NULL;
91}
92
93/* You must be holding RCU read lock. */
94static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
95{
96 struct list_head *head;
97 struct kmmio_fault_page *p;
98
99 page &= PAGE_MASK;
100 head = kmmio_page_list(page);
101 list_for_each_entry_rcu(p, head, list) {
102 if (p->page == page)
103 return p;
104 }
105 return NULL;
106}
107
108static void set_page_present(unsigned long addr, bool present,
109 unsigned int *pglevel)
110{
111 pteval_t pteval;
112 pmdval_t pmdval;
113 unsigned int level;
114 pmd_t *pmd;
115 pte_t *pte = lookup_address(addr, &level);
116
117 if (!pte) {
118 pr_err("kmmio: no pte for page 0x%08lx\n", addr);
119 return;
120 }
121
122 if (pglevel)
123 *pglevel = level;
124
125 switch (level) {
126 case PG_LEVEL_2M:
127 pmd = (pmd_t *)pte;
128 pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
129 if (present)
130 pmdval |= _PAGE_PRESENT;
131 set_pmd(pmd, __pmd(pmdval));
132 break;
133
134 case PG_LEVEL_4K:
135 pteval = pte_val(*pte) & ~_PAGE_PRESENT;
136 if (present)
137 pteval |= _PAGE_PRESENT;
138 set_pte_atomic(pte, __pte(pteval));
139 break;
140
141 default:
142 pr_err("kmmio: unexpected page level 0x%x.\n", level);
143 return;
144 }
145
146 __flush_tlb_one(addr);
147}
148
149/** Mark the given page as not present. Access to it will trigger a fault. */
150static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
151{
152 set_page_present(page & PAGE_MASK, false, pglevel);
153}
154
155/** Mark the given page as present. */
156static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
157{
158 set_page_present(page & PAGE_MASK, true, pglevel);
159}
160
161/*
162 * This is being called from do_page_fault().
163 *
164 * We may be in an interrupt or a critical section. Also prefetching may
165 * trigger a page fault. We may be in the middle of a process switch.
166 * We cannot take any locks, because we could be executing especially
167 * within a kmmio critical section.
168 *
169 * Local interrupts are disabled, so preemption cannot happen.
170 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
171 */
172/*
173 * Interrupts are disabled on entry as the page fault uses an interrupt gate
174 * and they remain disabled throughout this function.
175 */
176int kmmio_handler(struct pt_regs *regs, unsigned long addr)
177{
178 struct kmmio_context *ctx;
179 struct kmmio_fault_page *faultpage;
180 int ret = 0; /* default to fault not handled */
181
182 /*
183 * Preemption is now disabled to prevent process switch during
184 * single stepping. We can only handle one active kmmio trace
185 * per cpu, so ensure that we finish it before something else
186 * gets to run. We also hold the RCU read lock over single
187 * stepping to avoid looking up the probe and kmmio_fault_page
188 * again.
189 */
190 preempt_disable();
191 rcu_read_lock();
192
193 faultpage = get_kmmio_fault_page(addr);
194 if (!faultpage) {
195 /*
196 * Either this page fault is not caused by kmmio, or
197 * another CPU just pulled the kmmio probe from under
198 * our feet. The latter case should not be possible.
199 */
200 goto no_kmmio;
201 }
202
203 ctx = &get_cpu_var(kmmio_ctx);
204 if (ctx->active) {
205 disarm_kmmio_fault_page(faultpage->page, NULL);
206 if (addr == ctx->addr) {
207 /*
208 * On SMP we sometimes get recursive probe hits on the
209 * same address. Context is already saved, fall out.
210 */
211 pr_debug("kmmio: duplicate probe hit on CPU %d, for "
212 "address 0x%08lx.\n",
213 smp_processor_id(), addr);
214 ret = 1;
215 goto no_kmmio_ctx;
216 }
217 /*
218 * Prevent overwriting already in-flight context.
219 * This should not happen, let's hope disarming at least
220 * prevents a panic.
221 */
222 pr_emerg("kmmio: recursive probe hit on CPU %d, "
223 "for address 0x%08lx. Ignoring.\n",
224 smp_processor_id(), addr);
225 pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
226 ctx->addr);
227 goto no_kmmio_ctx;
228 }
229 ctx->active++;
230
231 ctx->fpage = faultpage;
232 ctx->probe = get_kmmio_probe(addr);
233 ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
234 ctx->addr = addr;
235
236 if (ctx->probe && ctx->probe->pre_handler)
237 ctx->probe->pre_handler(ctx->probe, regs, addr);
238
239 /*
240 * Enable single-stepping and disable interrupts for the faulting
241 * context. Local interrupts must not get enabled during stepping.
242 */
243 regs->flags |= X86_EFLAGS_TF;
244 regs->flags &= ~X86_EFLAGS_IF;
245
246 /* Now we set present bit in PTE and single step. */
247 disarm_kmmio_fault_page(ctx->fpage->page, NULL);
248
249 /*
250 * If another cpu accesses the same page while we are stepping,
251 * the access will not be caught. It will simply succeed and the
252 * only downside is we lose the event. If this becomes a problem,
253 * the user should drop to single cpu before tracing.
254 */
255
256 put_cpu_var(kmmio_ctx);
257 return 1; /* fault handled */
258
259no_kmmio_ctx:
260 put_cpu_var(kmmio_ctx);
261no_kmmio:
262 rcu_read_unlock();
263 preempt_enable_no_resched();
264 return ret;
265}
266
267/*
268 * Interrupts are disabled on entry as trap1 is an interrupt gate
269 * and they remain disabled throughout this function.
270 * This must always get called as the pair to kmmio_handler().
271 */
272static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
273{
274 int ret = 0;
275 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
276
277 if (!ctx->active) {
278 pr_debug("kmmio: spurious debug trap on CPU %d.\n",
279 smp_processor_id());
280 goto out;
281 }
282
283 if (ctx->probe && ctx->probe->post_handler)
284 ctx->probe->post_handler(ctx->probe, condition, regs);
285
286 arm_kmmio_fault_page(ctx->fpage->page, NULL);
287
288 regs->flags &= ~X86_EFLAGS_TF;
289 regs->flags |= ctx->saved_flags;
290
291 /* These were acquired in kmmio_handler(). */
292 ctx->active--;
293 BUG_ON(ctx->active);
294 rcu_read_unlock();
295 preempt_enable_no_resched();
296
297 /*
298 * if somebody else is singlestepping across a probe point, flags
299 * will have TF set, in which case, continue the remaining processing
300 * of do_debug, as if this is not a probe hit.
301 */
302 if (!(regs->flags & X86_EFLAGS_TF))
303 ret = 1;
304out:
305 put_cpu_var(kmmio_ctx);
306 return ret;
307}
308
309/* You must be holding kmmio_lock. */
310static int add_kmmio_fault_page(unsigned long page)
311{
312 struct kmmio_fault_page *f;
313
314 page &= PAGE_MASK;
315 f = get_kmmio_fault_page(page);
316 if (f) {
317 if (!f->count)
318 arm_kmmio_fault_page(f->page, NULL);
319 f->count++;
320 return 0;
321 }
322
323 f = kmalloc(sizeof(*f), GFP_ATOMIC);
324 if (!f)
325 return -1;
326
327 f->count = 1;
328 f->page = page;
329 list_add_rcu(&f->list, kmmio_page_list(f->page));
330
331 arm_kmmio_fault_page(f->page, NULL);
332
333 return 0;
334}
335
336/* You must be holding kmmio_lock. */
337static void release_kmmio_fault_page(unsigned long page,
338 struct kmmio_fault_page **release_list)
339{
340 struct kmmio_fault_page *f;
341
342 page &= PAGE_MASK;
343 f = get_kmmio_fault_page(page);
344 if (!f)
345 return;
346
347 f->count--;
348 BUG_ON(f->count < 0);
349 if (!f->count) {
350 disarm_kmmio_fault_page(f->page, NULL);
351 f->release_next = *release_list;
352 *release_list = f;
353 }
354}
355
356/*
357 * With page-unaligned ioremaps, one or two armed pages may contain
358 * addresses from outside the intended mapping. Events for these addresses
359 * are currently silently dropped. Such events can only result from
360 * programming mistakes, i.e. accessing addresses before the beginning or
361 * past the end of a mapping.
362 */
363int register_kmmio_probe(struct kmmio_probe *p)
364{
365 unsigned long flags;
366 int ret = 0;
367 unsigned long size = 0;
368 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
369
370 spin_lock_irqsave(&kmmio_lock, flags);
371 if (get_kmmio_probe(p->addr)) {
372 ret = -EEXIST;
373 goto out;
374 }
375 kmmio_count++;
376 list_add_rcu(&p->list, &kmmio_probes);
377 while (size < size_lim) {
378 if (add_kmmio_fault_page(p->addr + size))
379 pr_err("kmmio: Unable to set page fault.\n");
380 size += PAGE_SIZE;
381 }
382out:
383 spin_unlock_irqrestore(&kmmio_lock, flags);
384 /*
385 * XXX: What should I do here?
386 * Here was a call to global_flush_tlb(), but it does not exist
387 * anymore. It seems it's not needed after all.
388 */
389 return ret;
390}
391EXPORT_SYMBOL(register_kmmio_probe);
392
393static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
394{
395 struct kmmio_delayed_release *dr = container_of(
396 head,
397 struct kmmio_delayed_release,
398 rcu);
399 struct kmmio_fault_page *p = dr->release_list;
400 while (p) {
401 struct kmmio_fault_page *next = p->release_next;
402 BUG_ON(p->count);
403 kfree(p);
404 p = next;
405 }
406 kfree(dr);
407}
408
409static void remove_kmmio_fault_pages(struct rcu_head *head)
410{
411 struct kmmio_delayed_release *dr = container_of(
412 head,
413 struct kmmio_delayed_release,
414 rcu);
415 struct kmmio_fault_page *p = dr->release_list;
416 struct kmmio_fault_page **prevp = &dr->release_list;
417 unsigned long flags;
418 spin_lock_irqsave(&kmmio_lock, flags);
419 while (p) {
420 if (!p->count)
421 list_del_rcu(&p->list);
422 else
423 *prevp = p->release_next;
424 prevp = &p->release_next;
425 p = p->release_next;
426 }
427 spin_unlock_irqrestore(&kmmio_lock, flags);
428 /* This is the real RCU destroy call. */
429 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
430}
431
432/*
433 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
434 * sure that the callbacks will not be called anymore. Only after that
435 * you may actually release your struct kmmio_probe.
436 *
437 * Unregistering a kmmio fault page has three steps:
438 * 1. release_kmmio_fault_page()
439 * Disarm the page, wait a grace period to let all faults finish.
440 * 2. remove_kmmio_fault_pages()
441 * Remove the pages from kmmio_page_table.
442 * 3. rcu_free_kmmio_fault_pages()
443 *    Actually free the kmmio_fault_page structs after an RCU grace period.
444 */
445void unregister_kmmio_probe(struct kmmio_probe *p)
446{
447 unsigned long flags;
448 unsigned long size = 0;
449 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
450 struct kmmio_fault_page *release_list = NULL;
451 struct kmmio_delayed_release *drelease;
452
453 spin_lock_irqsave(&kmmio_lock, flags);
454 while (size < size_lim) {
455 release_kmmio_fault_page(p->addr + size, &release_list);
456 size += PAGE_SIZE;
457 }
458 list_del_rcu(&p->list);
459 kmmio_count--;
460 spin_unlock_irqrestore(&kmmio_lock, flags);
461
462 drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
463 if (!drelease) {
464 pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
465 return;
466 }
467 drelease->release_list = release_list;
468
469 /*
470 * This is not really RCU here. We have just disarmed a set of
471 * pages so that they cannot trigger page faults anymore. However,
472 * we cannot remove the pages from kmmio_page_table,
473 * because a probe hit might be in flight on another CPU. The
474 * pages are collected into a list, and they will be removed from
475 * kmmio_page_table when it is certain that no probe hit related to
476 * these pages can be in flight. RCU grace period sounds like a
477 * good choice.
478 *
479 * If we removed the pages too early, kmmio page fault handler might
480 * not find the respective kmmio_fault_page and determine it's not
481 * a kmmio fault, when it actually is. This would lead to madness.
482 */
483 call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
484}
485EXPORT_SYMBOL(unregister_kmmio_probe);
486
487static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
488 void *args)
489{
490 struct die_args *arg = args;
491
492 if (val == DIE_DEBUG && (arg->err & DR_STEP))
493 if (post_kmmio_handler(arg->err, arg->regs) == 1)
494 return NOTIFY_STOP;
495
496 return NOTIFY_DONE;
497}
498
499static struct notifier_block nb_die = {
500 .notifier_call = kmmio_die_notifier
501};
502
503static int __init init_kmmio(void)
504{
505 int i;
506 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
507 INIT_LIST_HEAD(&kmmio_page_table[i]);
508 return register_die_notifier(&nb_die);
509}
510fs_initcall(init_kmmio); /* should be before device_initcall() */
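[ A minimal, illustrative sketch of how a client of the kmmio API added above is expected to use register_kmmio_probe()/unregister_kmmio_probe(). The struct kmmio_probe field names and the handler signatures follow their use in kmmio.c (and in mmio-mod.c below); the function and variable names here are hypothetical, not part of the patch. ]

#include <linux/mmiotrace.h>
#include <linux/rcupdate.h>
#include <linux/io.h>

static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
		   unsigned long addr)
{
	/* called from kmmio_handler() before the access is single-stepped */
}

static void my_post(struct kmmio_probe *p, unsigned long condition,
		    struct pt_regs *regs)
{
	/* called from post_kmmio_handler() once the access has completed */
}

static struct kmmio_probe my_probe;

static int trace_mmio_window(void __iomem *base, unsigned long len)
{
	my_probe.addr = (unsigned long)base;	/* virtual address of the mapping */
	my_probe.len = len;
	my_probe.pre_handler = my_pre;
	my_probe.post_handler = my_post;
	return register_kmmio_probe(&my_probe);	/* arms the covered pages */
}

static void untrace_mmio_window(void)
{
	unregister_kmmio_probe(&my_probe);
	/* callbacks may still be in flight; wait before reusing my_probe */
	synchronize_rcu();
}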
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
new file mode 100644
index 000000000000..e7397e108beb
--- /dev/null
+++ b/arch/x86/mm/mmio-mod.c
@@ -0,0 +1,515 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2005
17 * Jeff Muizelaar, 2006, 2007
18 * Pekka Paalanen, 2008 <pq@iki.fi>
19 *
20 * Derived from the read-mod example from relay-examples by Tom Zanussi.
21 */
22#define DEBUG 1
23
24#include <linux/module.h>
25#include <linux/debugfs.h>
26#include <linux/uaccess.h>
27#include <linux/io.h>
28#include <linux/version.h>
29#include <linux/kallsyms.h>
30#include <asm/pgtable.h>
31#include <linux/mmiotrace.h>
32#include <asm/e820.h> /* for ISA_START_ADDRESS */
33#include <asm/atomic.h>
34#include <linux/percpu.h>
35#include <linux/cpu.h>
36
37#include "pf_in.h"
38
39#define NAME "mmiotrace: "
40
41struct trap_reason {
42 unsigned long addr;
43 unsigned long ip;
44 enum reason_type type;
45 int active_traces;
46};
47
48struct remap_trace {
49 struct list_head list;
50 struct kmmio_probe probe;
51 resource_size_t phys;
52 unsigned long id;
53};
54
55/* Accessed per-cpu. */
56static DEFINE_PER_CPU(struct trap_reason, pf_reason);
57static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
58
 59#if 0 /* XXX: no way to gather this info anymore */
60/* Access to this is not per-cpu. */
61static DEFINE_PER_CPU(atomic_t, dropped);
62#endif
63
64static struct dentry *marker_file;
65
66static DEFINE_MUTEX(mmiotrace_mutex);
67static DEFINE_SPINLOCK(trace_lock);
68static atomic_t mmiotrace_enabled;
69static LIST_HEAD(trace_list); /* struct remap_trace */
70
71/*
72 * Locking in this file:
73 * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
74 * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
75 * and trace_lock.
76 * - Routines depending on is_enabled() must take trace_lock.
77 * - trace_list users must hold trace_lock.
78 * - is_enabled() guarantees that mmio_trace_record is allowed.
79 * - pre/post callbacks assume the effect of is_enabled() being true.
80 */
81
82/* module parameters */
83static unsigned long filter_offset;
84static int nommiotrace;
85static int trace_pc;
86
87module_param(filter_offset, ulong, 0);
88module_param(nommiotrace, bool, 0);
89module_param(trace_pc, bool, 0);
90
91MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
92MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
93MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
94
95static bool is_enabled(void)
96{
97 return atomic_read(&mmiotrace_enabled);
98}
99
100#if 0 /* XXX: needs rewrite */
101/*
102 * Write callback for the debugfs entry:
103 * Read a marker and write it to the mmio trace log
104 */
105static ssize_t write_marker(struct file *file, const char __user *buffer,
106 size_t count, loff_t *ppos)
107{
108 char *event = NULL;
109 struct mm_io_header *headp;
110 ssize_t len = (count > 65535) ? 65535 : count;
111
112 event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
113 if (!event)
114 return -ENOMEM;
115
116 headp = (struct mm_io_header *)event;
117 headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
118 headp->data_len = len;
119
120 if (copy_from_user(event + sizeof(*headp), buffer, len)) {
121 kfree(event);
122 return -EFAULT;
123 }
124
125 spin_lock_irq(&trace_lock);
126#if 0 /* XXX: convert this to use tracing */
127 if (is_enabled())
128 relay_write(chan, event, sizeof(*headp) + len);
129 else
130#endif
131 len = -EINVAL;
132 spin_unlock_irq(&trace_lock);
133 kfree(event);
134 return len;
135}
136#endif
137
138static void print_pte(unsigned long address)
139{
140 unsigned int level;
141 pte_t *pte = lookup_address(address, &level);
142
143 if (!pte) {
144 pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
145 __func__, address);
146 return;
147 }
148
149 if (level == PG_LEVEL_2M) {
150 pr_emerg(NAME "4MB pages are not currently supported: "
151 "0x%08lx\n", address);
152 BUG();
153 }
154 pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address,
155 (unsigned long long)pte_val(*pte),
156 (unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
157}
158
159/*
160 * For some reason the pre/post pairs have been called in an
161 * unmatched order. Report and die.
162 */
163static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
164{
165 const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
166 pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
167 "last fault for address: 0x%08lx\n",
168 addr, my_reason->addr);
169 print_pte(addr);
170 print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
171 print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
172#ifdef __i386__
173 pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
174 regs->ax, regs->bx, regs->cx, regs->dx);
175 pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
176 regs->si, regs->di, regs->bp, regs->sp);
177#else
178 pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
179 regs->ax, regs->cx, regs->dx);
180 pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
181 regs->si, regs->di, regs->bp, regs->sp);
182#endif
183 put_cpu_var(pf_reason);
184 BUG();
185}
186
187static void pre(struct kmmio_probe *p, struct pt_regs *regs,
188 unsigned long addr)
189{
190 struct trap_reason *my_reason = &get_cpu_var(pf_reason);
191 struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
192 const unsigned long instptr = instruction_pointer(regs);
193 const enum reason_type type = get_ins_type(instptr);
194 struct remap_trace *trace = p->private;
195
196 /* it doesn't make sense to have more than one active trace per cpu */
197 if (my_reason->active_traces)
198 die_kmmio_nesting_error(regs, addr);
199 else
200 my_reason->active_traces++;
201
202 my_reason->type = type;
203 my_reason->addr = addr;
204 my_reason->ip = instptr;
205
206 my_trace->phys = addr - trace->probe.addr + trace->phys;
207 my_trace->map_id = trace->id;
208
209 /*
210 * Only record the program counter when requested.
211 * It may taint clean-room reverse engineering.
212 */
213 if (trace_pc)
214 my_trace->pc = instptr;
215 else
216 my_trace->pc = 0;
217
218 /*
219 * XXX: the timestamp recorded will be *after* the tracing has been
220 * done, not at the time we hit the instruction. SMP implications
221 * on event ordering?
222 */
223
224 switch (type) {
225 case REG_READ:
226 my_trace->opcode = MMIO_READ;
227 my_trace->width = get_ins_mem_width(instptr);
228 break;
229 case REG_WRITE:
230 my_trace->opcode = MMIO_WRITE;
231 my_trace->width = get_ins_mem_width(instptr);
232 my_trace->value = get_ins_reg_val(instptr, regs);
233 break;
234 case IMM_WRITE:
235 my_trace->opcode = MMIO_WRITE;
236 my_trace->width = get_ins_mem_width(instptr);
237 my_trace->value = get_ins_imm_val(instptr);
238 break;
239 default:
240 {
241 unsigned char *ip = (unsigned char *)instptr;
242 my_trace->opcode = MMIO_UNKNOWN_OP;
243 my_trace->width = 0;
244 my_trace->value = (*ip) << 16 | *(ip + 1) << 8 |
245 *(ip + 2);
246 }
247 }
248 put_cpu_var(cpu_trace);
249 put_cpu_var(pf_reason);
250}
251
252static void post(struct kmmio_probe *p, unsigned long condition,
253 struct pt_regs *regs)
254{
255 struct trap_reason *my_reason = &get_cpu_var(pf_reason);
256 struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
257
258	/* this should always bring the active_traces count back to 0 */
259 my_reason->active_traces--;
260 if (my_reason->active_traces) {
261 pr_emerg(NAME "unexpected post handler");
262 BUG();
263 }
264
265 switch (my_reason->type) {
266 case REG_READ:
267 my_trace->value = get_ins_reg_val(my_reason->ip, regs);
268 break;
269 default:
270 break;
271 }
272
273 mmio_trace_rw(my_trace);
274 put_cpu_var(cpu_trace);
275 put_cpu_var(pf_reason);
276}
277
278static void ioremap_trace_core(resource_size_t offset, unsigned long size,
279 void __iomem *addr)
280{
281 static atomic_t next_id;
282 struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
283 /* These are page-unaligned. */
284 struct mmiotrace_map map = {
285 .phys = offset,
286 .virt = (unsigned long)addr,
287 .len = size,
288 .opcode = MMIO_PROBE
289 };
290
291 if (!trace) {
292 pr_err(NAME "kmalloc failed in ioremap\n");
293 return;
294 }
295
296 *trace = (struct remap_trace) {
297 .probe = {
298 .addr = (unsigned long)addr,
299 .len = size,
300 .pre_handler = pre,
301 .post_handler = post,
302 .private = trace
303 },
304 .phys = offset,
305 .id = atomic_inc_return(&next_id)
306 };
307 map.map_id = trace->id;
308
309 spin_lock_irq(&trace_lock);
310 if (!is_enabled())
311 goto not_enabled;
312
313 mmio_trace_mapping(&map);
314 list_add_tail(&trace->list, &trace_list);
315 if (!nommiotrace)
316 register_kmmio_probe(&trace->probe);
317
318not_enabled:
319 spin_unlock_irq(&trace_lock);
320}
321
322void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
323 void __iomem *addr)
324{
325 if (!is_enabled()) /* recheck and proper locking in *_core() */
326 return;
327
328 pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n",
329 (unsigned long long)offset, size, addr);
330 if ((filter_offset) && (offset != filter_offset))
331 return;
332 ioremap_trace_core(offset, size, addr);
333}
334
335static void iounmap_trace_core(volatile void __iomem *addr)
336{
337 struct mmiotrace_map map = {
338 .phys = 0,
339 .virt = (unsigned long)addr,
340 .len = 0,
341 .opcode = MMIO_UNPROBE
342 };
343 struct remap_trace *trace;
344 struct remap_trace *tmp;
345 struct remap_trace *found_trace = NULL;
346
347 pr_debug(NAME "Unmapping %p.\n", addr);
348
349 spin_lock_irq(&trace_lock);
350 if (!is_enabled())
351 goto not_enabled;
352
353 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
354 if ((unsigned long)addr == trace->probe.addr) {
355 if (!nommiotrace)
356 unregister_kmmio_probe(&trace->probe);
357 list_del(&trace->list);
358 found_trace = trace;
359 break;
360 }
361 }
362 map.map_id = (found_trace) ? found_trace->id : -1;
363 mmio_trace_mapping(&map);
364
365not_enabled:
366 spin_unlock_irq(&trace_lock);
367 if (found_trace) {
368 synchronize_rcu(); /* unregister_kmmio_probe() requirement */
369 kfree(found_trace);
370 }
371}
372
373void mmiotrace_iounmap(volatile void __iomem *addr)
374{
375 might_sleep();
376 if (is_enabled()) /* recheck and proper locking in *_core() */
377 iounmap_trace_core(addr);
378}
379
380static void clear_trace_list(void)
381{
382 struct remap_trace *trace;
383 struct remap_trace *tmp;
384
385 /*
386 * No locking required, because the caller ensures we are in a
387 * critical section via mutex, and is_enabled() is false,
388 * i.e. nothing can traverse or modify this list.
389 * Caller also ensures is_enabled() cannot change.
390 */
391 list_for_each_entry(trace, &trace_list, list) {
392 pr_notice(NAME "purging non-iounmapped "
393 "trace @0x%08lx, size 0x%lx.\n",
394 trace->probe.addr, trace->probe.len);
395 if (!nommiotrace)
396 unregister_kmmio_probe(&trace->probe);
397 }
398 synchronize_rcu(); /* unregister_kmmio_probe() requirement */
399
400 list_for_each_entry_safe(trace, tmp, &trace_list, list) {
401 list_del(&trace->list);
402 kfree(trace);
403 }
404}
405
406#ifdef CONFIG_HOTPLUG_CPU
407static cpumask_t downed_cpus;
408
409static void enter_uniprocessor(void)
410{
411 int cpu;
412 int err;
413
414 get_online_cpus();
415 downed_cpus = cpu_online_map;
416 cpu_clear(first_cpu(cpu_online_map), downed_cpus);
417 if (num_online_cpus() > 1)
418 pr_notice(NAME "Disabling non-boot CPUs...\n");
419 put_online_cpus();
420
421 for_each_cpu_mask(cpu, downed_cpus) {
422 err = cpu_down(cpu);
423 if (!err)
424 pr_info(NAME "CPU%d is down.\n", cpu);
425 else
426 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
427 }
428 if (num_online_cpus() > 1)
429 pr_warning(NAME "multiple CPUs still online, "
430 "may miss events.\n");
431}
432
433static void leave_uniprocessor(void)
434{
435 int cpu;
436 int err;
437
438 if (cpus_weight(downed_cpus) == 0)
439 return;
440 pr_notice(NAME "Re-enabling CPUs...\n");
441 for_each_cpu_mask(cpu, downed_cpus) {
442 err = cpu_up(cpu);
443 if (!err)
444 pr_info(NAME "enabled CPU%d.\n", cpu);
445 else
446 pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err);
447 }
448}
449
450#else /* !CONFIG_HOTPLUG_CPU */
451static void enter_uniprocessor(void)
452{
453 if (num_online_cpus() > 1)
454 pr_warning(NAME "multiple CPUs are online, may miss events. "
455 "Suggest booting with maxcpus=1 kernel argument.\n");
456}
457
458static void leave_uniprocessor(void)
459{
460}
461#endif
462
463#if 0 /* XXX: out of order */
464static struct file_operations fops_marker = {
465 .owner = THIS_MODULE,
466 .write = write_marker
467};
468#endif
469
470void enable_mmiotrace(void)
471{
472 mutex_lock(&mmiotrace_mutex);
473 if (is_enabled())
474 goto out;
475
476#if 0 /* XXX: tracing does not support text entries */
477 marker_file = debugfs_create_file("marker", 0660, dir, NULL,
478 &fops_marker);
479 if (!marker_file)
480 pr_err(NAME "marker file creation failed.\n");
481#endif
482
483 if (nommiotrace)
484 pr_info(NAME "MMIO tracing disabled.\n");
485 enter_uniprocessor();
486 spin_lock_irq(&trace_lock);
487 atomic_inc(&mmiotrace_enabled);
488 spin_unlock_irq(&trace_lock);
489 pr_info(NAME "enabled.\n");
490out:
491 mutex_unlock(&mmiotrace_mutex);
492}
493
494void disable_mmiotrace(void)
495{
496 mutex_lock(&mmiotrace_mutex);
497 if (!is_enabled())
498 goto out;
499
500 spin_lock_irq(&trace_lock);
501 atomic_dec(&mmiotrace_enabled);
502 BUG_ON(is_enabled());
503 spin_unlock_irq(&trace_lock);
504
505 clear_trace_list(); /* guarantees: no more kmmio callbacks */
506 leave_uniprocessor();
507 if (marker_file) {
508 debugfs_remove(marker_file);
509 marker_file = NULL;
510 }
511
512 pr_info(NAME "disabled.\n");
513out:
514 mutex_unlock(&mmiotrace_mutex);
515}
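[ For orientation, a hedged sketch of the call ordering the hooks in mmio-mod.c expect. The real driver of enable_mmiotrace()/disable_mmiotrace() lives in the tracing subsystem and is not part of this hunk; ioremap_nocache() is assumed to end up in mmiotrace_ioremap(), just as iounmap() is shown calling mmiotrace_iounmap() in the ioremap.c hunk above. The function name below is illustrative only. ]

#include <linux/io.h>
#include <linux/mmiotrace.h>

static void example_mmiotrace_session(resource_size_t phys, unsigned long len)
{
	void __iomem *base;

	enable_mmiotrace();	/* takes mmiotrace_mutex, goes uniprocessor,
				 * then sets mmiotrace_enabled under trace_lock */

	base = ioremap_nocache(phys, len);	/* assumed to call mmiotrace_ioremap(),
						 * which registers a kmmio probe */
	if (base) {
		/* MMIO accesses to this mapping now fault into the kmmio handlers */
		iounmap(base);	/* calls mmiotrace_iounmap() (see ioremap.c hunk above) */
	}

	disable_mmiotrace();	/* purges traces never iounmap()'d, re-enables CPUs */
}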
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0389cb8f6b1a..65c6e46bf059 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -141,7 +141,7 @@ static void cpa_flush_all(unsigned long cache)
141{ 141{
142 BUG_ON(irqs_disabled()); 142 BUG_ON(irqs_disabled());
143 143
144 on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1); 144 on_each_cpu(__cpa_flush_all, (void *) cache, 1);
145} 145}
146 146
147static void __cpa_flush_range(void *arg) 147static void __cpa_flush_range(void *arg)
@@ -162,7 +162,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
162 BUG_ON(irqs_disabled()); 162 BUG_ON(irqs_disabled());
163 WARN_ON(PAGE_ALIGN(start) != start); 163 WARN_ON(PAGE_ALIGN(start) != start);
164 164
165 on_each_cpu(__cpa_flush_range, NULL, 1, 1); 165 on_each_cpu(__cpa_flush_range, NULL, 1);
166 166
167 if (!cache) 167 if (!cache)
168 return; 168 return;
@@ -262,6 +262,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
262 262
263 return pte_offset_kernel(pmd, address); 263 return pte_offset_kernel(pmd, address);
264} 264}
265EXPORT_SYMBOL_GPL(lookup_address);
265 266
266/* 267/*
267 * Set the new pmd in all the pgds we know about: 268 * Set the new pmd in all the pgds we know about:
@@ -658,11 +659,11 @@ static int cpa_process_alias(struct cpa_data *cpa)
658 struct cpa_data alias_cpa; 659 struct cpa_data alias_cpa;
659 int ret = 0; 660 int ret = 0;
660 661
661 if (cpa->pfn > max_pfn_mapped) 662 if (cpa->pfn >= max_pfn_mapped)
662 return 0; 663 return 0;
663 664
664#ifdef CONFIG_X86_64 665#ifdef CONFIG_X86_64
665 if (cpa->pfn > max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) 666 if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
666 return 0; 667 return 0;
667#endif 668#endif
668 /* 669 /*
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 749766c3c5cd..d4585077977a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -449,8 +449,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
449 if (retval < 0) 449 if (retval < 0)
450 return 0; 450 return 0;
451 451
452 if (((pfn <= max_low_pfn_mapped) || 452 if (((pfn < max_low_pfn_mapped) ||
453 (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) && 453 (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
454 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { 454 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
455 free_memtype(offset, offset + size); 455 free_memtype(offset, offset + size);
456 printk(KERN_INFO 456 printk(KERN_INFO
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
new file mode 100644
index 000000000000..efa1911e20ca
--- /dev/null
+++ b/arch/x86/mm/pf_in.c
@@ -0,0 +1,489 @@
1/*
2 * Fault Injection Test harness (FI)
  3 * Copyright (C) Intel Corp.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18 * USA.
19 *
20 */
21
22/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
 23 * Copyright by Intel Corp., 2002
24 * Louis Zhuang (louis.zhuang@intel.com)
25 *
26 * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
27 */
28
29#include <linux/module.h>
30#include <linux/ptrace.h> /* struct pt_regs */
31#include "pf_in.h"
32
33#ifdef __i386__
34/* IA32 Manual 3, 2-1 */
35static unsigned char prefix_codes[] = {
36 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
37 0x65, 0x2E, 0x3E, 0x66, 0x67
38};
39/* IA32 Manual 3, 3-432*/
40static unsigned int reg_rop[] = {
41 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
42};
43static unsigned int reg_wop[] = { 0x88, 0x89 };
44static unsigned int imm_wop[] = { 0xC6, 0xC7 };
45/* IA32 Manual 3, 3-432*/
46static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
47static unsigned int rw32[] = {
48 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
49};
50static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
51static unsigned int mw16[] = { 0xB70F, 0xBF0F };
52static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
53static unsigned int mw64[] = {};
54#else /* not __i386__ */
55static unsigned char prefix_codes[] = {
56 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36,
57 0xF0, 0xF3, 0xF2,
58 /* REX Prefixes */
59 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
60 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f
61};
62/* AMD64 Manual 3, Appendix A*/
63static unsigned int reg_rop[] = {
64 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
65};
66static unsigned int reg_wop[] = { 0x88, 0x89 };
67static unsigned int imm_wop[] = { 0xC6, 0xC7 };
68static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
69static unsigned int rw32[] = {
70 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
71};
72/* 8 bit only */
73static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
74/* 16 bit only */
75static unsigned int mw16[] = { 0xB70F, 0xBF0F };
76/* 16 or 32 bit */
77static unsigned int mw32[] = { 0xC7 };
78/* 16, 32 or 64 bit */
79static unsigned int mw64[] = { 0x89, 0x8B };
80#endif /* not __i386__ */
81
82static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
83 int *rexr)
84{
85 int i;
86 unsigned char *p = addr;
87 *shorted = 0;
88 *enlarged = 0;
89 *rexr = 0;
90
91restart:
92 for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
93 if (*p == prefix_codes[i]) {
94 if (*p == 0x66)
95 *shorted = 1;
96#ifdef __amd64__
97 if ((*p & 0xf8) == 0x48)
98 *enlarged = 1;
99 if ((*p & 0xf4) == 0x44)
100 *rexr = 1;
101#endif
102 p++;
103 goto restart;
104 }
105 }
106
107 return (p - addr);
108}
109
110static int get_opcode(unsigned char *addr, unsigned int *opcode)
111{
112 int len;
113
114 if (*addr == 0x0F) {
115		/* 0x0F is an extension (two-byte) opcode */
116 *opcode = *(unsigned short *)addr;
117 len = 2;
118 } else {
119 *opcode = *addr;
120 len = 1;
121 }
122
123 return len;
124}
125
126#define CHECK_OP_TYPE(opcode, array, type) \
127 for (i = 0; i < ARRAY_SIZE(array); i++) { \
128 if (array[i] == opcode) { \
129 rv = type; \
130 goto exit; \
131 } \
132 }
133
134enum reason_type get_ins_type(unsigned long ins_addr)
135{
136 unsigned int opcode;
137 unsigned char *p;
138 int shorted, enlarged, rexr;
139 int i;
140 enum reason_type rv = OTHERS;
141
142 p = (unsigned char *)ins_addr;
143 p += skip_prefix(p, &shorted, &enlarged, &rexr);
144 p += get_opcode(p, &opcode);
145
146 CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
147 CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE);
148 CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE);
149
150exit:
151 return rv;
152}
153#undef CHECK_OP_TYPE
154
155static unsigned int get_ins_reg_width(unsigned long ins_addr)
156{
157 unsigned int opcode;
158 unsigned char *p;
159 int i, shorted, enlarged, rexr;
160
161 p = (unsigned char *)ins_addr;
162 p += skip_prefix(p, &shorted, &enlarged, &rexr);
163 p += get_opcode(p, &opcode);
164
165 for (i = 0; i < ARRAY_SIZE(rw8); i++)
166 if (rw8[i] == opcode)
167 return 1;
168
169 for (i = 0; i < ARRAY_SIZE(rw32); i++)
170 if (rw32[i] == opcode)
171 return (shorted ? 2 : (enlarged ? 8 : 4));
172
173 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
174 return 0;
175}
176
177unsigned int get_ins_mem_width(unsigned long ins_addr)
178{
179 unsigned int opcode;
180 unsigned char *p;
181 int i, shorted, enlarged, rexr;
182
183 p = (unsigned char *)ins_addr;
184 p += skip_prefix(p, &shorted, &enlarged, &rexr);
185 p += get_opcode(p, &opcode);
186
187 for (i = 0; i < ARRAY_SIZE(mw8); i++)
188 if (mw8[i] == opcode)
189 return 1;
190
191 for (i = 0; i < ARRAY_SIZE(mw16); i++)
192 if (mw16[i] == opcode)
193 return 2;
194
195 for (i = 0; i < ARRAY_SIZE(mw32); i++)
196 if (mw32[i] == opcode)
197 return shorted ? 2 : 4;
198
199 for (i = 0; i < ARRAY_SIZE(mw64); i++)
200 if (mw64[i] == opcode)
201 return shorted ? 2 : (enlarged ? 8 : 4);
202
203 printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
204 return 0;
205}
206
207/*
208 * Define register ident in mod/rm byte.
209 * Note: these are NOT the same as in ptrace-abi.h.
210 */
211enum {
212 arg_AL = 0,
213 arg_CL = 1,
214 arg_DL = 2,
215 arg_BL = 3,
216 arg_AH = 4,
217 arg_CH = 5,
218 arg_DH = 6,
219 arg_BH = 7,
220
221 arg_AX = 0,
222 arg_CX = 1,
223 arg_DX = 2,
224 arg_BX = 3,
225 arg_SP = 4,
226 arg_BP = 5,
227 arg_SI = 6,
228 arg_DI = 7,
229#ifdef __amd64__
230 arg_R8 = 8,
231 arg_R9 = 9,
232 arg_R10 = 10,
233 arg_R11 = 11,
234 arg_R12 = 12,
235 arg_R13 = 13,
236 arg_R14 = 14,
237 arg_R15 = 15
238#endif
239};
240
241static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
242{
243 unsigned char *rv = NULL;
244
245 switch (no) {
246 case arg_AL:
247 rv = (unsigned char *)&regs->ax;
248 break;
249 case arg_BL:
250 rv = (unsigned char *)&regs->bx;
251 break;
252 case arg_CL:
253 rv = (unsigned char *)&regs->cx;
254 break;
255 case arg_DL:
256 rv = (unsigned char *)&regs->dx;
257 break;
258 case arg_AH:
259 rv = 1 + (unsigned char *)&regs->ax;
260 break;
261 case arg_BH:
262 rv = 1 + (unsigned char *)&regs->bx;
263 break;
264 case arg_CH:
265 rv = 1 + (unsigned char *)&regs->cx;
266 break;
267 case arg_DH:
268 rv = 1 + (unsigned char *)&regs->dx;
269 break;
270#ifdef __amd64__
271 case arg_R8:
272 rv = (unsigned char *)&regs->r8;
273 break;
274 case arg_R9:
275 rv = (unsigned char *)&regs->r9;
276 break;
277 case arg_R10:
278 rv = (unsigned char *)&regs->r10;
279 break;
280 case arg_R11:
281 rv = (unsigned char *)&regs->r11;
282 break;
283 case arg_R12:
284 rv = (unsigned char *)&regs->r12;
285 break;
286 case arg_R13:
287 rv = (unsigned char *)&regs->r13;
288 break;
289 case arg_R14:
290 rv = (unsigned char *)&regs->r14;
291 break;
292 case arg_R15:
293 rv = (unsigned char *)&regs->r15;
294 break;
295#endif
296 default:
297 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
298 break;
299 }
300 return rv;
301}
302
303static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
304{
305 unsigned long *rv = NULL;
306
307 switch (no) {
308 case arg_AX:
309 rv = &regs->ax;
310 break;
311 case arg_BX:
312 rv = &regs->bx;
313 break;
314 case arg_CX:
315 rv = &regs->cx;
316 break;
317 case arg_DX:
318 rv = &regs->dx;
319 break;
320 case arg_SP:
321 rv = &regs->sp;
322 break;
323 case arg_BP:
324 rv = &regs->bp;
325 break;
326 case arg_SI:
327 rv = &regs->si;
328 break;
329 case arg_DI:
330 rv = &regs->di;
331 break;
332#ifdef __amd64__
333 case arg_R8:
334 rv = &regs->r8;
335 break;
336 case arg_R9:
337 rv = &regs->r9;
338 break;
339 case arg_R10:
340 rv = &regs->r10;
341 break;
342 case arg_R11:
343 rv = &regs->r11;
344 break;
345 case arg_R12:
346 rv = &regs->r12;
347 break;
348 case arg_R13:
349 rv = &regs->r13;
350 break;
351 case arg_R14:
352 rv = &regs->r14;
353 break;
354 case arg_R15:
355 rv = &regs->r15;
356 break;
357#endif
358 default:
359 printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
360 }
361
362 return rv;
363}
364
365unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
366{
367 unsigned int opcode;
368 unsigned char mod_rm;
369 int reg;
370 unsigned char *p;
371 int i, shorted, enlarged, rexr;
372 unsigned long rv;
373
374 p = (unsigned char *)ins_addr;
375 p += skip_prefix(p, &shorted, &enlarged, &rexr);
376 p += get_opcode(p, &opcode);
377 for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
378 if (reg_rop[i] == opcode) {
379 rv = REG_READ;
380 goto do_work;
381 }
382
383 for (i = 0; i < ARRAY_SIZE(reg_wop); i++)
384 if (reg_wop[i] == opcode) {
385 rv = REG_WRITE;
386 goto do_work;
387 }
388
389 printk(KERN_ERR "mmiotrace: Not a register instruction, opcode "
390 "0x%02x\n", opcode);
391 goto err;
392
393do_work:
394 mod_rm = *p;
395 reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
396 switch (get_ins_reg_width(ins_addr)) {
397 case 1:
398 return *get_reg_w8(reg, regs);
399
400 case 2:
401 return *(unsigned short *)get_reg_w32(reg, regs);
402
403 case 4:
404 return *(unsigned int *)get_reg_w32(reg, regs);
405
406#ifdef __amd64__
407 case 8:
408 return *(unsigned long *)get_reg_w32(reg, regs);
409#endif
410
411 default:
412 printk(KERN_ERR "mmiotrace: Error width# %d\n", reg);
413 }
414
415err:
416 return 0;
417}
418
419unsigned long get_ins_imm_val(unsigned long ins_addr)
420{
421 unsigned int opcode;
422 unsigned char mod_rm;
423 unsigned char mod;
424 unsigned char *p;
425 int i, shorted, enlarged, rexr;
426 unsigned long rv;
427
428 p = (unsigned char *)ins_addr;
429 p += skip_prefix(p, &shorted, &enlarged, &rexr);
430 p += get_opcode(p, &opcode);
431 for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
432 if (imm_wop[i] == opcode) {
433 rv = IMM_WRITE;
434 goto do_work;
435 }
436
437 printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode "
438 "0x%02x\n", opcode);
439 goto err;
440
441do_work:
442 mod_rm = *p;
443 mod = mod_rm >> 6;
444 p++;
445 switch (mod) {
446 case 0:
447		/* if r/m is 5 we have a 32-bit disp (IA32 Manual 3, Table 2-2) */
448 /* AMD64: XXX Check for address size prefix? */
449 if ((mod_rm & 0x7) == 0x5)
450 p += 4;
451 break;
452
453 case 1:
454 p += 1;
455 break;
456
457 case 2:
458 p += 4;
459 break;
460
461 case 3:
462 default:
463 printk(KERN_ERR "mmiotrace: not a memory access instruction "
464 "at 0x%lx, rm_mod=0x%02x\n",
465 ins_addr, mod_rm);
466 }
467
468 switch (get_ins_reg_width(ins_addr)) {
469 case 1:
470 return *(unsigned char *)p;
471
472 case 2:
473 return *(unsigned short *)p;
474
475 case 4:
476 return *(unsigned int *)p;
477
478#ifdef __amd64__
479 case 8:
480 return *(unsigned long *)p;
481#endif
482
483 default:
484 printk(KERN_ERR "mmiotrace: Error: width.\n");
485 }
486
487err:
488 return 0;
489}
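[ A small sketch of how the pf_in.c decoding helpers fit together at fault time, mirroring the pre()/post() handlers in mmio-mod.c above; the function name and the pr_info() reporting are illustrative assumptions. ]

#include <linux/kernel.h>
#include <linux/ptrace.h>
#include "pf_in.h"

static void decode_mmio_access(struct pt_regs *regs, unsigned long addr)
{
	unsigned long ip = instruction_pointer(regs);

	switch (get_ins_type(ip)) {
	case REG_READ:
		/* the value read is only known after single-stepping;
		 * see get_ins_reg_val() in post() of mmio-mod.c */
		pr_info("read of width %u at 0x%lx\n",
			get_ins_mem_width(ip), addr);
		break;
	case REG_WRITE:
		pr_info("write of width %u, value 0x%lx, at 0x%lx\n",
			get_ins_mem_width(ip), get_ins_reg_val(ip, regs), addr);
		break;
	case IMM_WRITE:
		pr_info("write of width %u, immediate 0x%lx, at 0x%lx\n",
			get_ins_mem_width(ip), get_ins_imm_val(ip), addr);
		break;
	default:
		pr_info("unhandled instruction at 0x%lx\n", ip);
		break;
	}
}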
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h
new file mode 100644
index 000000000000..e05341a51a27
--- /dev/null
+++ b/arch/x86/mm/pf_in.h
@@ -0,0 +1,39 @@
1/*
2 * Fault Injection Test harness (FI)
  3 * Copyright (C) Intel Corp.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
18 * USA.
19 *
20 */
21
22#ifndef __PF_H_
23#define __PF_H_
24
25enum reason_type {
 26	NOT_ME,		/* page fault is not in the traced regions */
 27	NOTHING,	/* access to another point within the regions */
28 REG_READ, /* read from addr to reg */
29 REG_WRITE, /* write from reg to addr */
30 IMM_WRITE, /* write from imm to addr */
 31	OTHERS		/* other instructions that cannot be intercepted */
32};
33
34enum reason_type get_ins_type(unsigned long ins_addr);
35unsigned int get_ins_mem_width(unsigned long ins_addr);
36unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs);
37unsigned long get_ins_imm_val(unsigned long ins_addr);
38
39#endif /* __PF_H_ */
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index f41d67f8f831..1eb2973a301c 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -156,10 +156,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
156 156
157 num_memory_chunks++; 157 num_memory_chunks++;
158 158
159 printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)" 159 printk(KERN_DEBUG "Memory range %08lx to %08lx"
160 " in proximity domain %02x %s\n", 160 " in proximity domain %02x %s\n",
161 start_pfn, end_pfn, 161 start_pfn, end_pfn,
162 memory_affinity->memory_type,
163 pxm, 162 pxm,
164 ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? 163 ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
165 "enabled and removable" : "enabled" ) ); 164 "enabled and removable" : "enabled" ) );
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
new file mode 100644
index 000000000000..d877c5b423ef
--- /dev/null
+++ b/arch/x86/mm/testmmiotrace.c
@@ -0,0 +1,71 @@
1/*
2 * Written by Pekka Paalanen, 2008 <pq@iki.fi>
3 */
4#include <linux/module.h>
5#include <linux/io.h>
6
7#define MODULE_NAME "testmmiotrace"
8
9static unsigned long mmio_address;
10module_param(mmio_address, ulong, 0);
11MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
12
13static void do_write_test(void __iomem *p)
14{
15 unsigned int i;
16 for (i = 0; i < 256; i++)
17 iowrite8(i, p + i);
18 for (i = 1024; i < (5 * 1024); i += 2)
19 iowrite16(i * 12 + 7, p + i);
20 for (i = (5 * 1024); i < (16 * 1024); i += 4)
21 iowrite32(i * 212371 + 13, p + i);
22}
23
24static void do_read_test(void __iomem *p)
25{
26 unsigned int i;
27 for (i = 0; i < 256; i++)
28 ioread8(p + i);
29 for (i = 1024; i < (5 * 1024); i += 2)
30 ioread16(p + i);
31 for (i = (5 * 1024); i < (16 * 1024); i += 4)
32 ioread32(p + i);
33}
34
35static void do_test(void)
36{
37 void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
38 if (!p) {
39 pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
40 return;
41 }
42 do_write_test(p);
43 do_read_test(p);
44 iounmap(p);
45}
46
47static int __init init(void)
48{
49 if (mmio_address == 0) {
50 pr_err(MODULE_NAME ": you have to use the module argument "
51 "mmio_address.\n");
52 pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
53 " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
54 return -ENXIO;
55 }
56
57 pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
58 "in PCI address space, and writing "
59 "rubbish in there.\n", mmio_address);
60 do_test();
61 return 0;
62}
63
64static void __exit cleanup(void)
65{
66 pr_debug(MODULE_NAME ": unloaded.\n");
67}
68
69module_init(init);
70module_exit(cleanup);
71MODULE_LICENSE("GPL");
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 2b6ad5b9f9d5..7f3329b55d2e 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -218,8 +218,8 @@ static int nmi_setup(void)
218 } 218 }
219 219
220 } 220 }
221 on_each_cpu(nmi_save_registers, NULL, 0, 1); 221 on_each_cpu(nmi_save_registers, NULL, 1);
222 on_each_cpu(nmi_cpu_setup, NULL, 0, 1); 222 on_each_cpu(nmi_cpu_setup, NULL, 1);
223 nmi_enabled = 1; 223 nmi_enabled = 1;
224 return 0; 224 return 0;
225} 225}
@@ -271,7 +271,7 @@ static void nmi_shutdown(void)
271{ 271{
272 struct op_msrs *msrs = &get_cpu_var(cpu_msrs); 272 struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
273 nmi_enabled = 0; 273 nmi_enabled = 0;
274 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); 274 on_each_cpu(nmi_cpu_shutdown, NULL, 1);
275 unregister_die_notifier(&profile_exceptions_nb); 275 unregister_die_notifier(&profile_exceptions_nb);
276 model->shutdown(msrs); 276 model->shutdown(msrs);
277 free_msrs(); 277 free_msrs();
@@ -286,7 +286,7 @@ static void nmi_cpu_start(void *dummy)
286 286
287static int nmi_start(void) 287static int nmi_start(void)
288{ 288{
289 on_each_cpu(nmi_cpu_start, NULL, 0, 1); 289 on_each_cpu(nmi_cpu_start, NULL, 1);
290 return 0; 290 return 0;
291} 291}
292 292
@@ -298,7 +298,7 @@ static void nmi_cpu_stop(void *dummy)
298 298
299static void nmi_stop(void) 299static void nmi_stop(void)
300{ 300{
301 on_each_cpu(nmi_cpu_stop, NULL, 0, 1); 301 on_each_cpu(nmi_cpu_stop, NULL, 1);
302} 302}
303 303
304struct op_counter_config counter_config[OP_MAX_COUNTER]; 304struct op_counter_config counter_config[OP_MAX_COUNTER];
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index a18141ae3f02..dbf532369711 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -578,7 +578,7 @@ static int __init enable_pci_io_ecs(void)
578 /* assume all cpus from fam10h have IO ECS */ 578 /* assume all cpus from fam10h have IO ECS */
579 if (boot_cpu_data.x86 < 0x10) 579 if (boot_cpu_data.x86 < 0x10)
580 return 0; 580 return 0;
581 on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1); 581 on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1);
582 pci_probe |= PCI_HAS_IO_ECS; 582 pci_probe |= PCI_HAS_IO_ECS;
583 return 0; 583 return 0;
584} 584}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 20b9f59f95df..b67732bbb85a 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -20,6 +20,7 @@
20unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | 20unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
21 PCI_PROBE_MMCONF; 21 PCI_PROBE_MMCONF;
22 22
23unsigned int pci_early_dump_regs;
23static int pci_bf_sort; 24static int pci_bf_sort;
24int pci_routeirq; 25int pci_routeirq;
25int pcibios_last_bus = -1; 26int pcibios_last_bus = -1;
@@ -31,7 +32,7 @@ struct pci_raw_ops *raw_pci_ext_ops;
31int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, 32int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
32 int reg, int len, u32 *val) 33 int reg, int len, u32 *val)
33{ 34{
34 if (reg < 256 && raw_pci_ops) 35 if (domain == 0 && reg < 256 && raw_pci_ops)
35 return raw_pci_ops->read(domain, bus, devfn, reg, len, val); 36 return raw_pci_ops->read(domain, bus, devfn, reg, len, val);
36 if (raw_pci_ext_ops) 37 if (raw_pci_ext_ops)
37 return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val); 38 return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val);
@@ -41,7 +42,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
41int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, 42int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
42 int reg, int len, u32 val) 43 int reg, int len, u32 val)
43{ 44{
44 if (reg < 256 && raw_pci_ops) 45 if (domain == 0 && reg < 256 && raw_pci_ops)
45 return raw_pci_ops->write(domain, bus, devfn, reg, len, val); 46 return raw_pci_ops->write(domain, bus, devfn, reg, len, val);
46 if (raw_pci_ext_ops) 47 if (raw_pci_ext_ops)
47 return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val); 48 return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val);
@@ -121,6 +122,21 @@ void __init dmi_check_skip_isa_align(void)
121 dmi_check_system(can_skip_pciprobe_dmi_table); 122 dmi_check_system(can_skip_pciprobe_dmi_table);
122} 123}
123 124
125static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
126{
127 struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
128
129 if (pci_probe & PCI_NOASSIGN_ROMS) {
130 if (rom_r->parent)
131 return;
132 if (rom_r->start) {
133 /* we deal with BIOS assigned ROM later */
134 return;
135 }
136 rom_r->start = rom_r->end = rom_r->flags = 0;
137 }
138}
139
124/* 140/*
125 * Called after each bus is probed, but before its children 141 * Called after each bus is probed, but before its children
126 * are examined. 142 * are examined.
@@ -128,7 +144,11 @@ void __init dmi_check_skip_isa_align(void)
128 144
129void __devinit pcibios_fixup_bus(struct pci_bus *b) 145void __devinit pcibios_fixup_bus(struct pci_bus *b)
130{ 146{
147 struct pci_dev *dev;
148
131 pci_read_bridge_bases(b); 149 pci_read_bridge_bases(b);
150 list_for_each_entry(dev, &b->devices, bus_list)
151 pcibios_fixup_device_resources(dev);
132} 152}
133 153
134/* 154/*
@@ -481,12 +501,18 @@ char * __devinit pcibios_setup(char *str)
481 else if (!strcmp(str, "rom")) { 501 else if (!strcmp(str, "rom")) {
482 pci_probe |= PCI_ASSIGN_ROMS; 502 pci_probe |= PCI_ASSIGN_ROMS;
483 return NULL; 503 return NULL;
504 } else if (!strcmp(str, "norom")) {
505 pci_probe |= PCI_NOASSIGN_ROMS;
506 return NULL;
484 } else if (!strcmp(str, "assign-busses")) { 507 } else if (!strcmp(str, "assign-busses")) {
485 pci_probe |= PCI_ASSIGN_ALL_BUSSES; 508 pci_probe |= PCI_ASSIGN_ALL_BUSSES;
486 return NULL; 509 return NULL;
487 } else if (!strcmp(str, "use_crs")) { 510 } else if (!strcmp(str, "use_crs")) {
488 pci_probe |= PCI_USE__CRS; 511 pci_probe |= PCI_USE__CRS;
489 return NULL; 512 return NULL;
513 } else if (!strcmp(str, "earlydump")) {
514 pci_early_dump_regs = 1;
515 return NULL;
490 } else if (!strcmp(str, "routeirq")) { 516 } else if (!strcmp(str, "routeirq")) {
491 pci_routeirq = 1; 517 pci_routeirq = 1;
492 return NULL; 518 return NULL;
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 42df4b6606df..858dbe3399f9 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -49,7 +49,14 @@ void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
49{ 49{
50 PDprintk("%x writing to %x: %x\n", slot, offset, val); 50 PDprintk("%x writing to %x: %x\n", slot, offset, val);
51 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); 51 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
52 outb(val, 0xcfc); 52 outb(val, 0xcfc + (offset&3));
53}
54
55void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val)
56{
57 PDprintk("%x writing to %x: %x\n", slot, offset, val);
58 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
59 outw(val, 0xcfc + (offset&2));
53} 60}
54 61
55int early_pci_allowed(void) 62int early_pci_allowed(void)
@@ -57,3 +64,54 @@ int early_pci_allowed(void)
57 return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == 64 return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
58 PCI_PROBE_CONF1; 65 PCI_PROBE_CONF1;
59} 66}
67
68void early_dump_pci_device(u8 bus, u8 slot, u8 func)
69{
70 int i;
71 int j;
72 u32 val;
73
74 printk("PCI: %02x:%02x:%02x", bus, slot, func);
75
76 for (i = 0; i < 256; i += 4) {
77 if (!(i & 0x0f))
78 printk("\n%04x:",i);
79
80 val = read_pci_config(bus, slot, func, i);
81 for (j = 0; j < 4; j++) {
82 printk(" %02x", val & 0xff);
83 val >>= 8;
84 }
85 }
86 printk("\n");
87}
88
89void early_dump_pci_devices(void)
90{
91 unsigned bus, slot, func;
92
93 if (!early_pci_allowed())
94 return;
95
96 for (bus = 0; bus < 256; bus++) {
97 for (slot = 0; slot < 32; slot++) {
98 for (func = 0; func < 8; func++) {
99 u32 class;
100 u8 type;
101 class = read_pci_config(bus, slot, func,
102 PCI_CLASS_REVISION);
103 if (class == 0xffffffff)
104 break;
105
106 early_dump_pci_device(bus, slot, func);
107
108 /* No multi-function device? */
109 type = read_pci_config_byte(bus, slot, func,
110 PCI_HEADER_TYPE);
111 if (!(type & 0x80))
112 break;
113 }
114 }
115 }
116}
117
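[ A hedged sketch of how the new pci_early_dump_regs flag (pci/common.c) and early_dump_pci_devices() (pci/early.c) are meant to connect. The actual call site is not part of these hunks, so the wrapper below is an assumption. ]

extern unsigned int pci_early_dump_regs;	/* set by the "pci=earlydump" option above */
void early_dump_pci_devices(void);		/* added in arch/x86/pci/early.c above */

static void __init maybe_dump_early_pci(void)
{
	if (pci_early_dump_regs)
		early_dump_pci_devices();	/* walks buses 0-255 and prints each config space */
}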
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5281e343dd9f..2aafb67dc5f1 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -334,9 +334,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
334 flags = new_flags; 334 flags = new_flags;
335 } 335 }
336 336
337 if (((vma->vm_pgoff <= max_low_pfn_mapped) || 337 if (((vma->vm_pgoff < max_low_pfn_mapped) ||
338 (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) && 338 (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
339 vma->vm_pgoff <= max_pfn_mapped)) && 339 vma->vm_pgoff < max_pfn_mapped)) &&
340 ioremap_change_attr((unsigned long)__va(addr), len, flags)) { 340 ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
341 free_memtype(addr, addr + len); 341 free_memtype(addr, addr + len);
342 return -EINVAL; 342 return -EINVAL;
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index dc568c6b83f8..6a06a2eb0597 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -45,7 +45,8 @@ struct irq_router {
45 char *name; 45 char *name;
46 u16 vendor, device; 46 u16 vendor, device;
47 int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); 47 int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
48 int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); 48 int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq,
49 int new);
49}; 50};
50 51
51struct irq_router_handler { 52struct irq_router_handler {
@@ -77,7 +78,8 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
77 for (i = 0; i < rt->size; i++) 78 for (i = 0; i < rt->size; i++)
78 sum += addr[i]; 79 sum += addr[i];
79 if (!sum) { 80 if (!sum) {
80 DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); 81 DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
82 rt);
81 return rt; 83 return rt;
82 } 84 }
83 return NULL; 85 return NULL;
@@ -183,7 +185,8 @@ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset,
183 return (nr & 1) ? (x >> 4) : (x & 0xf); 185 return (nr & 1) ? (x >> 4) : (x & 0xf);
184} 186}
185 187
186static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) 188static void write_config_nybble(struct pci_dev *router, unsigned offset,
189 unsigned nr, unsigned int val)
187{ 190{
188 u8 x; 191 u8 x;
189 unsigned reg = offset + (nr >> 1); 192 unsigned reg = offset + (nr >> 1);
@@ -467,7 +470,8 @@ static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int
467 return inb(0xc01) & 0xf; 470 return inb(0xc01) & 0xf;
468} 471}
469 472
470static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) 473static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev,
474 int pirq, int irq)
471{ 475{
472 outb(pirq, 0xc00); 476 outb(pirq, 0xc00);
473 outb(irq, 0xc01); 477 outb(irq, 0xc01);
@@ -660,7 +664,8 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router
660} 664}
661 665
662 666
663static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 667static __init int serverworks_router_probe(struct irq_router *r,
668 struct pci_dev *router, u16 device)
664{ 669{
665 switch (device) { 670 switch (device) {
666 case PCI_DEVICE_ID_SERVERWORKS_OSB4: 671 case PCI_DEVICE_ID_SERVERWORKS_OSB4:
@@ -827,10 +832,12 @@ static void __init pirq_find_router(struct irq_router *r)
 
 	for (h = pirq_routers; h->vendor; h++) {
 		/* First look for a router match */
-		if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
+		if (rt->rtr_vendor == h->vendor &&
+			h->probe(r, pirq_router_dev, rt->rtr_device))
 			break;
 		/* Fall back to a device match */
-		if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
+		if (pirq_router_dev->vendor == h->vendor &&
+			h->probe(r, pirq_router_dev, pirq_router_dev->device))
 			break;
 	}
 	printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
@@ -845,11 +852,13 @@ static void __init pirq_find_router(struct irq_router *r)
 static struct irq_info *pirq_get_info(struct pci_dev *dev)
 {
 	struct irq_routing_table *rt = pirq_table;
-	int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+	int entries = (rt->size - sizeof(struct irq_routing_table)) /
+		sizeof(struct irq_info);
 	struct irq_info *info;
 
 	for (info = rt->slots; entries--; info++)
-		if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+		if (info->bus == dev->bus->number &&
+			PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
 			return info;
 	return NULL;
 }
@@ -890,7 +899,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		DBG(" -> not routed\n" KERN_DEBUG);
 		return 0;
 	}
-	DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+	DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask,
+		pirq_table->exclusive_irqs);
 	mask &= pcibios_irq_mask;
 
 	/* Work around broken HP Pavilion Notebooks which assign USB to
@@ -903,7 +913,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 	}
 
 	/* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
-	if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
+	if (acer_tm360_irqrouting && dev->irq == 11 &&
+		dev->vendor == PCI_VENDOR_ID_O2) {
 		pirq = 0x68;
 		mask = 0x400;
 		dev->irq = r->get(pirq_router_dev, dev, pirq);
@@ -920,15 +931,16 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 			newirq = 0;
 		else
 			printk("\n" KERN_WARNING
-				"PCI: IRQ %i for device %s doesn't match PIRQ mask "
-				"- try pci=usepirqmask\n" KERN_DEBUG, newirq,
+				"PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n"
+				KERN_DEBUG, newirq,
 				pci_name(dev));
 	}
 	if (!newirq && assign) {
 		for (i = 0; i < 16; i++) {
 			if (!(mask & (1 << i)))
 				continue;
-			if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED))
+			if (pirq_penalty[i] < pirq_penalty[newirq] &&
+				can_request_irq(i, IRQF_SHARED))
 				newirq = i;
 		}
 	}
@@ -944,7 +956,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		DBG(" -> got IRQ %d\n", irq);
 		msg = "Found";
 		eisa_set_level_irq(irq);
-	} else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+	} else if (newirq && r->set &&
+		(dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
 		DBG(" -> assigning IRQ %d", newirq);
 		if (r->set(pirq_router_dev, dev, pirq, newirq)) {
 			eisa_set_level_irq(newirq);
@@ -962,7 +975,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		} else
 			return 0;
 	}
-	printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
+	printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq,
+		pci_name(dev));
 
 	/* Update IRQ for all devices with the same pirq value */
 	while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
@@ -974,7 +988,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 		if (!info)
 			continue;
 		if (info->irq[pin].link == pirq) {
-			/* We refuse to override the dev->irq information. Give a warning! */
+			/*
+			 * We refuse to override the dev->irq
+			 * information. Give a warning!
+			 */
 			if (dev2->irq && dev2->irq != irq && \
 			(!(pci_probe & PCI_USE_PIRQ_MASK) || \
 			((1 << dev2->irq) & mask))) {
@@ -987,7 +1004,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 			dev2->irq = irq;
 			pirq_penalty[irq]++;
 			if (dev != dev2)
-				printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
+				printk(KERN_INFO
+					"PCI: Sharing IRQ %d with %s\n",
+					irq, pci_name(dev2));
 		}
 	}
 	return 1;
@@ -1001,15 +1020,21 @@ static void __init pcibios_fixup_irqs(void)
 	DBG(KERN_DEBUG "PCI: IRQ fixup\n");
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		/*
-		 * If the BIOS has set an out of range IRQ number, just ignore it.
-		 * Also keep track of which IRQ's are already in use.
+		 * If the BIOS has set an out of range IRQ number, just
+		 * ignore it. Also keep track of which IRQ's are
+		 * already in use.
 		 */
 		if (dev->irq >= 16) {
-			DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
+			DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n",
+				pci_name(dev), dev->irq);
 			dev->irq = 0;
 		}
-		/* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
-		if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+		/*
+		 * If the IRQ is already assigned to a PCI device,
+		 * ignore its ISA use penalty
+		 */
+		if (pirq_penalty[dev->irq] >= 100 &&
+			pirq_penalty[dev->irq] < 100000)
 			pirq_penalty[dev->irq] = 0;
 		pirq_penalty[dev->irq]++;
 	}
@@ -1025,8 +1050,13 @@ static void __init pcibios_fixup_irqs(void)
 			int irq;
 
 			if (pin) {
-				pin--;	/* interrupt pins are numbered starting from 1 */
-				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+				/*
+				 * interrupt pins are numbered starting
+				 * from 1
+				 */
+				pin--;
+				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+					PCI_SLOT(dev->devfn), pin);
 				/*
 				 * Busses behind bridges are typically not listed in the MP-table.
 				 * In this case we have to look up the IRQ based on the parent bus,
@@ -1067,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d)
 {
 	if (!broken_hp_bios_irq9) {
 		broken_hp_bios_irq9 = 1;
-		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+			d->ident);
 	}
 	return 0;
 }
@@ -1080,7 +1111,8 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d)
 {
 	if (!acer_tm360_irqrouting) {
 		acer_tm360_irqrouting = 1;
-		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+		printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+			d->ident);
 	}
 	return 0;
 }
@@ -1092,7 +1124,8 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = {
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
 			DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
-			DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
+			DMI_MATCH(DMI_PRODUCT_VERSION,
+				"HP Pavilion Notebook Model GE"),
 			DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
 		},
 	},
@@ -1131,7 +1164,10 @@ int __init pcibios_irq_init(void)
 			if (!(pirq_table->exclusive_irqs & (1 << i)))
 				pirq_penalty[i] += 100;
 		}
-		/* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+		/*
+		 * If we're using the I/O APIC, avoid using the PCI IRQ
+		 * routing table
+		 */
 		if (io_apic_assign_pci_irqs)
 			pirq_table = NULL;
 	}
@@ -1175,7 +1211,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
 		char *msg = "";
 
-		pin--;	/* interrupt pins are numbered starting from 1 */
+		pin--; /* interrupt pins are numbered starting from 1 */
 
 		if (io_apic_assign_pci_irqs) {
 			int irq;
@@ -1195,13 +1231,16 @@ static int pirq_enable_irq(struct pci_dev *dev)
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn), pin);
 				if (irq >= 0)
-					printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
-						pci_name(bridge), 'A' + pin, irq);
+					printk(KERN_WARNING
+						"PCI: using PPB %s[%c] to get irq %d\n",
+						pci_name(bridge),
+						'A' + pin, irq);
 				dev = bridge;
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+				printk(KERN_INFO
+					"PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
 					pci_name(dev), 'A' + pin, irq);
 				dev->irq = irq;
 				return 0;
@@ -1212,12 +1251,17 @@ static int pirq_enable_irq(struct pci_dev *dev)
 		else
 			msg = " Please try using pci=biosirq.";
 
-		/* With IDE legacy devices the IRQ lookup failure is not a problem.. */
-		if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+		/*
+		 * With IDE legacy devices the IRQ lookup failure is not
+		 * a problem..
+		 */
+		if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE &&
+			!(dev->class & 0x5))
 			return 0;
 
-		printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
-			'A' + pin, pci_name(dev), msg);
+		printk(KERN_WARNING
+			"PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+			'A' + pin, pci_name(dev), msg);
 	}
 	return 0;
 }
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index b2270a55b0cf..3e25deb821ac 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -28,6 +28,7 @@
 #define PCI_USE__CRS		0x10000
 #define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000
 #define PCI_HAS_IO_ECS		0x40000
+#define PCI_NOASSIGN_ROMS	0x80000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
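PCI_NOASSIGN_ROMS joins the existing pci_probe flag bits; this hunk only reserves the bit. Like its neighbours, the flag is presumably set from a pci= boot option and consulted before expansion-ROM resources are assigned, but that code is not part of this hunk. An illustrative sketch of the usual plumbing (option name and handler assumed, not shown in this diff):

	/* Assumed-for-illustration handling in pcibios_setup(): */
	char * __devinit pcibios_setup(char *str)
	{
		if (!strcmp(str, "norom")) {
			pci_probe |= PCI_NOASSIGN_ROMS;
			return NULL;	/* option consumed */
		}
		return str;
	}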
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index efa2ba7c6005..1ef0f90813d6 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@
 
 #define gtod vdso_vsyscall_gtod_data
 
-static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
 	asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
 	return ret;
 }
 
-static inline long vgetns(void)
+notrace static inline long vgetns(void)
 {
 	long v;
 	cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
 	return (v * gtod->clock.mult) >> gtod->clock.shift;
 }
 
-static noinline int do_realtime(struct timespec *ts)
+notrace static noinline int do_realtime(struct timespec *ts)
 {
 	unsigned long seq, ns;
 	do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
 }
 
 /* Copy of the version in kernel/time.c which we cannot directly access */
-static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+notrace static void
+vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 {
 	while (nsec >= NSEC_PER_SEC) {
 		nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 	ts->tv_nsec = nsec;
 }
 
-static noinline int do_monotonic(struct timespec *ts)
+notrace static noinline int do_monotonic(struct timespec *ts)
 {
 	unsigned long seq, ns, secs;
 	do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
 	return 0;
 }
 
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
 	if (likely(gtod->sysctl_enabled && gtod->clock.vread))
 		switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 int clock_gettime(clockid_t, struct timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));
 
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 	if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
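The only functional change in these vDSO hunks is the notrace qualifier, which keeps function-tracing instrumentation out of code that executes in user space, where a tracer callback into the kernel would be fatal. In this tree notrace comes from include/linux/compiler.h and is approximately:

	/* include/linux/compiler.h (paraphrased, may not be byte-exact): */
	#define notrace __attribute__((no_instrument_function))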
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f17f8a9..9fbc6b20026b 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
 #include <asm/vgtod.h>
 #include "vextern.h"
 
-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+notrace long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
 	unsigned int p;
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d11dda7ebd7a..402f3e2c7bee 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1243,7 +1243,9 @@ static const struct smp_ops xen_smp_ops __initdata = {
 
 	.smp_send_stop = xen_smp_send_stop,
 	.smp_send_reschedule = xen_smp_send_reschedule,
-	.smp_call_function_mask = xen_smp_call_function_mask,
+
+	.send_call_func_ipi = xen_smp_send_call_function_ipi,
+	.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
 };
 #endif	/* CONFIG_SMP */
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 42b3b9ed641d..ff0aa74afaa1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -796,7 +796,7 @@ static void drop_mm_ref(struct mm_struct *mm)
 	}
 
 	if (!cpus_empty(mask))
-		xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+		smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 }
 #else
 static void drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d2e3c20127d7..233156f39b7f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,27 +36,14 @@
 #include "mmu.h"
 
 cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, callfunc_irq) = -1;
-static DEFINE_PER_CPU(int, debug_irq) = -1;
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
 
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	atomic_t started;
-	atomic_t finished;
-	int wait;
-};
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, callfuncsingle_irq);
+static DEFINE_PER_CPU(int, debug_irq) = -1;
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
  * Reschedule call back. Nothing to do,
@@ -128,6 +115,17 @@ static int xen_smp_intr_init(unsigned int cpu)
 		goto fail;
 	per_cpu(debug_irq, cpu) = rc;
 
+	callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+				    cpu,
+				    xen_call_function_single_interrupt,
+				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+				    callfunc_name,
+				    NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(callfuncsingle_irq, cpu) = rc;
+
 	return 0;
 
  fail:
@@ -137,6 +135,9 @@ static int xen_smp_intr_init(unsigned int cpu)
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
 	if (per_cpu(debug_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+	if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
 	return rc;
 }
 
142 143
@@ -336,7 +337,7 @@ static void stop_self(void *v)
 
 void xen_smp_send_stop(void)
 {
-	smp_call_function(stop_self, NULL, 0, 0);
+	smp_call_function(stop_self, NULL, 0);
 }
 
 void xen_smp_send_reschedule(int cpu)
@@ -344,7 +345,6 @@ void xen_smp_send_reschedule(int cpu)
 	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
 }
 
-
 static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
 {
 	unsigned cpu;
@@ -355,83 +355,42 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
 		xen_send_IPI_one(cpu, vector);
 }
 
+void xen_smp_send_call_function_ipi(cpumask_t mask)
+{
+	int cpu;
+
+	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+	/* Make sure other vcpus get a chance to run if they need to. */
+	for_each_cpu_mask(cpu, mask) {
+		if (xen_vcpu_stolen(cpu)) {
+			HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+			break;
+		}
+	}
+}
+
+void xen_smp_send_call_function_single_ipi(int cpu)
+{
+	xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+}
+
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	int wait = call_data->wait;
-
-	/*
-	 * Notify initiating CPU that I've grabbed the data and am
-	 * about to execute the function
-	 */
-	mb();
-	atomic_inc(&call_data->started);
-	/*
-	 * At this point the info structure may be out of scope unless wait==1
-	 */
 	irq_enter();
-	(*func)(info);
+	generic_smp_call_function_interrupt();
 	__get_cpu_var(irq_stat).irq_call_count++;
 	irq_exit();
 
-	if (wait) {
-		mb();		/* commit everything before setting finished */
-		atomic_inc(&call_data->finished);
-	}
-
 	return IRQ_HANDLED;
 }
 
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-			       void *info, int wait)
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
-	struct call_data_struct data;
-	int cpus, cpu;
-	bool yield;
-
-	/* Holding any lock stops cpus from going down. */
-	spin_lock(&call_lock);
-
-	cpu_clear(smp_processor_id(), mask);
-
-	cpus = cpus_weight(mask);
-	if (!cpus) {
-		spin_unlock(&call_lock);
-		return 0;
-	}
-
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	mb();			/* write everything before IPI */
-
-	/* Send a message to other CPUs and wait for them to respond */
-	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
-	/* Make sure other vcpus get a chance to run if they need to. */
-	yield = false;
-	for_each_cpu_mask(cpu, mask)
-		if (xen_vcpu_stolen(cpu))
-			yield = true;
-
-	if (yield)
-		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus ||
-	       (wait && atomic_read(&data.finished) != cpus))
-		cpu_relax();
-
-	spin_unlock(&call_lock);
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+	__get_cpu_var(irq_stat).irq_call_count++;
+	irq_exit();
 
-	return 0;
+	return IRQ_HANDLED;
 }
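The xen/smp.c rewrite above replaces the private call_data_struct/call_lock machinery with the generic cross-CPU call helpers in kernel/smp.c: the IPI handlers now simply wrap generic_smp_call_function_interrupt() and generic_smp_call_function_single_interrupt(), and callers move to the reworked API in which the old 'nonatomic' argument is gone (see the xen_smp_send_stop() hunk). A caller-side sketch of the new convention, with the callback and CPU number invented for illustration:

	/* Runs on each targeted CPU, in interrupt context. */
	static void remote_tick(void *info)
	{
		/* per-cpu work goes here */
	}

	static void kick_other_cpus(void)
	{
		smp_call_function(remote_tick, NULL, 1);           /* all other CPUs, wait */
		smp_call_function_single(2, remote_tick, NULL, 0); /* only CPU 2, no wait */
	}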
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d852ddbb3448..6f4b1045c1c2 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -55,13 +55,8 @@ void xen_smp_cpus_done(unsigned int max_cpus);
 
 void xen_smp_send_stop(void);
 void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-			   int wait);
-int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-				 int nonatomic, int wait);
-
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
-			       void *info, int wait);
+void xen_smp_send_call_function_ipi(cpumask_t mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
 
 extern cpumask_t xen_cpu_initialized_map;
 