author	Linus Torvalds <torvalds@g5.osdl.org>	2006-09-26 16:07:55 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-09-26 16:07:55 -0400
commit	b278240839e20fa9384ea430df463b367b90e04e (patch)
tree	f99f0c8cdd4cc7f177cd75440e6bd181cded7fb3
parent	dd77a4ee0f3981693d4229aa1d57cea9e526ff47 (diff)
parent	3f75f42d7733e73aca5c78326489efd4189e0111 (diff)
Merge branch 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6
* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (225 commits)
  [PATCH] Don't set calgary iommu as default y
  [PATCH] i386/x86-64: New Intel feature flags
  [PATCH] x86: Add a cumulative thermal throttle event counter.
  [PATCH] i386: Make the jiffies compares use the 64bit safe macros.
  [PATCH] x86: Refactor thermal throttle processing
  [PATCH] Add 64bit jiffies compares (for use with get_jiffies_64)
  [PATCH] Fix unwinder warning in traps.c
  [PATCH] x86: Allow disabling early pci scans with pci=noearly or disallowing conf1
  [PATCH] x86: Move direct PCI scanning functions out of line
  [PATCH] i386/x86-64: Make all early PCI scans dependent on CONFIG_PCI
  [PATCH] Don't leak NT bit into next task
  [PATCH] i386/x86-64: Work around gcc bug with noreturn functions in unwinder
  [PATCH] Fix some broken white space in ia32_signal.c
  [PATCH] Initialize argument registers for 32bit signal handlers.
  [PATCH] Remove all traces of signal number conversion
  [PATCH] Don't synchronize time reading on single core AMD systems
  [PATCH] Remove outdated comment in x86-64 mmconfig code
  [PATCH] Use string instructions for Core2 copy/clear
  [PATCH] x86: - restore i8259A eoi status on resume
  [PATCH] i386: Split multi-line printk in oops output.
  ...
-rw-r--r--  Documentation/HOWTO | 3
-rw-r--r--  Documentation/filesystems/proc.txt | 14
-rw-r--r--  Documentation/kbuild/makefiles.txt | 5
-rw-r--r--  Documentation/kernel-parameters.txt | 6
-rw-r--r--  Documentation/x86_64/boot-options.txt | 7
-rw-r--r--  Documentation/x86_64/kernel-stacks | 99
-rw-r--r--  arch/i386/Kconfig | 17
-rw-r--r--  arch/i386/Makefile | 8
-rw-r--r--  arch/i386/boot/edd.S | 97
-rw-r--r--  arch/i386/boot/setup.S | 4
-rw-r--r--  arch/i386/defconfig | 1063
-rw-r--r--  arch/i386/kernel/Makefile | 3
-rw-r--r--  arch/i386/kernel/acpi/Makefile | 2
-rw-r--r--  arch/i386/kernel/acpi/boot.c | 181
-rw-r--r--  arch/i386/kernel/acpi/earlyquirk.c | 6
-rw-r--r--  arch/i386/kernel/apic.c | 31
-rw-r--r--  arch/i386/kernel/cpu/amd.c | 7
-rw-r--r--  arch/i386/kernel/cpu/centaur.c | 24
-rw-r--r--  arch/i386/kernel/cpu/common.c | 8
-rw-r--r--  arch/i386/kernel/cpu/cpu.h | 2
-rw-r--r--  arch/i386/kernel/cpu/cyrix.c | 42
-rw-r--r--  arch/i386/kernel/cpu/intel.c | 3
-rw-r--r--  arch/i386/kernel/cpu/mcheck/Makefile | 2
-rw-r--r--  arch/i386/kernel/cpu/mcheck/p4.c | 26
-rw-r--r--  arch/i386/kernel/cpu/mcheck/therm_throt.c | 180
-rw-r--r--  arch/i386/kernel/cpu/nexgen.c | 9
-rw-r--r--  arch/i386/kernel/cpu/proc.c | 4
-rw-r--r--  arch/i386/kernel/cpu/rise.c | 4
-rw-r--r--  arch/i386/kernel/cpu/transmeta.c | 7
-rw-r--r--  arch/i386/kernel/cpu/umc.c | 7
-rw-r--r--  arch/i386/kernel/crash.c | 22
-rw-r--r--  arch/i386/kernel/entry.S | 110
-rw-r--r--  arch/i386/kernel/head.S | 67
-rw-r--r--  arch/i386/kernel/i8259.c | 6
-rw-r--r--  arch/i386/kernel/io_apic.c | 125
-rw-r--r--  arch/i386/kernel/machine_kexec.c | 140
-rw-r--r--  arch/i386/kernel/mca.c | 8
-rw-r--r--  arch/i386/kernel/mpparse.c | 70
-rw-r--r--  arch/i386/kernel/nmi.c | 940
-rw-r--r--  arch/i386/kernel/process.c | 14
-rw-r--r--  arch/i386/kernel/ptrace.c | 10
-rw-r--r--  arch/i386/kernel/relocate_kernel.S | 162
-rw-r--r--  arch/i386/kernel/semaphore.c | 134
-rw-r--r--  arch/i386/kernel/setup.c | 367
-rw-r--r--  arch/i386/kernel/smpboot.c | 19
-rw-r--r--  arch/i386/kernel/stacktrace.c | 98
-rw-r--r--  arch/i386/kernel/syscall_table.S | 1
-rw-r--r--  arch/i386/kernel/time.c | 23
-rw-r--r--  arch/i386/kernel/topology.c | 21
-rw-r--r--  arch/i386/kernel/traps.c | 224
-rw-r--r--  arch/i386/kernel/tsc.c | 2
-rw-r--r--  arch/i386/lib/Makefile | 2
-rw-r--r--  arch/i386/lib/semaphore.S | 217
-rw-r--r--  arch/i386/mach-generic/bigsmp.c | 1
-rw-r--r--  arch/i386/mach-generic/es7000.c | 1
-rw-r--r--  arch/i386/mach-generic/probe.c | 60
-rw-r--r--  arch/i386/mach-generic/summit.c | 1
-rw-r--r--  arch/i386/mm/discontig.c | 5
-rw-r--r--  arch/i386/mm/extable.c | 2
-rw-r--r--  arch/i386/mm/fault.c | 25
-rw-r--r--  arch/i386/mm/highmem.c | 2
-rw-r--r--  arch/i386/mm/init.c | 38
-rw-r--r--  arch/i386/oprofile/nmi_int.c | 88
-rw-r--r--  arch/i386/oprofile/nmi_timer_int.c | 35
-rw-r--r--  arch/i386/oprofile/op_model_athlon.c | 54
-rw-r--r--  arch/i386/oprofile/op_model_p4.c | 152
-rw-r--r--  arch/i386/oprofile/op_model_ppro.c | 65
-rw-r--r--  arch/i386/oprofile/op_x86_model.h | 1
-rw-r--r--  arch/i386/pci/Makefile | 2
-rw-r--r--  arch/i386/pci/common.c | 4
-rw-r--r--  arch/i386/pci/direct.c | 25
-rw-r--r--  arch/i386/pci/early.c | 52
-rw-r--r--  arch/i386/pci/init.c | 9
-rw-r--r--  arch/i386/pci/mmconfig.c | 41
-rw-r--r--  arch/i386/pci/pci.h | 7
-rw-r--r--  arch/s390/kernel/stacktrace.c | 17
-rw-r--r--  arch/um/sys-i386/Makefile | 2
-rw-r--r--  arch/x86_64/Kconfig | 40
-rw-r--r--  arch/x86_64/Makefile | 10
-rw-r--r--  arch/x86_64/boot/compressed/Makefile | 3
-rw-r--r--  arch/x86_64/boot/setup.S | 4
-rw-r--r--  arch/x86_64/defconfig | 109
-rw-r--r--  arch/x86_64/ia32/ia32_aout.c | 8
-rw-r--r--  arch/x86_64/ia32/ia32_signal.c | 53
-rw-r--r--  arch/x86_64/ia32/ia32entry.S | 9
-rw-r--r--  arch/x86_64/ia32/ptrace32.c | 10
-rw-r--r--  arch/x86_64/ia32/sys_ia32.c | 52
-rw-r--r--  arch/x86_64/kernel/Makefile | 9
-rw-r--r--  arch/x86_64/kernel/aperture.c | 25
-rw-r--r--  arch/x86_64/kernel/apic.c | 229
-rw-r--r--  arch/x86_64/kernel/crash.c | 26
-rw-r--r--  arch/x86_64/kernel/e820.c | 118
-rw-r--r--  arch/x86_64/kernel/early-quirks.c | 122
-rw-r--r--  arch/x86_64/kernel/early_printk.c | 20
-rw-r--r--  arch/x86_64/kernel/entry.S | 63
-rw-r--r--  arch/x86_64/kernel/genapic_cluster.c | 1
-rw-r--r--  arch/x86_64/kernel/genapic_flat.c | 5
-rw-r--r--  arch/x86_64/kernel/head.S | 15
-rw-r--r--  arch/x86_64/kernel/head64.c | 44
-rw-r--r--  arch/x86_64/kernel/i8259.c | 15
-rw-r--r--  arch/x86_64/kernel/io_apic.c | 482
-rw-r--r--  arch/x86_64/kernel/ioport.c | 1
-rw-r--r--  arch/x86_64/kernel/irq.c | 12
-rw-r--r--  arch/x86_64/kernel/machine_kexec.c | 99
-rw-r--r--  arch/x86_64/kernel/mce.c | 29
-rw-r--r--  arch/x86_64/kernel/mce_intel.c | 30
-rw-r--r--  arch/x86_64/kernel/mpparse.c | 238
-rw-r--r--  arch/x86_64/kernel/nmi.c | 840
-rw-r--r--  arch/x86_64/kernel/pci-calgary.c | 142
-rw-r--r--  arch/x86_64/kernel/pci-dma.c | 7
-rw-r--r--  arch/x86_64/kernel/pci-gart.c | 3
-rw-r--r--  arch/x86_64/kernel/pci-nommu.c | 1
-rw-r--r--  arch/x86_64/kernel/process.c | 110
-rw-r--r--  arch/x86_64/kernel/ptrace.c | 29
-rw-r--r--  arch/x86_64/kernel/relocate_kernel.S | 171
-rw-r--r--  arch/x86_64/kernel/setup.c | 241
-rw-r--r--  arch/x86_64/kernel/setup64.c | 45
-rw-r--r--  arch/x86_64/kernel/signal.c | 87
-rw-r--r--  arch/x86_64/kernel/smp.c | 23
-rw-r--r--  arch/x86_64/kernel/smpboot.c | 14
-rw-r--r--  arch/x86_64/kernel/stacktrace.c | 220
-rw-r--r--  arch/x86_64/kernel/tce.c | 12
-rw-r--r--  arch/x86_64/kernel/time.c | 94
-rw-r--r--  arch/x86_64/kernel/trampoline.S | 2
-rw-r--r--  arch/x86_64/kernel/traps.c | 204
-rw-r--r--  arch/x86_64/kernel/vmlinux.lds.S | 25
-rw-r--r--  arch/x86_64/kernel/vsmp.c | 3
-rw-r--r--  arch/x86_64/kernel/vsyscall.c | 98
-rw-r--r--  arch/x86_64/kernel/x8664_ksyms.c | 1
-rw-r--r--  arch/x86_64/lib/Makefile | 2
-rw-r--r--  arch/x86_64/lib/clear_page.S | 47
-rw-r--r--  arch/x86_64/lib/copy_page.S | 53
-rw-r--r--  arch/x86_64/lib/copy_user.S | 153
-rw-r--r--  arch/x86_64/lib/csum-copy.S | 26
-rw-r--r--  arch/x86_64/lib/getuser.S | 32
-rw-r--r--  arch/x86_64/lib/iomap_copy.S | 10
-rw-r--r--  arch/x86_64/lib/memcpy.S | 69
-rw-r--r--  arch/x86_64/lib/memset.S | 79
-rw-r--r--  arch/x86_64/lib/putuser.S | 32
-rw-r--r--  arch/x86_64/lib/rwlock.S | 38
-rw-r--r--  arch/x86_64/lib/thunk.S | 43
-rw-r--r--  arch/x86_64/mm/fault.c | 22
-rw-r--r--  arch/x86_64/mm/init.c | 58
-rw-r--r--  arch/x86_64/mm/k8topology.c | 3
-rw-r--r--  arch/x86_64/mm/numa.c | 11
-rw-r--r--  arch/x86_64/mm/pageattr.c | 24
-rw-r--r--  arch/x86_64/mm/srat.c | 2
-rw-r--r--  arch/x86_64/pci/Makefile | 3
-rw-r--r--  arch/x86_64/pci/mmconfig.c | 44
-rw-r--r--  drivers/char/hpet.c | 4
-rw-r--r--  drivers/pci/pci.c | 5
-rw-r--r--  fs/binfmt_elf.c | 3
-rw-r--r--  fs/compat.c | 5
-rw-r--r--  include/asm-i386/acpi.h | 14
-rw-r--r--  include/asm-i386/alternative-asm.i | 14
-rw-r--r--  include/asm-i386/apic.h | 16
-rw-r--r--  include/asm-i386/desc.h | 121
-rw-r--r--  include/asm-i386/dwarf2.h | 11
-rw-r--r--  include/asm-i386/e820.h | 2
-rw-r--r--  include/asm-i386/frame.i | 24
-rw-r--r--  include/asm-i386/genapic.h | 69
-rw-r--r--  include/asm-i386/intel_arch_perfmon.h | 14
-rw-r--r--  include/asm-i386/io_apic.h | 11
-rw-r--r--  include/asm-i386/kexec.h | 27
-rw-r--r--  include/asm-i386/mach-es7000/mach_apic.h | 4
-rw-r--r--  include/asm-i386/mach-summit/mach_apic.h | 11
-rw-r--r--  include/asm-i386/mutex.h | 16
-rw-r--r--  include/asm-i386/nmi.h | 37
-rw-r--r--  include/asm-i386/pgtable.h | 2
-rw-r--r--  include/asm-i386/ptrace.h | 9
-rw-r--r--  include/asm-i386/rwlock.h | 48
-rw-r--r--  include/asm-i386/rwsem.h | 62
-rw-r--r--  include/asm-i386/segment.h | 17
-rw-r--r--  include/asm-i386/semaphore.h | 49
-rw-r--r--  include/asm-i386/smp.h | 20
-rw-r--r--  include/asm-i386/spinlock.h | 134
-rw-r--r--  include/asm-i386/stacktrace.h | 1
-rw-r--r--  include/asm-i386/therm_throt.h | 9
-rw-r--r--  include/asm-i386/tlbflush.h | 4
-rw-r--r--  include/asm-i386/tsc.h | 1
-rw-r--r--  include/asm-i386/unistd.h | 3
-rw-r--r--  include/asm-i386/unwind.h | 8
-rw-r--r--  include/asm-ia64/module.h | 3
-rw-r--r--  include/asm-um/alternative-asm.i | 6
-rw-r--r--  include/asm-um/frame.i | 6
-rw-r--r--  include/asm-x86_64/acpi.h | 2
-rw-r--r--  include/asm-x86_64/alternative-asm.i | 14
-rw-r--r--  include/asm-x86_64/apic.h | 9
-rw-r--r--  include/asm-x86_64/bitops.h | 2
-rw-r--r--  include/asm-x86_64/calgary.h | 7
-rw-r--r--  include/asm-x86_64/dwarf2.h | 8
-rw-r--r--  include/asm-x86_64/e820.h | 9
-rw-r--r--  include/asm-x86_64/fixmap.h | 4
-rw-r--r--  include/asm-x86_64/genapic.h | 1
-rw-r--r--  include/asm-x86_64/i387.h | 9
-rw-r--r--  include/asm-x86_64/intel_arch_perfmon.h | 14
-rw-r--r--  include/asm-x86_64/io_apic.h | 6
-rw-r--r--  include/asm-x86_64/irq.h | 2
-rw-r--r--  include/asm-x86_64/kexec.h | 29
-rw-r--r--  include/asm-x86_64/linkage.h | 2
-rw-r--r--  include/asm-x86_64/mach_apic.h | 1
-rw-r--r--  include/asm-x86_64/mce.h | 2
-rw-r--r--  include/asm-x86_64/mmx.h | 14
-rw-r--r--  include/asm-x86_64/mpspec.h | 11
-rw-r--r--  include/asm-x86_64/msr.h | 11
-rw-r--r--  include/asm-x86_64/mutex.h | 20
-rw-r--r--  include/asm-x86_64/nmi.h | 38
-rw-r--r--  include/asm-x86_64/pci-direct.h | 42
-rw-r--r--  include/asm-x86_64/pda.h | 109
-rw-r--r--  include/asm-x86_64/percpu.h | 10
-rw-r--r--  include/asm-x86_64/pgtable.h | 8
-rw-r--r--  include/asm-x86_64/proto.h | 15
-rw-r--r--  include/asm-x86_64/rwlock.h | 64
-rw-r--r--  include/asm-x86_64/segment.h | 5
-rw-r--r--  include/asm-x86_64/semaphore.h | 40
-rw-r--r--  include/asm-x86_64/signal.h | 4
-rw-r--r--  include/asm-x86_64/smp.h | 29
-rw-r--r--  include/asm-x86_64/spinlock.h | 79
-rw-r--r--  include/asm-x86_64/stacktrace.h | 18
-rw-r--r--  include/asm-x86_64/system.h | 5
-rw-r--r--  include/asm-x86_64/tce.h | 1
-rw-r--r--  include/asm-x86_64/therm_throt.h | 1
-rw-r--r--  include/asm-x86_64/thread_info.h | 9
-rw-r--r--  include/asm-x86_64/tlbflush.h | 70
-rw-r--r--  include/asm-x86_64/uaccess.h | 68
-rw-r--r--  include/asm-x86_64/unistd.h | 5
-rw-r--r--  include/asm-x86_64/unwind.h | 9
-rw-r--r--  include/asm-x86_64/vsyscall.h | 9
-rw-r--r--  include/linux/edd.h | 1
-rw-r--r--  include/linux/getcpu.h | 16
-rw-r--r--  include/linux/jiffies.h | 15
-rw-r--r--  include/linux/kernel.h | 1
-rw-r--r--  include/linux/linkage.h | 6
-rw-r--r--  include/linux/sched.h | 14
-rw-r--r--  include/linux/stacktrace.h | 7
-rw-r--r--  include/linux/syscalls.h | 2
-rw-r--r--  include/linux/sysctl.h | 2
-rw-r--r--  include/linux/vermagic.h | 4
-rw-r--r--  init/main.c | 14
-rw-r--r--  kernel/fork.c | 5
-rw-r--r--  kernel/lockdep.c | 9
-rw-r--r--  kernel/panic.c | 13
-rw-r--r--  kernel/spinlock.c | 5
-rw-r--r--  kernel/sys.c | 31
-rw-r--r--  kernel/sysctl.c | 23
-rw-r--r--  kernel/unwind.c | 35
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  lib/hweight.c | 10
-rw-r--r--  scripts/Kbuild.include | 7
-rw-r--r--  scripts/gcc-x86_64-has-stack-protector.sh | 6
250 files changed, 6891 insertions, 5288 deletions
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index 915ae8c986c6..1d6560413cc5 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -358,7 +358,8 @@ Here is a list of some of the different kernel trees available:
   quilt trees:
     - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
 	kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
-
+  - x86-64, partly i386, Andi Kleen <ak@suse.de>
+	ftp.firstfloor.org:/pub/ak/x86_64/quilt/
 
 Bug Reporting
 -------------
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 99902ae6804e..7db71d6fba82 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1124,11 +1124,15 @@ debugging information is displayed on console.
 NMI switch that most IA32 servers have fires unknown NMI up, for example.
 If a system hangs up, try pressing the NMI switch.
 
-[NOTE]
-   This function and oprofile share a NMI callback. Therefore this function
-   cannot be enabled when oprofile is activated.
-   And NMI watchdog will be disabled when the value in this file is set to
-   non-zero.
+nmi_watchdog
+------------
+
+Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
+the NMI watchdog is enabled and will continuously test all online cpus to
+determine whether or not they are still functioning properly.
+
+Because the NMI watchdog shares registers with oprofile, by disabling the NMI
+watchdog, oprofile may have more registers to utilize.
 
 
 2.4 /proc/sys/vm - The virtual memory subsystem
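
As background for the nmi_watchdog text above: the watchdog arms a periodic NMI on every cpu and checks, from NMI context, whether that cpu's interrupt counters are still advancing. A rough standalone C model of that heartbeat check follows; it paraphrases the logic in arch/i386/kernel/nmi.c, and the function name cpu_looks_hung and the standalone framing are invented for illustration, not the kernel's actual interface.

    /* Simplified model of the NMI watchdog heartbeat check. */
    #define NR_CPUS 32

    static unsigned int last_irq_sums[NR_CPUS];
    static unsigned int alert_counter[NR_CPUS];

    /* Called on each watchdog NMI.  Returns 1 when the cpu looks hung:
     * its interrupt count has not advanced for more than five seconds'
     * worth of watchdog NMIs (nmi_hz NMIs per second).  The kernel
     * would declare a lockup at that point instead of returning. */
    int cpu_looks_hung(int cpu, unsigned int irq_sum, unsigned int nmi_hz)
    {
        if (irq_sum == last_irq_sums[cpu]) {
            if (++alert_counter[cpu] > 5 * nmi_hz)
                return 1;
        } else {
            last_irq_sums[cpu] = irq_sum;
            alert_counter[cpu] = 0;
        }
        return 0;
    }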
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index b7d6abb501a6..e2cbd59cf2d0 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -421,6 +421,11 @@ more details, with real examples.
 	The second argument is optional, and if supplied will be used
 	if first argument is not supported.
 
+    as-instr
+	as-instr checks if the assembler reports a specific instruction
+	and then outputs either option1 or option2
+	C escapes are supported in the test instruction
+
     cc-option
 	cc-option is used to check if $(CC) supports a given option, and not
 	supported to use an optional second option.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 766abdab94e7..c918cc3f65fb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1240,7 +1240,11 @@ running once the system is up.
 			bootloader. This is currently used on
 			IXP2000 systems where the bus has to be
 			configured a certain way for adjunct CPUs.
-
+	noearly		[X86] Don't do any early type 1 scanning.
+			This might help on some broken boards which
+			machine check when some devices' config space
+			is read. But various workarounds are disabled
+			and some IOMMU drivers will not work.
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
 	pd.		[PARIDE]
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 6da24e7a56cb..4303e0c12476 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -245,6 +245,13 @@ Debugging
 	       newfallback: use new unwinder but fall back to old if it gets
 			stuck (default)
 
+  call_trace=[old|both|newfallback|new]
+	old: use old inexact backtracer
+	new: use new exact dwarf2 unwinder
+	both: print entries from both
+	newfallback: use new unwinder but fall back to old if it gets
+		stuck (default)
+
 Misc
 
   noreplacement  Don't replace instructions with more appropriate ones
diff --git a/Documentation/x86_64/kernel-stacks b/Documentation/x86_64/kernel-stacks
new file mode 100644
index 000000000000..bddfddd466ab
--- /dev/null
+++ b/Documentation/x86_64/kernel-stacks
@@ -0,0 +1,99 @@
+Most of the text from Keith Owens, hacked by AK
+
+x86_64 page size (PAGE_SIZE) is 4K.
+
+Like all other architectures, x86_64 has a kernel stack for every
+active thread.  These thread stacks are THREAD_SIZE (2*PAGE_SIZE) big.
+These stacks contain useful data as long as a thread is alive or a
+zombie.  While the thread is in user space the kernel stack is empty
+except for the thread_info structure at the bottom.
+
+In addition to the per thread stacks, there are specialized stacks
+associated with each cpu.  These stacks are only used while the kernel
+is in control on that cpu; when a cpu returns to user space the
+specialized stacks contain no useful data.  The main cpu stacks are:
+
+* Interrupt stack.  IRQSTACKSIZE
+
+  Used for external hardware interrupts.  If this is the first external
+  hardware interrupt (i.e. not a nested hardware interrupt) then the
+  kernel switches from the current task to the interrupt stack.  Like
+  the split thread and interrupt stacks on i386 (with CONFIG_4KSTACKS),
+  this gives more room for kernel interrupt processing without having
+  to increase the size of every per thread stack.
+
+  The interrupt stack is also used when processing a softirq.
+
+Switching to the kernel interrupt stack is done by software based on a
+per CPU interrupt nest counter.  This is needed because x86-64 "IST"
+hardware stacks cannot nest without races.
+
+x86_64 also has a feature which is not available on i386, the ability
+to automatically switch to a new stack for designated events such as
+double fault or NMI, which makes it easier to handle these unusual
+events on x86_64.  This feature is called the Interrupt Stack Table
+(IST).  There can be up to 7 IST entries per cpu.  The IST code is an
+index into the Task State Segment (TSS); the IST entries in the TSS
+point to dedicated stacks, and each stack can be a different size.
+
+An IST is selected by a non-zero value in the IST field of an
+interrupt-gate descriptor.  When an interrupt occurs and the hardware
+loads such a descriptor, the hardware automatically sets the new stack
+pointer based on the IST value, then invokes the interrupt handler.  If
+software wants to allow nested IST interrupts then the handler must
+adjust the IST values on entry to and exit from the interrupt handler.
+(This is occasionally done, e.g. for debug exceptions.)
+
+Events with different IST codes (i.e. with different stacks) can be
+nested.  For example, a debug interrupt can safely be interrupted by an
+NMI.  arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack
+pointers on entry to and exit from all IST events, in theory allowing
+IST events with the same code to be nested.  However in most cases, the
+stack size allocated to an IST assumes no nesting for the same code.
+If that assumption is ever broken then the stacks will become corrupt.
+
+The currently assigned IST stacks are:
+
+* STACKFAULT_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
+
+  Used for interrupt 12 - Stack Fault Exception (#SS).
+
+  This allows the kernel to recover from invalid stack segments.  Rarely
+  happens.
+
+* DOUBLEFAULT_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
+
+  Used for interrupt 8 - Double Fault Exception (#DF).
+
+  Invoked when handling one exception causes another exception.  Happens
+  when the kernel is very confused (e.g. kernel stack pointer corrupt).
+  Using a separate stack allows the kernel to recover from it well
+  enough in many cases to still output an oops.
+
+* NMI_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
+
+  Used for non-maskable interrupts (NMI).
+
+  NMI can be delivered at any time, including when the kernel is in the
+  middle of switching stacks.  Using IST for NMI events avoids making
+  assumptions about the previous state of the kernel stack.
+
+* DEBUG_STACK.  DEBUG_STKSZ
+
+  Used for hardware debug interrupts (interrupt 1) and for software
+  debug interrupts (INT3).
+
+  When debugging a kernel, debug interrupts (both hardware and
+  software) can occur at any time.  Using IST for these interrupts
+  avoids making assumptions about the previous state of the kernel
+  stack.
+
+* MCE_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
+
+  Used for interrupt 18 - Machine Check Exception (#MC).
+
+  MCE can be delivered at any time, including when the kernel is in the
+  middle of switching stacks.  Using IST for MCE events avoids making
+  assumptions about the previous state of the kernel stack.
+
+For more details see the Intel IA32 or AMD AMD64 architecture manuals.
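
A rough C sketch of the IST wiring the new document describes; struct tss_sketch, set_ist_stack, and the slot layout here are invented for illustration and do not match the kernel's actual types or constants:

    /* Each of the seven IST slots in the TSS holds the address the CPU
     * will load into the stack pointer when an interrupt gate names
     * that slot. */
    struct tss_sketch {
        unsigned long ist[7];
    };

    /* Point a (1-based) IST slot at the top of its stack; x86 stacks
     * grow downward, so the recorded pointer is stack + size. */
    static void set_ist_stack(struct tss_sketch *tss, int slot,
                              char *stack, unsigned long size)
    {
        tss->ist[slot - 1] = (unsigned long)(stack + size);
    }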
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 6189b0c28d6f..758044f5e718 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -166,7 +166,6 @@ config X86_VISWS
 
 config X86_GENERICARCH
 	bool "Generic architecture (Summit, bigsmp, ES7000, default)"
-	depends on SMP
 	help
 	  This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
 	  It is intended for a generic binary kernel.
@@ -263,7 +262,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+	depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
 	help
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -288,12 +287,12 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
 	bool
-	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
+	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH
 	default y
 
 config X86_IO_APIC
 	bool
-	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
+	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH
 	default y
 
 config X86_VISWS_APIC
@@ -741,8 +740,7 @@ config SECCOMP
 source kernel/Kconfig.hz
 
 config KEXEC
-	bool "kexec system call (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "kexec system call"
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel. It is like a reboot
@@ -763,6 +761,13 @@ config CRASH_DUMP
 	depends on HIGHMEM
 	help
 	  Generate crash dump after being started by kexec.
+	  This should be normally only set in special crash dump kernels
+	  which are loaded in the main kernel with kexec-tools into
+	  a specially reserved region and then later executed after
+	  a crash by kdump/kexec. The crash dump kernel must be compiled
+	  to a memory address not used by the main kernel or BIOS using
+	  PHYSICAL_START.
+	  For more details see Documentation/kdump/kdump.txt
 
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 3e4adb1e2244..7cc0b189b82b 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -46,6 +46,14 @@ cflags-y += -ffreestanding
 # a lot more stack due to the lack of sharing of stacklots:
 CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
 
+# do binutils support CFI?
+cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+
+# is .cfi_signal_frame supported too?
+cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
+AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
+
 CFLAGS += $(cflags-y)
 
 # Default subarch .c files
diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S
index 4b84ea216f2b..34321368011a 100644
--- a/arch/i386/boot/edd.S
+++ b/arch/i386/boot/edd.S
@@ -15,42 +15,95 @@
 #include <asm/setup.h>
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+
+# It is assumed that %ds == INITSEG here
+
 	movb	$0, (EDD_MBR_SIG_NR_BUF)
 	movb	$0, (EDDNR)
 
-# Check the command line for two options:
+# Check the command line for options:
 # edd=of	disables EDD completely (edd=off)
 # edd=sk	skips the MBR test (edd=skipmbr)
+# edd=on	re-enables EDD (edd=on)
+
 	pushl	%esi
-	cmpl	$0, %cs:cmd_line_ptr
-	jz	done_cl
+	movw	$edd_mbr_sig_start, %di	# Default to edd=on
+
 	movl	%cs:(cmd_line_ptr), %esi
-# ds:esi has the pointer to the command line now
-	movl	$(COMMAND_LINE_SIZE-7), %ecx
-# loop through kernel command line one byte at a time
-cl_loop:
-	cmpl	$EDD_CL_EQUALS, (%si)
+	andl	%esi, %esi
+	jz	old_cl			# Old boot protocol?
+
+# Convert to a real-mode pointer in fs:si
+	movl	%esi, %eax
+	shrl	$4, %eax
+	movw	%ax, %fs
+	andw	$0xf, %si
+	jmp	have_cl_pointer
+
+# Old-style boot protocol?
+old_cl:
+	push	%ds			# aka INITSEG
+	pop	%fs
+
+	cmpw	$0xa33f, (0x20)
+	jne	done_cl			# No command line at all?
+	movw	(0x22), %si		# Pointer relative to INITSEG
+
+# fs:si has the pointer to the command line now
+have_cl_pointer:
+
+# Loop through kernel command line one byte at a time.  Just in
+# case the loader is buggy and failed to null-terminate the command line
+# terminate if we get close enough to the end of the segment that we
+# cannot fit "edd=XX"...
+cl_atspace:
+	cmpw	$-5, %si		# Watch for segment wraparound
+	jae	done_cl
+	movl	%fs:(%si), %eax
+	andb	%al, %al		# End of line?
+	jz	done_cl
+	cmpl	$EDD_CL_EQUALS, %eax
 	jz	found_edd_equals
-	incl	%esi
-	loop	cl_loop
-	jmp	done_cl
+	cmpb	$0x20, %al		# <= space consider whitespace
+	ja	cl_skipword
+	incw	%si
+	jmp	cl_atspace
+
+cl_skipword:
+	cmpw	$-5, %si		# Watch for segment wraparound
+	jae	done_cl
+	movb	%fs:(%si), %al		# End of string?
+	andb	%al, %al
+	jz	done_cl
+	cmpb	$0x20, %al
+	jbe	cl_atspace
+	incw	%si
+	jmp	cl_skipword
+
 found_edd_equals:
 # only looking at first two characters after equals
-	addl	$4, %esi
-	cmpw	$EDD_CL_OFF, (%si)	# edd=of
-	jz	do_edd_off
-	cmpw	$EDD_CL_SKIP, (%si)	# edd=sk
-	jz	do_edd_skipmbr
-	jmp	done_cl
+# late overrides early on the command line, so keep going after finding something
+	movw	%fs:4(%si), %ax
+	cmpw	$EDD_CL_OFF, %ax	# edd=of
+	je	do_edd_off
+	cmpw	$EDD_CL_SKIP, %ax	# edd=sk
+	je	do_edd_skipmbr
+	cmpw	$EDD_CL_ON, %ax		# edd=on
+	je	do_edd_on
+	jmp	cl_skipword
 do_edd_skipmbr:
-	popl	%esi
-	jmp	edd_start
+	movw	$edd_start, %di
+	jmp	cl_skipword
 do_edd_off:
-	popl	%esi
-	jmp	edd_done
+	movw	$edd_done, %di
+	jmp	cl_skipword
+do_edd_on:
+	movw	$edd_mbr_sig_start, %di
+	jmp	cl_skipword
+
 done_cl:
 	popl	%esi
-
+	jmpw	*%di
 
 # Read the first sector of each BIOS disk device and store the 4-byte signature
 edd_mbr_sig_start:
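
For readers who do not want to trace the assembly, here is a hypothetical C model of what the rewritten parser above does; parse_edd and the enum are invented names, and the real code additionally copes with an unterminated command line and matches options only at word boundaries, which this sketch glosses over:

    #include <string.h>

    enum edd_action { EDD_ON, EDD_OFF, EDD_SKIPMBR };

    /* Scan the command line for edd= options.  Scanning continues after
     * a match, so a later option overrides an earlier one, and only the
     * first two characters after '=' are examined. */
    enum edd_action parse_edd(const char *cmdline)
    {
        enum edd_action act = EDD_ON;   /* default, as in the asm */
        const char *p = cmdline;

        while ((p = strstr(p, "edd=")) != NULL) {
            p += 4;
            if (!strncmp(p, "of", 2))
                act = EDD_OFF;          /* edd=off */
            else if (!strncmp(p, "sk", 2))
                act = EDD_SKIPMBR;      /* edd=skipmbr */
            else if (!strncmp(p, "on", 2))
                act = EDD_ON;           /* edd=on */
        }
        return act;
    }

The same "record the choice, act at the end" structure appears in the assembly: each edd= match stores a handler address in %di, and done_cl jumps through %di once the whole command line has been scanned.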
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index d2b684cd620a..3aec4538a113 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -494,12 +494,12 @@ no_voyager:
 	movw	%cs, %ax		# aka SETUPSEG
 	subw	$DELTA_INITSEG, %ax	# aka INITSEG
 	movw	%ax, %ds
-	movw	$0, (0x1ff)		# default is no pointing device
+	movb	$0, (0x1ff)		# default is no pointing device
 	int	$0x11			# int 0x11: equipment list
 	testb	$0x04, %al		# check if mouse installed
 	jz	no_psmouse
 
-	movw	$0xAA, (0x1ff)		# device present
+	movb	$0xAA, (0x1ff)		# device present
 no_psmouse:
 
 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 89ebb7a316ab..1a29bfa26d0c 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1,41 +1,51 @@
 #
 # Automatically generated make config: don't edit
+# Linux kernel version: 2.6.18-git5
+# Tue Sep 26 09:30:47 2006
 #
 CONFIG_X86_32=y
+CONFIG_GENERIC_TIME=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_STACKTRACE_SUPPORT=y
 CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_X86=y
 CONFIG_MMU=y
 CONFIG_GENERIC_ISA_DMA=y
 CONFIG_GENERIC_IOMAP=y
+CONFIG_GENERIC_HWEIGHT=y
 CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_DMI=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
 # Code maturity level options
 #
 CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 
 #
 # General setup
 #
 CONFIG_LOCALVERSION=""
-# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
+CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
+# CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+# CONFIG_CPUSETS is not set
+# CONFIG_RELAY is not set
 CONFIG_INITRAMFS_SOURCE=""
-CONFIG_UID16=y
-CONFIG_VM86=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_EMBEDDED is not set
+CONFIG_UID16=y
+CONFIG_SYSCTL=y
 CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
@@ -45,11 +55,9 @@ CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
 CONFIG_SLAB=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 # CONFIG_SLOB is not set
@@ -60,41 +68,45 @@ CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 # CONFIG_KMOD is not set
+CONFIG_STOP_MACHINE=y
 
 #
 # Block layer
 #
-# CONFIG_LBD is not set
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
 
 #
 # IO Schedulers
 #
 CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
+CONFIG_DEFAULT_AS=y
 # CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_CFQ is not set
 # CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
+CONFIG_DEFAULT_IOSCHED="anticipatory"
 
 #
 # Processor type and features
 #
-CONFIG_X86_PC=y
+CONFIG_SMP=y
+# CONFIG_X86_PC is not set
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
 # CONFIG_X86_NUMAQ is not set
 # CONFIG_X86_SUMMIT is not set
 # CONFIG_X86_BIGSMP is not set
 # CONFIG_X86_VISWS is not set
-# CONFIG_X86_GENERICARCH is not set
+CONFIG_X86_GENERICARCH=y
 # CONFIG_X86_ES7000 is not set
+CONFIG_X86_CYCLONE_TIMER=y
 # CONFIG_M386 is not set
 # CONFIG_M486 is not set
 # CONFIG_M586 is not set
@@ -102,11 +114,11 @@ CONFIG_X86_PC=y
 # CONFIG_M586MMX is not set
 # CONFIG_M686 is not set
 # CONFIG_MPENTIUMII is not set
-# CONFIG_MPENTIUMIII is not set
+CONFIG_MPENTIUMIII=y
 # CONFIG_MPENTIUMM is not set
 # CONFIG_MPENTIUM4 is not set
 # CONFIG_MK6 is not set
-CONFIG_MK7=y
+# CONFIG_MK7 is not set
 # CONFIG_MK8 is not set
 # CONFIG_MCRUSOE is not set
 # CONFIG_MEFFICEON is not set
@@ -117,10 +129,10 @@ CONFIG_MK7=y
 # CONFIG_MGEODE_LX is not set
 # CONFIG_MCYRIXIII is not set
 # CONFIG_MVIAC3_2 is not set
-# CONFIG_X86_GENERIC is not set
+CONFIG_X86_GENERIC=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_XADD=y
-CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_X86_WP_WORKS_OK=y
@@ -131,26 +143,28 @@ CONFIG_X86_CMPXCHG64=y
 CONFIG_X86_GOOD_APIC=y
 CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_USE_3DNOW=y
 CONFIG_X86_TSC=y
-# CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_HPET_TIMER=y
+CONFIG_HPET_EMULATE_RTC=y
+CONFIG_NR_CPUS=32
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+# CONFIG_PREEMPT_NONE is not set
+CONFIG_PREEMPT_VOLUNTARY=y
 # CONFIG_PREEMPT is not set
-CONFIG_X86_UP_APIC=y
-CONFIG_X86_UP_IOAPIC=y
+CONFIG_PREEMPT_BKL=y
 CONFIG_X86_LOCAL_APIC=y
 CONFIG_X86_IO_APIC=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_NONFATAL=y
-# CONFIG_X86_MCE_P4THERMAL is not set
+CONFIG_X86_MCE_P4THERMAL=y
+CONFIG_VM86=y
 # CONFIG_TOSHIBA is not set
 # CONFIG_I8K is not set
 # CONFIG_X86_REBOOTFIXUPS is not set
-# CONFIG_MICROCODE is not set
-# CONFIG_X86_MSR is not set
-# CONFIG_X86_CPUID is not set
+CONFIG_MICROCODE=y
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
 
 #
 # Firmware Drivers
@@ -158,68 +172,67 @@ CONFIG_X86_MCE_NONFATAL=y
 # CONFIG_EDD is not set
 # CONFIG_DELL_RBU is not set
 # CONFIG_DCDBAS is not set
-CONFIG_NOHIGHMEM=y
-# CONFIG_HIGHMEM4G is not set
+# CONFIG_NOHIGHMEM is not set
+CONFIG_HIGHMEM4G=y
 # CONFIG_HIGHMEM64G is not set
-CONFIG_VMSPLIT_3G=y
-# CONFIG_VMSPLIT_3G_OPT is not set
-# CONFIG_VMSPLIT_2G is not set
-# CONFIG_VMSPLIT_1G is not set
 CONFIG_PAGE_OFFSET=0xC0000000
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_HIGHMEM=y
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
 # CONFIG_DISCONTIGMEM_MANUAL is not set
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_SPARSEMEM_STATIC=y
+# CONFIG_SPARSEMEM_STATIC is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_RESOURCES_64BIT=y
+# CONFIG_HIGHPTE is not set
 # CONFIG_MATH_EMULATION is not set
 CONFIG_MTRR=y
 # CONFIG_EFI is not set
+# CONFIG_IRQBALANCE is not set
 CONFIG_REGPARM=y
-# CONFIG_SECCOMP is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+CONFIG_SECCOMP=y
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 # CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
 CONFIG_PHYSICAL_START=0x100000
-CONFIG_DOUBLEFAULT=y
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_COMPAT_VDSO=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 
 #
 # Power management options (ACPI, APM)
 #
 CONFIG_PM=y
-# CONFIG_PM_LEGACY is not set
+CONFIG_PM_LEGACY=y
 # CONFIG_PM_DEBUG is not set
-CONFIG_SOFTWARE_SUSPEND=y
-CONFIG_PM_STD_PARTITION=""
 
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
 CONFIG_ACPI=y
-# CONFIG_ACPI_SLEEP is not set
-# CONFIG_ACPI_AC is not set
-# CONFIG_ACPI_BATTERY is not set
-# CONFIG_ACPI_BUTTON is not set
+CONFIG_ACPI_AC=y
+CONFIG_ACPI_BATTERY=y
+CONFIG_ACPI_BUTTON=y
 # CONFIG_ACPI_VIDEO is not set
 # CONFIG_ACPI_HOTKEY is not set
-# CONFIG_ACPI_FAN is not set
-# CONFIG_ACPI_PROCESSOR is not set
+CONFIG_ACPI_FAN=y
+# CONFIG_ACPI_DOCK is not set
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_THERMAL=y
 # CONFIG_ACPI_ASUS is not set
 # CONFIG_ACPI_IBM is not set
 # CONFIG_ACPI_TOSHIBA is not set
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BLACKLIST_YEAR=2001
+CONFIG_ACPI_DEBUG=y
 CONFIG_ACPI_EC=y
 CONFIG_ACPI_POWER=y
 CONFIG_ACPI_SYSTEM=y
-# CONFIG_X86_PM_TIMER is not set
+CONFIG_X86_PM_TIMER=y
 # CONFIG_ACPI_CONTAINER is not set
 
 #
@@ -230,7 +243,41 @@ CONFIG_ACPI_SYSTEM=y
 #
 # CPU Frequency scaling
 #
-# CONFIG_CPU_FREQ is not set
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+CONFIG_CPU_FREQ_DEBUG=y
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+
+#
+# CPUFreq processor drivers
+#
+CONFIG_X86_ACPI_CPUFREQ=y
+# CONFIG_X86_POWERNOW_K6 is not set
+# CONFIG_X86_POWERNOW_K7 is not set
+CONFIG_X86_POWERNOW_K8=y
+CONFIG_X86_POWERNOW_K8_ACPI=y
+# CONFIG_X86_GX_SUSPMOD is not set
+# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
+# CONFIG_X86_SPEEDSTEP_ICH is not set
+# CONFIG_X86_SPEEDSTEP_SMI is not set
+# CONFIG_X86_P4_CLOCKMOD is not set
+# CONFIG_X86_CPUFREQ_NFORCE2 is not set
+# CONFIG_X86_LONGRUN is not set
+# CONFIG_X86_LONGHAUL is not set
+
+#
+# shared options
+#
+CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
+# CONFIG_X86_SPEEDSTEP_LIB is not set
 
 #
 # Bus options (PCI, PCMCIA, EISA, MCA, ISA)
@@ -244,12 +291,13 @@ CONFIG_PCI_BIOS=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
 # CONFIG_PCIEPORTBUS is not set
-# CONFIG_PCI_MSI is not set
-# CONFIG_PCI_LEGACY_PROC is not set
+CONFIG_PCI_MSI=y
+# CONFIG_PCI_DEBUG is not set
 CONFIG_ISA_DMA_API=y
 # CONFIG_ISA is not set
 # CONFIG_MCA is not set
 # CONFIG_SCx200 is not set
+CONFIG_K8_NB=y
 
 #
 # PCCARD (PCMCIA/CardBus) support
@@ -278,93 +326,54 @@ CONFIG_NET=y
 #
 # CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
+# CONFIG_PACKET_MMAP is not set
 CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
+CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
 CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
 # CONFIG_ARPD is not set
 # CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
-# CONFIG_INET_DIAG is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-
-#
-# IP: Virtual Server Configuration
-#
-# CONFIG_IP_VS is not set
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-
-#
-# Core Netfilter Configuration
-#
-# CONFIG_NETFILTER_NETLINK is not set
-CONFIG_NETFILTER_XTABLES=y
-# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
-# CONFIG_NETFILTER_XT_TARGET_MARK is not set
-# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
-# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
-# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
-# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
-# CONFIG_NETFILTER_XT_MATCH_HELPER is not set
-# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set
-CONFIG_NETFILTER_XT_MATCH_LIMIT=y
-CONFIG_NETFILTER_XT_MATCH_MAC=y
-# CONFIG_NETFILTER_XT_MATCH_MARK is not set
-# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
-# CONFIG_NETFILTER_XT_MATCH_REALM is not set
-# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
-CONFIG_NETFILTER_XT_MATCH_STATE=y
-# CONFIG_NETFILTER_XT_MATCH_STRING is not set
-# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
-
-#
-# IP: Netfilter Configuration
-#
-CONFIG_IP_NF_CONNTRACK=y
-# CONFIG_IP_NF_CT_ACCT is not set
-# CONFIG_IP_NF_CONNTRACK_MARK is not set
-# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
-# CONFIG_IP_NF_CT_PROTO_SCTP is not set
-CONFIG_IP_NF_FTP=y
-# CONFIG_IP_NF_IRC is not set
-# CONFIG_IP_NF_NETBIOS_NS is not set
-# CONFIG_IP_NF_TFTP is not set
-# CONFIG_IP_NF_AMANDA is not set
-# CONFIG_IP_NF_PPTP is not set
-# CONFIG_IP_NF_QUEUE is not set
-CONFIG_IP_NF_IPTABLES=y
-# CONFIG_IP_NF_MATCH_IPRANGE is not set
-# CONFIG_IP_NF_MATCH_MULTIPORT is not set
-# CONFIG_IP_NF_MATCH_TOS is not set
-# CONFIG_IP_NF_MATCH_RECENT is not set
-# CONFIG_IP_NF_MATCH_ECN is not set
-# CONFIG_IP_NF_MATCH_DSCP is not set
-# CONFIG_IP_NF_MATCH_AH_ESP is not set
-# CONFIG_IP_NF_MATCH_TTL is not set
-# CONFIG_IP_NF_MATCH_OWNER is not set
-# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
-# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
-CONFIG_IP_NF_FILTER=y
-# CONFIG_IP_NF_TARGET_REJECT is not set
-CONFIG_IP_NF_TARGET_LOG=y
-# CONFIG_IP_NF_TARGET_ULOG is not set
-# CONFIG_IP_NF_TARGET_TCPMSS is not set
-# CONFIG_IP_NF_NAT is not set
-# CONFIG_IP_NF_MANGLE is not set
-# CONFIG_IP_NF_RAW is not set
-# CONFIG_IP_NF_ARPTABLES is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_IPV6_MIP6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+CONFIG_INET6_XFRM_MODE_TRANSPORT=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_IPV6_SUBTREES is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
 
 #
 # DCCP Configuration (EXPERIMENTAL)
@@ -389,7 +398,6 @@ CONFIG_IP_NF_TARGET_LOG=y
 # CONFIG_ATALK is not set
 # CONFIG_X25 is not set
 # CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
 
@@ -402,6 +410,7 @@ CONFIG_IP_NF_TARGET_LOG=y
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
@@ -416,7 +425,9 @@ CONFIG_IP_NF_TARGET_LOG=y
 #
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
+CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
 
 #
 # Connector - unified userspace <-> kernelspace linker
@@ -431,13 +442,7 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
 #
 # Parallel port support
 #
-CONFIG_PARPORT=y
-CONFIG_PARPORT_PC=y
-# CONFIG_PARPORT_SERIAL is not set
-# CONFIG_PARPORT_PC_FIFO is not set
-# CONFIG_PARPORT_PC_SUPERIO is not set
-# CONFIG_PARPORT_GSC is not set
-CONFIG_PARPORT_1284=y
+# CONFIG_PARPORT is not set
 
 #
 # Plug and Play support
@@ -447,8 +452,7 @@ CONFIG_PARPORT_1284=y
 #
 # Block devices
 #
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_PARIDE is not set
+CONFIG_BLK_DEV_FD=y
 # CONFIG_BLK_CPQ_DA is not set
 # CONFIG_BLK_CPQ_CISS_DA is not set
 # CONFIG_BLK_DEV_DAC960 is not set
@@ -459,8 +463,11 @@ CONFIG_BLK_DEV_LOOP=y
 # CONFIG_BLK_DEV_NBD is not set
 # CONFIG_BLK_DEV_SX8 is not set
 # CONFIG_BLK_DEV_UB is not set
-# CONFIG_BLK_DEV_RAM is not set
+CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
+CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
 
@@ -476,7 +483,7 @@ CONFIG_BLK_DEV_IDE=y
 # CONFIG_BLK_DEV_IDE_SATA is not set
 # CONFIG_BLK_DEV_HD_IDE is not set
 CONFIG_BLK_DEV_IDEDISK=y
-# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_IDEDISK_MULTI_MODE=y
 CONFIG_BLK_DEV_IDECD=y
 # CONFIG_BLK_DEV_IDETAPE is not set
 # CONFIG_BLK_DEV_IDEFLOPPY is not set
@@ -486,10 +493,10 @@ CONFIG_BLK_DEV_IDECD=y
 #
 # IDE chipset support/bugfixes
 #
-# CONFIG_IDE_GENERIC is not set
+CONFIG_IDE_GENERIC=y
 # CONFIG_BLK_DEV_CMD640 is not set
 CONFIG_BLK_DEV_IDEPCI=y
-CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
 # CONFIG_BLK_DEV_OFFBOARD is not set
 # CONFIG_BLK_DEV_GENERIC is not set
 # CONFIG_BLK_DEV_OPTI621 is not set
@@ -500,7 +507,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
 # CONFIG_IDEDMA_ONLYDISK is not set
 # CONFIG_BLK_DEV_AEC62XX is not set
 # CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
+CONFIG_BLK_DEV_AMD74XX=y
 # CONFIG_BLK_DEV_ATIIXP is not set
 # CONFIG_BLK_DEV_CMD64X is not set
 # CONFIG_BLK_DEV_TRIFLEX is not set
@@ -511,7 +518,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
 # CONFIG_BLK_DEV_HPT34X is not set
 # CONFIG_BLK_DEV_HPT366 is not set
 # CONFIG_BLK_DEV_SC1200 is not set
-# CONFIG_BLK_DEV_PIIX is not set
+CONFIG_BLK_DEV_PIIX=y
 # CONFIG_BLK_DEV_IT821X is not set
 # CONFIG_BLK_DEV_NS87415 is not set
 # CONFIG_BLK_DEV_PDC202XX_OLD is not set
@@ -521,7 +528,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
 # CONFIG_BLK_DEV_SIS5513 is not set
 # CONFIG_BLK_DEV_SLC90E66 is not set
 # CONFIG_BLK_DEV_TRM290 is not set
-CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_BLK_DEV_VIA82CXXX is not set
 # CONFIG_IDE_ARM is not set
 CONFIG_BLK_DEV_IDEDMA=y
 # CONFIG_IDEDMA_IVB is not set
@@ -533,6 +540,7 @@ CONFIG_IDEDMA_AUTO=y
 #
 # CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
+CONFIG_SCSI_NETLINK=y
 # CONFIG_SCSI_PROC_FS is not set
 
 #
@@ -541,8 +549,9 @@ CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 # CONFIG_CHR_DEV_ST is not set
 # CONFIG_CHR_DEV_OSST is not set
-# CONFIG_BLK_DEV_SR is not set
-# CONFIG_CHR_DEV_SG is not set
+CONFIG_BLK_DEV_SR=y
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=y
 # CONFIG_CHR_DEV_SCH is not set
 
 #
@@ -553,29 +562,44 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOGGING is not set
 
 #
-# SCSI Transport Attributes
+# SCSI Transports
 #
-# CONFIG_SCSI_SPI_ATTRS is not set
-# CONFIG_SCSI_FC_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
 # CONFIG_SCSI_ISCSI_ATTRS is not set
 # CONFIG_SCSI_SAS_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
 
 #
 # SCSI low-level drivers
 #
 # CONFIG_ISCSI_TCP is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+CONFIG_BLK_DEV_3W_XXXX_RAID=y
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_ACARD is not set
 # CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=5000
+CONFIG_AIC7XXX_DEBUG_ENABLE=y
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
 # CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=4000
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC94XX is not set
 # CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_ARCMSR is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
-# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_HPTIOP is not set
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
@@ -584,11 +608,9 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_IPS is not set
 # CONFIG_SCSI_INITIO is not set
 # CONFIG_SCSI_INIA100 is not set
-# CONFIG_SCSI_PPA is not set
-# CONFIG_SCSI_IMM is not set
+# CONFIG_SCSI_STEX is not set
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
 # CONFIG_SCSI_QLA_FC is not set
 # CONFIG_SCSI_LPFC is not set
@@ -598,22 +620,114 @@ CONFIG_BLK_DEV_SD=y
598# CONFIG_SCSI_DEBUG is not set 620# CONFIG_SCSI_DEBUG is not set
599 621
600# 622#
623# Serial ATA (prod) and Parallel ATA (experimental) drivers
624#
625CONFIG_ATA=y
626CONFIG_SATA_AHCI=y
627CONFIG_SATA_SVW=y
628CONFIG_ATA_PIIX=y
629# CONFIG_SATA_MV is not set
630CONFIG_SATA_NV=y
631# CONFIG_PDC_ADMA is not set
632# CONFIG_SATA_QSTOR is not set
633# CONFIG_SATA_PROMISE is not set
634# CONFIG_SATA_SX4 is not set
635CONFIG_SATA_SIL=y
636# CONFIG_SATA_SIL24 is not set
637# CONFIG_SATA_SIS is not set
638# CONFIG_SATA_ULI is not set
639CONFIG_SATA_VIA=y
640# CONFIG_SATA_VITESSE is not set
641CONFIG_SATA_INTEL_COMBINED=y
642# CONFIG_PATA_ALI is not set
643# CONFIG_PATA_AMD is not set
644# CONFIG_PATA_ARTOP is not set
645# CONFIG_PATA_ATIIXP is not set
646# CONFIG_PATA_CMD64X is not set
647# CONFIG_PATA_CS5520 is not set
648# CONFIG_PATA_CS5530 is not set
649# CONFIG_PATA_CS5535 is not set
650# CONFIG_PATA_CYPRESS is not set
651# CONFIG_PATA_EFAR is not set
652# CONFIG_ATA_GENERIC is not set
653# CONFIG_PATA_HPT366 is not set
654# CONFIG_PATA_HPT37X is not set
655# CONFIG_PATA_HPT3X2N is not set
656# CONFIG_PATA_HPT3X3 is not set
657# CONFIG_PATA_IT821X is not set
658# CONFIG_PATA_JMICRON is not set
659# CONFIG_PATA_LEGACY is not set
660# CONFIG_PATA_TRIFLEX is not set
661# CONFIG_PATA_MPIIX is not set
662# CONFIG_PATA_OLDPIIX is not set
663# CONFIG_PATA_NETCELL is not set
664# CONFIG_PATA_NS87410 is not set
665# CONFIG_PATA_OPTI is not set
666# CONFIG_PATA_OPTIDMA is not set
667# CONFIG_PATA_PDC_OLD is not set
668# CONFIG_PATA_QDI is not set
669# CONFIG_PATA_RADISYS is not set
670# CONFIG_PATA_RZ1000 is not set
671# CONFIG_PATA_SC1200 is not set
672# CONFIG_PATA_SERVERWORKS is not set
673# CONFIG_PATA_PDC2027X is not set
674# CONFIG_PATA_SIL680 is not set
675# CONFIG_PATA_SIS is not set
676# CONFIG_PATA_VIA is not set
677# CONFIG_PATA_WINBOND is not set
678
679#
601# Multi-device support (RAID and LVM) 680# Multi-device support (RAID and LVM)
602# 681#
603# CONFIG_MD is not set 682CONFIG_MD=y
683# CONFIG_BLK_DEV_MD is not set
684CONFIG_BLK_DEV_DM=y
685# CONFIG_DM_CRYPT is not set
686# CONFIG_DM_SNAPSHOT is not set
687# CONFIG_DM_MIRROR is not set
688# CONFIG_DM_ZERO is not set
689# CONFIG_DM_MULTIPATH is not set
604 690
605# 691#
606# Fusion MPT device support 692# Fusion MPT device support
607# 693#
608# CONFIG_FUSION is not set 694CONFIG_FUSION=y
609# CONFIG_FUSION_SPI is not set 695CONFIG_FUSION_SPI=y
610# CONFIG_FUSION_FC is not set 696# CONFIG_FUSION_FC is not set
611# CONFIG_FUSION_SAS is not set 697# CONFIG_FUSION_SAS is not set
698CONFIG_FUSION_MAX_SGE=128
699# CONFIG_FUSION_CTL is not set
612 700
613# 701#
614# IEEE 1394 (FireWire) support 702# IEEE 1394 (FireWire) support
615# 703#
616# CONFIG_IEEE1394 is not set 704CONFIG_IEEE1394=y
705
706#
707# Subsystem Options
708#
709# CONFIG_IEEE1394_VERBOSEDEBUG is not set
710# CONFIG_IEEE1394_OUI_DB is not set
711# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
712# CONFIG_IEEE1394_EXPORT_FULL_API is not set
713
714#
715# Device Drivers
716#
717
718#
719# Texas Instruments PCILynx requires I2C
720#
721CONFIG_IEEE1394_OHCI1394=y
722
723#
724# Protocol Drivers
725#
726# CONFIG_IEEE1394_VIDEO1394 is not set
727# CONFIG_IEEE1394_SBP2 is not set
728# CONFIG_IEEE1394_ETH1394 is not set
729# CONFIG_IEEE1394_DV1394 is not set
730CONFIG_IEEE1394_RAWIO=y
617 731
618# 732#
619# I2O device support 733# I2O device support
@@ -652,46 +766,63 @@ CONFIG_MII=y
652# 766#
653# Tulip family network device support 767# Tulip family network device support
654# 768#
655# CONFIG_NET_TULIP is not set 769CONFIG_NET_TULIP=y
770# CONFIG_DE2104X is not set
771CONFIG_TULIP=y
772# CONFIG_TULIP_MWI is not set
773# CONFIG_TULIP_MMIO is not set
774# CONFIG_TULIP_NAPI is not set
775# CONFIG_DE4X5 is not set
776# CONFIG_WINBOND_840 is not set
777# CONFIG_DM9102 is not set
778# CONFIG_ULI526X is not set
656# CONFIG_HP100 is not set 779# CONFIG_HP100 is not set
657CONFIG_NET_PCI=y 780CONFIG_NET_PCI=y
658# CONFIG_PCNET32 is not set 781# CONFIG_PCNET32 is not set
659# CONFIG_AMD8111_ETH is not set 782# CONFIG_AMD8111_ETH is not set
660# CONFIG_ADAPTEC_STARFIRE is not set 783# CONFIG_ADAPTEC_STARFIRE is not set
661# CONFIG_B44 is not set 784CONFIG_B44=y
662# CONFIG_FORCEDETH is not set 785CONFIG_FORCEDETH=y
786# CONFIG_FORCEDETH_NAPI is not set
663# CONFIG_DGRS is not set 787# CONFIG_DGRS is not set
664# CONFIG_EEPRO100 is not set 788# CONFIG_EEPRO100 is not set
665CONFIG_E100=y 789CONFIG_E100=y
666# CONFIG_FEALNX is not set 790# CONFIG_FEALNX is not set
667# CONFIG_NATSEMI is not set 791# CONFIG_NATSEMI is not set
668# CONFIG_NE2K_PCI is not set 792# CONFIG_NE2K_PCI is not set
669# CONFIG_8139CP is not set 793CONFIG_8139CP=y
670# CONFIG_8139TOO is not set 794CONFIG_8139TOO=y
795# CONFIG_8139TOO_PIO is not set
796# CONFIG_8139TOO_TUNE_TWISTER is not set
797# CONFIG_8139TOO_8129 is not set
798# CONFIG_8139_OLD_RX_RESET is not set
671# CONFIG_SIS900 is not set 799# CONFIG_SIS900 is not set
672# CONFIG_EPIC100 is not set 800# CONFIG_EPIC100 is not set
673# CONFIG_SUNDANCE is not set 801# CONFIG_SUNDANCE is not set
674# CONFIG_TLAN is not set 802# CONFIG_TLAN is not set
675# CONFIG_VIA_RHINE is not set 803# CONFIG_VIA_RHINE is not set
676# CONFIG_NET_POCKET is not set
677 804
678# 805#
679# Ethernet (1000 Mbit) 806# Ethernet (1000 Mbit)
680# 807#
681# CONFIG_ACENIC is not set 808# CONFIG_ACENIC is not set
682# CONFIG_DL2K is not set 809# CONFIG_DL2K is not set
683# CONFIG_E1000 is not set 810CONFIG_E1000=y
811# CONFIG_E1000_NAPI is not set
812# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
684# CONFIG_NS83820 is not set 813# CONFIG_NS83820 is not set
685# CONFIG_HAMACHI is not set 814# CONFIG_HAMACHI is not set
686# CONFIG_YELLOWFIN is not set 815# CONFIG_YELLOWFIN is not set
687# CONFIG_R8169 is not set 816CONFIG_R8169=y
817# CONFIG_R8169_NAPI is not set
688# CONFIG_SIS190 is not set 818# CONFIG_SIS190 is not set
689# CONFIG_SKGE is not set 819# CONFIG_SKGE is not set
690# CONFIG_SKY2 is not set 820CONFIG_SKY2=y
691# CONFIG_SK98LIN is not set 821# CONFIG_SK98LIN is not set
692# CONFIG_VIA_VELOCITY is not set 822# CONFIG_VIA_VELOCITY is not set
693# CONFIG_TIGON3 is not set 823CONFIG_TIGON3=y
694# CONFIG_BNX2 is not set 824CONFIG_BNX2=y
825# CONFIG_QLA3XXX is not set
695 826
696# 827#
697# Ethernet (10000 Mbit) 828# Ethernet (10000 Mbit)
@@ -699,6 +830,7 @@ CONFIG_E100=y
699# CONFIG_CHELSIO_T1 is not set 830# CONFIG_CHELSIO_T1 is not set
700# CONFIG_IXGB is not set 831# CONFIG_IXGB is not set
701# CONFIG_S2IO is not set 832# CONFIG_S2IO is not set
833# CONFIG_MYRI10GE is not set
702 834
703# 835#
704# Token Ring devices 836# Token Ring devices
@@ -716,14 +848,15 @@ CONFIG_E100=y
716# CONFIG_WAN is not set 848# CONFIG_WAN is not set
717# CONFIG_FDDI is not set 849# CONFIG_FDDI is not set
718# CONFIG_HIPPI is not set 850# CONFIG_HIPPI is not set
719# CONFIG_PLIP is not set
720# CONFIG_PPP is not set 851# CONFIG_PPP is not set
721# CONFIG_SLIP is not set 852# CONFIG_SLIP is not set
722# CONFIG_NET_FC is not set 853# CONFIG_NET_FC is not set
723# CONFIG_SHAPER is not set 854# CONFIG_SHAPER is not set
724# CONFIG_NETCONSOLE is not set 855CONFIG_NETCONSOLE=y
725# CONFIG_NETPOLL is not set 856CONFIG_NETPOLL=y
726# CONFIG_NET_POLL_CONTROLLER is not set 857# CONFIG_NETPOLL_RX is not set
858# CONFIG_NETPOLL_TRAP is not set
859CONFIG_NET_POLL_CONTROLLER=y
727 860
728# 861#
729# ISDN subsystem 862# ISDN subsystem
@@ -745,8 +878,8 @@ CONFIG_INPUT=y
745# 878#
746CONFIG_INPUT_MOUSEDEV=y 879CONFIG_INPUT_MOUSEDEV=y
747CONFIG_INPUT_MOUSEDEV_PSAUX=y 880CONFIG_INPUT_MOUSEDEV_PSAUX=y
748CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280 881CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
749CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024 882CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
750# CONFIG_INPUT_JOYDEV is not set 883# CONFIG_INPUT_JOYDEV is not set
751# CONFIG_INPUT_TSDEV is not set 884# CONFIG_INPUT_TSDEV is not set
752CONFIG_INPUT_EVDEV=y 885CONFIG_INPUT_EVDEV=y
@@ -776,7 +909,6 @@ CONFIG_SERIO=y
776CONFIG_SERIO_I8042=y 909CONFIG_SERIO_I8042=y
777# CONFIG_SERIO_SERPORT is not set 910# CONFIG_SERIO_SERPORT is not set
778# CONFIG_SERIO_CT82C710 is not set 911# CONFIG_SERIO_CT82C710 is not set
779# CONFIG_SERIO_PARKBD is not set
780# CONFIG_SERIO_PCIPS2 is not set 912# CONFIG_SERIO_PCIPS2 is not set
781CONFIG_SERIO_LIBPS2=y 913CONFIG_SERIO_LIBPS2=y
782# CONFIG_SERIO_RAW is not set 914# CONFIG_SERIO_RAW is not set
@@ -788,14 +920,15 @@ CONFIG_SERIO_LIBPS2=y
788CONFIG_VT=y 920CONFIG_VT=y
789CONFIG_VT_CONSOLE=y 921CONFIG_VT_CONSOLE=y
790CONFIG_HW_CONSOLE=y 922CONFIG_HW_CONSOLE=y
923# CONFIG_VT_HW_CONSOLE_BINDING is not set
791# CONFIG_SERIAL_NONSTANDARD is not set 924# CONFIG_SERIAL_NONSTANDARD is not set
792 925
793# 926#
794# Serial drivers 927# Serial drivers
795# 928#
796CONFIG_SERIAL_8250=y 929CONFIG_SERIAL_8250=y
797# CONFIG_SERIAL_8250_CONSOLE is not set 930CONFIG_SERIAL_8250_CONSOLE=y
798# CONFIG_SERIAL_8250_ACPI is not set 931CONFIG_SERIAL_8250_PCI=y
799CONFIG_SERIAL_8250_NR_UARTS=4 932CONFIG_SERIAL_8250_NR_UARTS=4
800CONFIG_SERIAL_8250_RUNTIME_UARTS=4 933CONFIG_SERIAL_8250_RUNTIME_UARTS=4
801# CONFIG_SERIAL_8250_EXTENDED is not set 934# CONFIG_SERIAL_8250_EXTENDED is not set
@@ -804,14 +937,11 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4
804# Non-8250 serial port support 937# Non-8250 serial port support
805# 938#
806CONFIG_SERIAL_CORE=y 939CONFIG_SERIAL_CORE=y
940CONFIG_SERIAL_CORE_CONSOLE=y
807# CONFIG_SERIAL_JSM is not set 941# CONFIG_SERIAL_JSM is not set
808CONFIG_UNIX98_PTYS=y 942CONFIG_UNIX98_PTYS=y
809CONFIG_LEGACY_PTYS=y 943CONFIG_LEGACY_PTYS=y
810CONFIG_LEGACY_PTY_COUNT=256 944CONFIG_LEGACY_PTY_COUNT=256
811CONFIG_PRINTER=y
812# CONFIG_LP_CONSOLE is not set
813# CONFIG_PPDEV is not set
814# CONFIG_TIPAR is not set
815 945
816# 946#
817# IPMI 947# IPMI
@@ -822,8 +952,12 @@ CONFIG_PRINTER=y
822# Watchdog Cards 952# Watchdog Cards
823# 953#
824# CONFIG_WATCHDOG is not set 954# CONFIG_WATCHDOG is not set
825# CONFIG_HW_RANDOM is not set 955CONFIG_HW_RANDOM=y
826CONFIG_NVRAM=y 956CONFIG_HW_RANDOM_INTEL=y
957CONFIG_HW_RANDOM_AMD=y
958CONFIG_HW_RANDOM_GEODE=y
959CONFIG_HW_RANDOM_VIA=y
960# CONFIG_NVRAM is not set
827CONFIG_RTC=y 961CONFIG_RTC=y
828# CONFIG_DTLK is not set 962# CONFIG_DTLK is not set
829# CONFIG_R3964 is not set 963# CONFIG_R3964 is not set
@@ -833,31 +967,28 @@ CONFIG_RTC=y
833# 967#
834# Ftape, the floppy tape device driver 968# Ftape, the floppy tape device driver
835# 969#
836# CONFIG_FTAPE is not set
837CONFIG_AGP=y 970CONFIG_AGP=y
838# CONFIG_AGP_ALI is not set 971# CONFIG_AGP_ALI is not set
839# CONFIG_AGP_ATI is not set 972# CONFIG_AGP_ATI is not set
840# CONFIG_AGP_AMD is not set 973# CONFIG_AGP_AMD is not set
841# CONFIG_AGP_AMD64 is not set 974CONFIG_AGP_AMD64=y
842# CONFIG_AGP_INTEL is not set 975CONFIG_AGP_INTEL=y
843# CONFIG_AGP_NVIDIA is not set 976# CONFIG_AGP_NVIDIA is not set
844# CONFIG_AGP_SIS is not set 977# CONFIG_AGP_SIS is not set
845# CONFIG_AGP_SWORKS is not set 978# CONFIG_AGP_SWORKS is not set
846CONFIG_AGP_VIA=y 979# CONFIG_AGP_VIA is not set
847# CONFIG_AGP_EFFICEON is not set 980# CONFIG_AGP_EFFICEON is not set
848CONFIG_DRM=y 981# CONFIG_DRM is not set
849# CONFIG_DRM_TDFX is not set
850# CONFIG_DRM_R128 is not set
851CONFIG_DRM_RADEON=y
852# CONFIG_DRM_MGA is not set
853# CONFIG_DRM_SIS is not set
854# CONFIG_DRM_VIA is not set
855# CONFIG_DRM_SAVAGE is not set
856# CONFIG_MWAVE is not set 982# CONFIG_MWAVE is not set
983# CONFIG_PC8736x_GPIO is not set
984# CONFIG_NSC_GPIO is not set
857# CONFIG_CS5535_GPIO is not set 985# CONFIG_CS5535_GPIO is not set
858# CONFIG_RAW_DRIVER is not set 986CONFIG_RAW_DRIVER=y
859# CONFIG_HPET is not set 987CONFIG_MAX_RAW_DEVS=256
860# CONFIG_HANGCHECK_TIMER is not set 988CONFIG_HPET=y
989# CONFIG_HPET_RTC_IRQ is not set
990CONFIG_HPET_MMAP=y
991CONFIG_HANGCHECK_TIMER=y
861 992
862# 993#
863# TPM devices 994# TPM devices
@@ -868,59 +999,7 @@ CONFIG_DRM_RADEON=y
868# 999#
869# I2C support 1000# I2C support
870# 1001#
871CONFIG_I2C=y 1002# CONFIG_I2C is not set
872CONFIG_I2C_CHARDEV=y
873
874#
875# I2C Algorithms
876#
877CONFIG_I2C_ALGOBIT=y
878# CONFIG_I2C_ALGOPCF is not set
879# CONFIG_I2C_ALGOPCA is not set
880
881#
882# I2C Hardware Bus support
883#
884# CONFIG_I2C_ALI1535 is not set
885# CONFIG_I2C_ALI1563 is not set
886# CONFIG_I2C_ALI15X3 is not set
887# CONFIG_I2C_AMD756 is not set
888# CONFIG_I2C_AMD8111 is not set
889# CONFIG_I2C_I801 is not set
890# CONFIG_I2C_I810 is not set
891# CONFIG_I2C_PIIX4 is not set
892CONFIG_I2C_ISA=y
893# CONFIG_I2C_NFORCE2 is not set
894# CONFIG_I2C_PARPORT is not set
895# CONFIG_I2C_PARPORT_LIGHT is not set
896# CONFIG_I2C_PROSAVAGE is not set
897# CONFIG_I2C_SAVAGE4 is not set
898# CONFIG_SCx200_ACB is not set
899# CONFIG_I2C_SIS5595 is not set
900# CONFIG_I2C_SIS630 is not set
901# CONFIG_I2C_SIS96X is not set
902# CONFIG_I2C_STUB is not set
903# CONFIG_I2C_VIA is not set
904CONFIG_I2C_VIAPRO=y
905# CONFIG_I2C_VOODOO3 is not set
906# CONFIG_I2C_PCA_ISA is not set
907
908#
909# Miscellaneous I2C Chip support
910#
911# CONFIG_SENSORS_DS1337 is not set
912# CONFIG_SENSORS_DS1374 is not set
913# CONFIG_SENSORS_EEPROM is not set
914# CONFIG_SENSORS_PCF8574 is not set
915# CONFIG_SENSORS_PCA9539 is not set
916# CONFIG_SENSORS_PCF8591 is not set
917# CONFIG_SENSORS_RTC8564 is not set
918# CONFIG_SENSORS_MAX6875 is not set
919# CONFIG_RTC_X1205_I2C is not set
920# CONFIG_I2C_DEBUG_CORE is not set
921# CONFIG_I2C_DEBUG_ALGO is not set
922# CONFIG_I2C_DEBUG_BUS is not set
923# CONFIG_I2C_DEBUG_CHIP is not set
924 1003
925# 1004#
926# SPI support 1005# SPI support
@@ -931,51 +1010,12 @@ CONFIG_I2C_VIAPRO=y
931# 1010#
932# Dallas's 1-wire bus 1011# Dallas's 1-wire bus
933# 1012#
934# CONFIG_W1 is not set
935 1013
936# 1014#
937# Hardware Monitoring support 1015# Hardware Monitoring support
938# 1016#
939CONFIG_HWMON=y 1017# CONFIG_HWMON is not set
940CONFIG_HWMON_VID=y 1018# CONFIG_HWMON_VID is not set
941# CONFIG_SENSORS_ADM1021 is not set
942# CONFIG_SENSORS_ADM1025 is not set
943# CONFIG_SENSORS_ADM1026 is not set
944# CONFIG_SENSORS_ADM1031 is not set
945# CONFIG_SENSORS_ADM9240 is not set
946# CONFIG_SENSORS_ASB100 is not set
947# CONFIG_SENSORS_ATXP1 is not set
948# CONFIG_SENSORS_DS1621 is not set
949# CONFIG_SENSORS_F71805F is not set
950# CONFIG_SENSORS_FSCHER is not set
951# CONFIG_SENSORS_FSCPOS is not set
952# CONFIG_SENSORS_GL518SM is not set
953# CONFIG_SENSORS_GL520SM is not set
954CONFIG_SENSORS_IT87=y
955# CONFIG_SENSORS_LM63 is not set
956# CONFIG_SENSORS_LM75 is not set
957# CONFIG_SENSORS_LM77 is not set
958# CONFIG_SENSORS_LM78 is not set
959# CONFIG_SENSORS_LM80 is not set
960# CONFIG_SENSORS_LM83 is not set
961# CONFIG_SENSORS_LM85 is not set
962# CONFIG_SENSORS_LM87 is not set
963# CONFIG_SENSORS_LM90 is not set
964# CONFIG_SENSORS_LM92 is not set
965# CONFIG_SENSORS_MAX1619 is not set
966# CONFIG_SENSORS_PC87360 is not set
967# CONFIG_SENSORS_SIS5595 is not set
968# CONFIG_SENSORS_SMSC47M1 is not set
969# CONFIG_SENSORS_SMSC47B397 is not set
970# CONFIG_SENSORS_VIA686A is not set
971# CONFIG_SENSORS_VT8231 is not set
972# CONFIG_SENSORS_W83781D is not set
973# CONFIG_SENSORS_W83792D is not set
974# CONFIG_SENSORS_W83L785TS is not set
975# CONFIG_SENSORS_W83627HF is not set
976# CONFIG_SENSORS_W83627EHF is not set
977# CONFIG_SENSORS_HDAPS is not set
978# CONFIG_HWMON_DEBUG_CHIP is not set
979 1019
980# 1020#
981# Misc devices 1021# Misc devices
@@ -983,117 +1023,31 @@ CONFIG_SENSORS_IT87=y
983# CONFIG_IBM_ASM is not set 1023# CONFIG_IBM_ASM is not set
984 1024
985# 1025#
986# Multimedia Capabilities Port drivers
987#
988
989#
990# Multimedia devices 1026# Multimedia devices
991# 1027#
992CONFIG_VIDEO_DEV=y 1028# CONFIG_VIDEO_DEV is not set
993 1029CONFIG_VIDEO_V4L2=y
994#
995# Video For Linux
996#
997
998#
999# Video Adapters
1000#
1001# CONFIG_VIDEO_ADV_DEBUG is not set
1002# CONFIG_VIDEO_BT848 is not set
1003# CONFIG_VIDEO_BWQCAM is not set
1004# CONFIG_VIDEO_CQCAM is not set
1005# CONFIG_VIDEO_W9966 is not set
1006# CONFIG_VIDEO_CPIA is not set
1007# CONFIG_VIDEO_SAA5246A is not set
1008# CONFIG_VIDEO_SAA5249 is not set
1009# CONFIG_TUNER_3036 is not set
1010# CONFIG_VIDEO_STRADIS is not set
1011# CONFIG_VIDEO_ZORAN is not set
1012CONFIG_VIDEO_SAA7134=y
1013# CONFIG_VIDEO_SAA7134_ALSA is not set
1014# CONFIG_VIDEO_MXB is not set
1015# CONFIG_VIDEO_DPC is not set
1016# CONFIG_VIDEO_HEXIUM_ORION is not set
1017# CONFIG_VIDEO_HEXIUM_GEMINI is not set
1018# CONFIG_VIDEO_CX88 is not set
1019# CONFIG_VIDEO_EM28XX is not set
1020# CONFIG_VIDEO_OVCAMCHIP is not set
1021# CONFIG_VIDEO_AUDIO_DECODER is not set
1022# CONFIG_VIDEO_DECODER is not set
1023
1024#
1025# Radio Adapters
1026#
1027# CONFIG_RADIO_GEMTEK_PCI is not set
1028# CONFIG_RADIO_MAXIRADIO is not set
1029# CONFIG_RADIO_MAESTRO is not set
1030 1030
1031# 1031#
1032# Digital Video Broadcasting Devices 1032# Digital Video Broadcasting Devices
1033# 1033#
1034# CONFIG_DVB is not set 1034# CONFIG_DVB is not set
1035CONFIG_VIDEO_TUNER=y 1035# CONFIG_USB_DABUSB is not set
1036CONFIG_VIDEO_BUF=y
1037CONFIG_VIDEO_IR=y
1038 1036
1039# 1037#
1040# Graphics support 1038# Graphics support
1041# 1039#
1042CONFIG_FB=y 1040CONFIG_FIRMWARE_EDID=y
1043CONFIG_FB_CFB_FILLRECT=y 1041# CONFIG_FB is not set
1044CONFIG_FB_CFB_COPYAREA=y
1045CONFIG_FB_CFB_IMAGEBLIT=y
1046# CONFIG_FB_MACMODES is not set
1047CONFIG_FB_MODE_HELPERS=y
1048# CONFIG_FB_TILEBLITTING is not set
1049# CONFIG_FB_CIRRUS is not set
1050# CONFIG_FB_PM2 is not set
1051# CONFIG_FB_CYBER2000 is not set
1052# CONFIG_FB_ARC is not set
1053# CONFIG_FB_ASILIANT is not set
1054# CONFIG_FB_IMSTT is not set
1055# CONFIG_FB_VGA16 is not set
1056# CONFIG_FB_VESA is not set
1057CONFIG_VIDEO_SELECT=y
1058# CONFIG_FB_HGA is not set
1059# CONFIG_FB_S1D13XXX is not set
1060# CONFIG_FB_NVIDIA is not set
1061# CONFIG_FB_RIVA is not set
1062# CONFIG_FB_I810 is not set
1063# CONFIG_FB_INTEL is not set
1064# CONFIG_FB_MATROX is not set
1065# CONFIG_FB_RADEON_OLD is not set
1066CONFIG_FB_RADEON=y
1067CONFIG_FB_RADEON_I2C=y
1068# CONFIG_FB_RADEON_DEBUG is not set
1069# CONFIG_FB_ATY128 is not set
1070# CONFIG_FB_ATY is not set
1071# CONFIG_FB_SAVAGE is not set
1072# CONFIG_FB_SIS is not set
1073# CONFIG_FB_NEOMAGIC is not set
1074# CONFIG_FB_KYRO is not set
1075# CONFIG_FB_3DFX is not set
1076# CONFIG_FB_VOODOO1 is not set
1077# CONFIG_FB_CYBLA is not set
1078# CONFIG_FB_TRIDENT is not set
1079# CONFIG_FB_GEODE is not set
1080# CONFIG_FB_VIRTUAL is not set
1081 1042
1082# 1043#
1083# Console display driver support 1044# Console display driver support
1084# 1045#
1085CONFIG_VGA_CONSOLE=y 1046CONFIG_VGA_CONSOLE=y
1047CONFIG_VGACON_SOFT_SCROLLBACK=y
1048CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128
1049CONFIG_VIDEO_SELECT=y
1086CONFIG_DUMMY_CONSOLE=y 1050CONFIG_DUMMY_CONSOLE=y
1087CONFIG_FRAMEBUFFER_CONSOLE=y
1088# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
1089# CONFIG_FONTS is not set
1090CONFIG_FONT_8x8=y
1091CONFIG_FONT_8x16=y
1092
1093#
1094# Logo configuration
1095#
1096# CONFIG_LOGO is not set
1097# CONFIG_BACKLIGHT_LCD_SUPPORT is not set 1051# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
1098 1052
1099# 1053#
@@ -1104,97 +1058,30 @@ CONFIG_SOUND=y
1104# 1058#
1105# Advanced Linux Sound Architecture 1059# Advanced Linux Sound Architecture
1106# 1060#
1107CONFIG_SND=y 1061# CONFIG_SND is not set
1108CONFIG_SND_TIMER=y
1109CONFIG_SND_PCM=y
1110CONFIG_SND_RAWMIDI=y
1111CONFIG_SND_SEQUENCER=y
1112# CONFIG_SND_SEQ_DUMMY is not set
1113# CONFIG_SND_MIXER_OSS is not set
1114# CONFIG_SND_PCM_OSS is not set
1115# CONFIG_SND_SEQUENCER_OSS is not set
1116CONFIG_SND_RTCTIMER=y
1117CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y
1118# CONFIG_SND_DYNAMIC_MINORS is not set
1119# CONFIG_SND_SUPPORT_OLD_API is not set
1120# CONFIG_SND_VERBOSE_PRINTK is not set
1121# CONFIG_SND_DEBUG is not set
1122
1123#
1124# Generic devices
1125#
1126CONFIG_SND_MPU401_UART=y
1127CONFIG_SND_AC97_CODEC=y
1128CONFIG_SND_AC97_BUS=y
1129# CONFIG_SND_DUMMY is not set
1130# CONFIG_SND_VIRMIDI is not set
1131# CONFIG_SND_MTPAV is not set
1132# CONFIG_SND_SERIAL_U16550 is not set
1133# CONFIG_SND_MPU401 is not set
1134
1135#
1136# PCI devices
1137#
1138# CONFIG_SND_AD1889 is not set
1139# CONFIG_SND_ALS4000 is not set
1140# CONFIG_SND_ALI5451 is not set
1141# CONFIG_SND_ATIIXP is not set
1142# CONFIG_SND_ATIIXP_MODEM is not set
1143# CONFIG_SND_AU8810 is not set
1144# CONFIG_SND_AU8820 is not set
1145# CONFIG_SND_AU8830 is not set
1146# CONFIG_SND_AZT3328 is not set
1147# CONFIG_SND_BT87X is not set
1148# CONFIG_SND_CA0106 is not set
1149# CONFIG_SND_CMIPCI is not set
1150# CONFIG_SND_CS4281 is not set
1151# CONFIG_SND_CS46XX is not set
1152# CONFIG_SND_CS5535AUDIO is not set
1153# CONFIG_SND_EMU10K1 is not set
1154# CONFIG_SND_EMU10K1X is not set
1155# CONFIG_SND_ENS1370 is not set
1156# CONFIG_SND_ENS1371 is not set
1157# CONFIG_SND_ES1938 is not set
1158# CONFIG_SND_ES1968 is not set
1159# CONFIG_SND_FM801 is not set
1160# CONFIG_SND_HDA_INTEL is not set
1161# CONFIG_SND_HDSP is not set
1162# CONFIG_SND_HDSPM is not set
1163# CONFIG_SND_ICE1712 is not set
1164# CONFIG_SND_ICE1724 is not set
1165# CONFIG_SND_INTEL8X0 is not set
1166# CONFIG_SND_INTEL8X0M is not set
1167# CONFIG_SND_KORG1212 is not set
1168# CONFIG_SND_MAESTRO3 is not set
1169# CONFIG_SND_MIXART is not set
1170# CONFIG_SND_NM256 is not set
1171# CONFIG_SND_PCXHR is not set
1172# CONFIG_SND_RME32 is not set
1173# CONFIG_SND_RME96 is not set
1174# CONFIG_SND_RME9652 is not set
1175# CONFIG_SND_SONICVIBES is not set
1176# CONFIG_SND_TRIDENT is not set
1177CONFIG_SND_VIA82XX=y
1178# CONFIG_SND_VIA82XX_MODEM is not set
1179# CONFIG_SND_VX222 is not set
1180# CONFIG_SND_YMFPCI is not set
1181
1182#
1183# USB devices
1184#
1185# CONFIG_SND_USB_AUDIO is not set
1186# CONFIG_SND_USB_USX2Y is not set
1187 1062
1188# 1063#
1189# Open Sound System 1064# Open Sound System
1190# 1065#
1191# CONFIG_SOUND_PRIME is not set 1066CONFIG_SOUND_PRIME=y
1067CONFIG_OSS_OBSOLETE_DRIVER=y
1068# CONFIG_SOUND_BT878 is not set
1069# CONFIG_SOUND_EMU10K1 is not set
1070# CONFIG_SOUND_FUSION is not set
1071# CONFIG_SOUND_ES1371 is not set
1072CONFIG_SOUND_ICH=y
1073# CONFIG_SOUND_TRIDENT is not set
1074# CONFIG_SOUND_MSNDCLAS is not set
1075# CONFIG_SOUND_MSNDPIN is not set
1076# CONFIG_SOUND_VIA82CXXX is not set
1077# CONFIG_SOUND_OSS is not set
1192 1078
1193# 1079#
1194# USB support 1080# USB support
1195# 1081#
1196CONFIG_USB_ARCH_HAS_HCD=y 1082CONFIG_USB_ARCH_HAS_HCD=y
1197CONFIG_USB_ARCH_HAS_OHCI=y 1083CONFIG_USB_ARCH_HAS_OHCI=y
1084CONFIG_USB_ARCH_HAS_EHCI=y
1198CONFIG_USB=y 1085CONFIG_USB=y
1199# CONFIG_USB_DEBUG is not set 1086# CONFIG_USB_DEBUG is not set
1200 1087
@@ -1213,17 +1100,19 @@ CONFIG_USB_DEVICEFS=y
1213CONFIG_USB_EHCI_HCD=y 1100CONFIG_USB_EHCI_HCD=y
1214# CONFIG_USB_EHCI_SPLIT_ISO is not set 1101# CONFIG_USB_EHCI_SPLIT_ISO is not set
1215# CONFIG_USB_EHCI_ROOT_HUB_TT is not set 1102# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
1103# CONFIG_USB_EHCI_TT_NEWSCHED is not set
1216# CONFIG_USB_ISP116X_HCD is not set 1104# CONFIG_USB_ISP116X_HCD is not set
1217# CONFIG_USB_OHCI_HCD is not set 1105CONFIG_USB_OHCI_HCD=y
1106# CONFIG_USB_OHCI_BIG_ENDIAN is not set
1107CONFIG_USB_OHCI_LITTLE_ENDIAN=y
1218CONFIG_USB_UHCI_HCD=y 1108CONFIG_USB_UHCI_HCD=y
1219# CONFIG_USB_SL811_HCD is not set 1109# CONFIG_USB_SL811_HCD is not set
1220 1110
1221# 1111#
1222# USB Device Class drivers 1112# USB Device Class drivers
1223# 1113#
1224# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
1225# CONFIG_USB_ACM is not set 1114# CONFIG_USB_ACM is not set
1226# CONFIG_USB_PRINTER is not set 1115CONFIG_USB_PRINTER=y
1227 1116
1228# 1117#
1229# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' 1118# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
@@ -1248,21 +1137,17 @@ CONFIG_USB_STORAGE=y
1248# 1137#
1249# USB Input Devices 1138# USB Input Devices
1250# 1139#
1251# CONFIG_USB_HID is not set 1140CONFIG_USB_HID=y
1252 1141CONFIG_USB_HIDINPUT=y
1253# 1142# CONFIG_USB_HIDINPUT_POWERBOOK is not set
1254# USB HID Boot Protocol drivers 1143# CONFIG_HID_FF is not set
1255# 1144# CONFIG_USB_HIDDEV is not set
1256# CONFIG_USB_KBD is not set
1257# CONFIG_USB_MOUSE is not set
1258# CONFIG_USB_AIPTEK is not set 1145# CONFIG_USB_AIPTEK is not set
1259# CONFIG_USB_WACOM is not set 1146# CONFIG_USB_WACOM is not set
1260# CONFIG_USB_ACECAD is not set 1147# CONFIG_USB_ACECAD is not set
1261# CONFIG_USB_KBTAB is not set 1148# CONFIG_USB_KBTAB is not set
1262# CONFIG_USB_POWERMATE is not set 1149# CONFIG_USB_POWERMATE is not set
1263# CONFIG_USB_MTOUCH is not set 1150# CONFIG_USB_TOUCHSCREEN is not set
1264# CONFIG_USB_ITMTOUCH is not set
1265# CONFIG_USB_EGALAX is not set
1266# CONFIG_USB_YEALINK is not set 1151# CONFIG_USB_YEALINK is not set
1267# CONFIG_USB_XPAD is not set 1152# CONFIG_USB_XPAD is not set
1268# CONFIG_USB_ATI_REMOTE is not set 1153# CONFIG_USB_ATI_REMOTE is not set
@@ -1277,21 +1162,6 @@ CONFIG_USB_STORAGE=y
1277# CONFIG_USB_MICROTEK is not set 1162# CONFIG_USB_MICROTEK is not set
1278 1163
1279# 1164#
1280# USB Multimedia devices
1281#
1282# CONFIG_USB_DABUSB is not set
1283# CONFIG_USB_VICAM is not set
1284# CONFIG_USB_DSBR is not set
1285# CONFIG_USB_ET61X251 is not set
1286# CONFIG_USB_IBMCAM is not set
1287# CONFIG_USB_KONICAWC is not set
1288# CONFIG_USB_OV511 is not set
1289# CONFIG_USB_SE401 is not set
1290# CONFIG_USB_SN9C102 is not set
1291# CONFIG_USB_STV680 is not set
1292# CONFIG_USB_PWC is not set
1293
1294#
1295# USB Network Adapters 1165# USB Network Adapters
1296# 1166#
1297# CONFIG_USB_CATC is not set 1167# CONFIG_USB_CATC is not set
@@ -1299,12 +1169,11 @@ CONFIG_USB_STORAGE=y
1299# CONFIG_USB_PEGASUS is not set 1169# CONFIG_USB_PEGASUS is not set
1300# CONFIG_USB_RTL8150 is not set 1170# CONFIG_USB_RTL8150 is not set
1301# CONFIG_USB_USBNET is not set 1171# CONFIG_USB_USBNET is not set
1302# CONFIG_USB_MON is not set 1172CONFIG_USB_MON=y
1303 1173
1304# 1174#
1305# USB port drivers 1175# USB port drivers
1306# 1176#
1307# CONFIG_USB_USS720 is not set
1308 1177
1309# 1178#
1310# USB Serial Converter support 1179# USB Serial Converter support
@@ -1321,10 +1190,12 @@ CONFIG_USB_STORAGE=y
1321# CONFIG_USB_LEGOTOWER is not set 1190# CONFIG_USB_LEGOTOWER is not set
1322# CONFIG_USB_LCD is not set 1191# CONFIG_USB_LCD is not set
1323# CONFIG_USB_LED is not set 1192# CONFIG_USB_LED is not set
1193# CONFIG_USB_CYPRESS_CY7C63 is not set
1324# CONFIG_USB_CYTHERM is not set 1194# CONFIG_USB_CYTHERM is not set
1325# CONFIG_USB_PHIDGETKIT is not set 1195# CONFIG_USB_PHIDGETKIT is not set
1326# CONFIG_USB_PHIDGETSERVO is not set 1196# CONFIG_USB_PHIDGETSERVO is not set
1327# CONFIG_USB_IDMOUSE is not set 1197# CONFIG_USB_IDMOUSE is not set
1198# CONFIG_USB_APPLEDISPLAY is not set
1328# CONFIG_USB_SISUSBVGA is not set 1199# CONFIG_USB_SISUSBVGA is not set
1329# CONFIG_USB_LD is not set 1200# CONFIG_USB_LD is not set
1330# CONFIG_USB_TEST is not set 1201# CONFIG_USB_TEST is not set
@@ -1344,56 +1215,96 @@ CONFIG_USB_STORAGE=y
1344# CONFIG_MMC is not set 1215# CONFIG_MMC is not set
1345 1216
1346# 1217#
1218# LED devices
1219#
1220# CONFIG_NEW_LEDS is not set
1221
1222#
1223# LED drivers
1224#
1225
1226#
1227# LED Triggers
1228#
1229
1230#
1347# InfiniBand support 1231# InfiniBand support
1348# 1232#
1349# CONFIG_INFINIBAND is not set 1233# CONFIG_INFINIBAND is not set
1350 1234
1351# 1235#
1352# SN Devices 1236# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
1237#
1238# CONFIG_EDAC is not set
1239
1240#
1241# Real Time Clock
1353# 1242#
1243# CONFIG_RTC_CLASS is not set
1354 1244
1355# 1245#
1356# EDAC - error detection and reporting (RAS) 1246# DMA Engine support
1247#
1248# CONFIG_DMA_ENGINE is not set
1249
1250#
1251# DMA Clients
1252#
1253
1254#
1255# DMA Devices
1357# 1256#
1358# CONFIG_EDAC is not set
1359 1257
1360# 1258#
1361# File systems 1259# File systems
1362# 1260#
1363CONFIG_EXT2_FS=y 1261CONFIG_EXT2_FS=y
1364# CONFIG_EXT2_FS_XATTR is not set 1262CONFIG_EXT2_FS_XATTR=y
1263CONFIG_EXT2_FS_POSIX_ACL=y
1264# CONFIG_EXT2_FS_SECURITY is not set
1365# CONFIG_EXT2_FS_XIP is not set 1265# CONFIG_EXT2_FS_XIP is not set
1366# CONFIG_EXT3_FS is not set 1266CONFIG_EXT3_FS=y
1367# CONFIG_REISERFS_FS is not set 1267CONFIG_EXT3_FS_XATTR=y
1268CONFIG_EXT3_FS_POSIX_ACL=y
1269# CONFIG_EXT3_FS_SECURITY is not set
1270CONFIG_JBD=y
1271# CONFIG_JBD_DEBUG is not set
1272CONFIG_FS_MBCACHE=y
1273CONFIG_REISERFS_FS=y
1274# CONFIG_REISERFS_CHECK is not set
1275# CONFIG_REISERFS_PROC_INFO is not set
1276CONFIG_REISERFS_FS_XATTR=y
1277CONFIG_REISERFS_FS_POSIX_ACL=y
1278# CONFIG_REISERFS_FS_SECURITY is not set
1368# CONFIG_JFS_FS is not set 1279# CONFIG_JFS_FS is not set
1369# CONFIG_FS_POSIX_ACL is not set 1280CONFIG_FS_POSIX_ACL=y
1370# CONFIG_XFS_FS is not set 1281# CONFIG_XFS_FS is not set
1371# CONFIG_OCFS2_FS is not set 1282# CONFIG_OCFS2_FS is not set
1372# CONFIG_MINIX_FS is not set 1283# CONFIG_MINIX_FS is not set
1373# CONFIG_ROMFS_FS is not set 1284# CONFIG_ROMFS_FS is not set
1374# CONFIG_INOTIFY is not set 1285CONFIG_INOTIFY=y
1286CONFIG_INOTIFY_USER=y
1375# CONFIG_QUOTA is not set 1287# CONFIG_QUOTA is not set
1376CONFIG_DNOTIFY=y 1288CONFIG_DNOTIFY=y
1377# CONFIG_AUTOFS_FS is not set 1289# CONFIG_AUTOFS_FS is not set
1378# CONFIG_AUTOFS4_FS is not set 1290CONFIG_AUTOFS4_FS=y
1379# CONFIG_FUSE_FS is not set 1291# CONFIG_FUSE_FS is not set
1380 1292
1381# 1293#
1382# CD-ROM/DVD Filesystems 1294# CD-ROM/DVD Filesystems
1383# 1295#
1384CONFIG_ISO9660_FS=y 1296CONFIG_ISO9660_FS=y
1385CONFIG_JOLIET=y 1297# CONFIG_JOLIET is not set
1386CONFIG_ZISOFS=y 1298# CONFIG_ZISOFS is not set
1387CONFIG_ZISOFS_FS=y
1388# CONFIG_UDF_FS is not set 1299# CONFIG_UDF_FS is not set
1389 1300
1390# 1301#
1391# DOS/FAT/NT Filesystems 1302# DOS/FAT/NT Filesystems
1392# 1303#
1393CONFIG_FAT_FS=y 1304CONFIG_FAT_FS=y
1394# CONFIG_MSDOS_FS is not set 1305CONFIG_MSDOS_FS=y
1395CONFIG_VFAT_FS=y 1306CONFIG_VFAT_FS=y
1396CONFIG_FAT_DEFAULT_CODEPAGE=850 1307CONFIG_FAT_DEFAULT_CODEPAGE=437
1397CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" 1308CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
1398# CONFIG_NTFS_FS is not set 1309# CONFIG_NTFS_FS is not set
1399 1310
@@ -1404,10 +1315,9 @@ CONFIG_PROC_FS=y
1404CONFIG_PROC_KCORE=y 1315CONFIG_PROC_KCORE=y
1405CONFIG_SYSFS=y 1316CONFIG_SYSFS=y
1406CONFIG_TMPFS=y 1317CONFIG_TMPFS=y
1407# CONFIG_HUGETLBFS is not set 1318CONFIG_HUGETLBFS=y
1408# CONFIG_HUGETLB_PAGE is not set 1319CONFIG_HUGETLB_PAGE=y
1409CONFIG_RAMFS=y 1320CONFIG_RAMFS=y
1410# CONFIG_RELAYFS_FS is not set
1411# CONFIG_CONFIGFS_FS is not set 1321# CONFIG_CONFIGFS_FS is not set
1412 1322
1413# 1323#
@@ -1430,13 +1340,26 @@ CONFIG_RAMFS=y
1430# 1340#
1431# Network File Systems 1341# Network File Systems
1432# 1342#
1433# CONFIG_NFS_FS is not set 1343CONFIG_NFS_FS=y
1434# CONFIG_NFSD is not set 1344CONFIG_NFS_V3=y
1345# CONFIG_NFS_V3_ACL is not set
1346# CONFIG_NFS_V4 is not set
1347# CONFIG_NFS_DIRECTIO is not set
1348CONFIG_NFSD=y
1349CONFIG_NFSD_V3=y
1350# CONFIG_NFSD_V3_ACL is not set
1351# CONFIG_NFSD_V4 is not set
1352CONFIG_NFSD_TCP=y
1353CONFIG_ROOT_NFS=y
1354CONFIG_LOCKD=y
1355CONFIG_LOCKD_V4=y
1356CONFIG_EXPORTFS=y
1357CONFIG_NFS_COMMON=y
1358CONFIG_SUNRPC=y
1359# CONFIG_RPCSEC_GSS_KRB5 is not set
1360# CONFIG_RPCSEC_GSS_SPKM3 is not set
1435# CONFIG_SMB_FS is not set 1361# CONFIG_SMB_FS is not set
1436CONFIG_CIFS=y 1362# CONFIG_CIFS is not set
1437# CONFIG_CIFS_STATS is not set
1438# CONFIG_CIFS_XATTR is not set
1439# CONFIG_CIFS_EXPERIMENTAL is not set
1440# CONFIG_NCP_FS is not set 1363# CONFIG_NCP_FS is not set
1441# CONFIG_CODA_FS is not set 1364# CONFIG_CODA_FS is not set
1442# CONFIG_AFS_FS is not set 1365# CONFIG_AFS_FS is not set
@@ -1445,33 +1368,18 @@ CONFIG_CIFS=y
1445# 1368#
1446# Partition Types 1369# Partition Types
1447# 1370#
1448CONFIG_PARTITION_ADVANCED=y 1371# CONFIG_PARTITION_ADVANCED is not set
1449# CONFIG_ACORN_PARTITION is not set
1450# CONFIG_OSF_PARTITION is not set
1451# CONFIG_AMIGA_PARTITION is not set
1452# CONFIG_ATARI_PARTITION is not set
1453# CONFIG_MAC_PARTITION is not set
1454CONFIG_MSDOS_PARTITION=y 1372CONFIG_MSDOS_PARTITION=y
1455# CONFIG_BSD_DISKLABEL is not set
1456# CONFIG_MINIX_SUBPARTITION is not set
1457# CONFIG_SOLARIS_X86_PARTITION is not set
1458# CONFIG_UNIXWARE_DISKLABEL is not set
1459# CONFIG_LDM_PARTITION is not set
1460# CONFIG_SGI_PARTITION is not set
1461# CONFIG_ULTRIX_PARTITION is not set
1462# CONFIG_SUN_PARTITION is not set
1463# CONFIG_KARMA_PARTITION is not set
1464# CONFIG_EFI_PARTITION is not set
1465 1373
1466# 1374#
1467# Native Language Support 1375# Native Language Support
1468# 1376#
1469CONFIG_NLS=y 1377CONFIG_NLS=y
1470CONFIG_NLS_DEFAULT="iso8859-15" 1378CONFIG_NLS_DEFAULT="iso8859-1"
1471# CONFIG_NLS_CODEPAGE_437 is not set 1379CONFIG_NLS_CODEPAGE_437=y
1472# CONFIG_NLS_CODEPAGE_737 is not set 1380# CONFIG_NLS_CODEPAGE_737 is not set
1473# CONFIG_NLS_CODEPAGE_775 is not set 1381# CONFIG_NLS_CODEPAGE_775 is not set
1474CONFIG_NLS_CODEPAGE_850=y 1382# CONFIG_NLS_CODEPAGE_850 is not set
1475# CONFIG_NLS_CODEPAGE_852 is not set 1383# CONFIG_NLS_CODEPAGE_852 is not set
1476# CONFIG_NLS_CODEPAGE_855 is not set 1384# CONFIG_NLS_CODEPAGE_855 is not set
1477# CONFIG_NLS_CODEPAGE_857 is not set 1385# CONFIG_NLS_CODEPAGE_857 is not set
@@ -1491,7 +1399,7 @@ CONFIG_NLS_CODEPAGE_850=y
1491# CONFIG_NLS_ISO8859_8 is not set 1399# CONFIG_NLS_ISO8859_8 is not set
1492# CONFIG_NLS_CODEPAGE_1250 is not set 1400# CONFIG_NLS_CODEPAGE_1250 is not set
1493# CONFIG_NLS_CODEPAGE_1251 is not set 1401# CONFIG_NLS_CODEPAGE_1251 is not set
1494# CONFIG_NLS_ASCII is not set 1402CONFIG_NLS_ASCII=y
1495CONFIG_NLS_ISO8859_1=y 1403CONFIG_NLS_ISO8859_1=y
1496# CONFIG_NLS_ISO8859_2 is not set 1404# CONFIG_NLS_ISO8859_2 is not set
1497# CONFIG_NLS_ISO8859_3 is not set 1405# CONFIG_NLS_ISO8859_3 is not set
@@ -1510,20 +1418,50 @@ CONFIG_NLS_UTF8=y
1510# 1418#
1511# Instrumentation Support 1419# Instrumentation Support
1512# 1420#
1513# CONFIG_PROFILING is not set 1421CONFIG_PROFILING=y
1514# CONFIG_KPROBES is not set 1422CONFIG_OPROFILE=y
1423CONFIG_KPROBES=y
1515 1424
1516# 1425#
1517# Kernel hacking 1426# Kernel hacking
1518# 1427#
1428CONFIG_TRACE_IRQFLAGS_SUPPORT=y
1519# CONFIG_PRINTK_TIME is not set 1429# CONFIG_PRINTK_TIME is not set
1520CONFIG_MAGIC_SYSRQ=y 1430CONFIG_MAGIC_SYSRQ=y
1521# CONFIG_DEBUG_KERNEL is not set 1431CONFIG_UNUSED_SYMBOLS=y
1522CONFIG_LOG_BUF_SHIFT=14 1432CONFIG_DEBUG_KERNEL=y
1433CONFIG_LOG_BUF_SHIFT=18
1434CONFIG_DETECT_SOFTLOCKUP=y
1435# CONFIG_SCHEDSTATS is not set
1436# CONFIG_DEBUG_SLAB is not set
1437# CONFIG_DEBUG_RT_MUTEXES is not set
1438# CONFIG_RT_MUTEX_TESTER is not set
1439# CONFIG_DEBUG_SPINLOCK is not set
1440# CONFIG_DEBUG_MUTEXES is not set
1441# CONFIG_DEBUG_RWSEMS is not set
1442# CONFIG_DEBUG_LOCK_ALLOC is not set
1443# CONFIG_PROVE_LOCKING is not set
1444# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
1445# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
1446# CONFIG_DEBUG_KOBJECT is not set
1447# CONFIG_DEBUG_HIGHMEM is not set
1523CONFIG_DEBUG_BUGVERBOSE=y 1448CONFIG_DEBUG_BUGVERBOSE=y
1449# CONFIG_DEBUG_INFO is not set
1450# CONFIG_DEBUG_FS is not set
1451# CONFIG_DEBUG_VM is not set
1452# CONFIG_FRAME_POINTER is not set
1453CONFIG_UNWIND_INFO=y
1454CONFIG_STACK_UNWIND=y
1455# CONFIG_FORCED_INLINING is not set
1456# CONFIG_RCU_TORTURE_TEST is not set
1524CONFIG_EARLY_PRINTK=y 1457CONFIG_EARLY_PRINTK=y
1458CONFIG_DEBUG_STACKOVERFLOW=y
1459# CONFIG_DEBUG_STACK_USAGE is not set
1460# CONFIG_DEBUG_RODATA is not set
1461# CONFIG_4KSTACKS is not set
1525CONFIG_X86_FIND_SMP_CONFIG=y 1462CONFIG_X86_FIND_SMP_CONFIG=y
1526CONFIG_X86_MPPARSE=y 1463CONFIG_X86_MPPARSE=y
1464CONFIG_DOUBLEFAULT=y
1527 1465
1528# 1466#
1529# Security options 1467# Security options
@@ -1537,10 +1475,6 @@ CONFIG_X86_MPPARSE=y
1537# CONFIG_CRYPTO is not set 1475# CONFIG_CRYPTO is not set
1538 1476
1539# 1477#
1540# Hardware crypto devices
1541#
1542
1543#
1544# Library routines 1478# Library routines
1545# 1479#
1546# CONFIG_CRC_CCITT is not set 1480# CONFIG_CRC_CCITT is not set
@@ -1548,7 +1482,12 @@ CONFIG_X86_MPPARSE=y
1548CONFIG_CRC32=y 1482CONFIG_CRC32=y
1549# CONFIG_LIBCRC32C is not set 1483# CONFIG_LIBCRC32C is not set
1550CONFIG_ZLIB_INFLATE=y 1484CONFIG_ZLIB_INFLATE=y
1485CONFIG_PLIST=y
1551CONFIG_GENERIC_HARDIRQS=y 1486CONFIG_GENERIC_HARDIRQS=y
1552CONFIG_GENERIC_IRQ_PROBE=y 1487CONFIG_GENERIC_IRQ_PROBE=y
1488CONFIG_GENERIC_PENDING_IRQ=y
1489CONFIG_X86_SMP=y
1490CONFIG_X86_HT=y
1553CONFIG_X86_BIOS_REBOOT=y 1491CONFIG_X86_BIOS_REBOOT=y
1492CONFIG_X86_TRAMPOLINE=y
1554CONFIG_KTIME_SCALAR=y 1493CONFIG_KTIME_SCALAR=y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 5427a842e841..1a884b6e6e5c 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -4,7 +4,7 @@
4 4
5extra-y := head.o init_task.o vmlinux.lds 5extra-y := head.o init_task.o vmlinux.lds
6 6
7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ 7obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
9 pci-dma.o i386_ksyms.o i387.o bootflag.o \ 9 pci-dma.o i386_ksyms.o i387.o bootflag.o \
10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o 10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
@@ -81,4 +81,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
81 $(call if_changed,syscall) 81 $(call if_changed,syscall)
82 82
83k8-y += ../../x86_64/kernel/k8.o 83k8-y += ../../x86_64/kernel/k8.o
84stacktrace-y += ../../x86_64/kernel/stacktrace.o
84 85
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
index 7e9ac99354f4..7f7be01f44e6 100644
--- a/arch/i386/kernel/acpi/Makefile
+++ b/arch/i386/kernel/acpi/Makefile
@@ -1,5 +1,7 @@
1obj-$(CONFIG_ACPI) += boot.o 1obj-$(CONFIG_ACPI) += boot.o
2ifneq ($(CONFIG_PCI),)
2obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o 3obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
4endif
3obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o 5obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
4 6
5ifneq ($(CONFIG_ACPI_PROCESSOR),) 7ifneq ($(CONFIG_ACPI_PROCESSOR),)
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index ee003bc0e8b1..1aaea6ab8c46 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -26,9 +26,12 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/acpi.h> 27#include <linux/acpi.h>
28#include <linux/efi.h> 28#include <linux/efi.h>
29#include <linux/cpumask.h>
29#include <linux/module.h> 30#include <linux/module.h>
30#include <linux/dmi.h> 31#include <linux/dmi.h>
31#include <linux/irq.h> 32#include <linux/irq.h>
33#include <linux/bootmem.h>
34#include <linux/ioport.h>
32 35
33#include <asm/pgtable.h> 36#include <asm/pgtable.h>
34#include <asm/io_apic.h> 37#include <asm/io_apic.h>
@@ -36,11 +39,17 @@
36#include <asm/io.h> 39#include <asm/io.h>
37#include <asm/mpspec.h> 40#include <asm/mpspec.h>
38 41
39#ifdef CONFIG_X86_64 42static int __initdata acpi_force = 0;
40 43
41extern void __init clustered_apic_check(void); 44#ifdef CONFIG_ACPI
45int acpi_disabled = 0;
46#else
47int acpi_disabled = 1;
48#endif
49EXPORT_SYMBOL(acpi_disabled);
50
51#ifdef CONFIG_X86_64
42 52
43extern int gsi_irq_sharing(int gsi);
44#include <asm/proto.h> 53#include <asm/proto.h>
45 54
46static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } 55static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
@@ -506,16 +515,76 @@ EXPORT_SYMBOL(acpi_register_gsi);
506#ifdef CONFIG_ACPI_HOTPLUG_CPU 515#ifdef CONFIG_ACPI_HOTPLUG_CPU
507int acpi_map_lsapic(acpi_handle handle, int *pcpu) 516int acpi_map_lsapic(acpi_handle handle, int *pcpu)
508{ 517{
509 /* TBD */ 518 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
510 return -EINVAL; 519 union acpi_object *obj;
520 struct acpi_table_lapic *lapic;
521 cpumask_t tmp_map, new_map;
522 u8 physid;
523 int cpu;
524
525 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
526 return -EINVAL;
527
528 if (!buffer.length || !buffer.pointer)
529 return -EINVAL;
530
531 obj = buffer.pointer;
532 if (obj->type != ACPI_TYPE_BUFFER ||
533 obj->buffer.length < sizeof(*lapic)) {
534 kfree(buffer.pointer);
535 return -EINVAL;
536 }
537
538 lapic = (struct acpi_table_lapic *)obj->buffer.pointer;
539
540 if ((lapic->header.type != ACPI_MADT_LAPIC) ||
541 (!lapic->flags.enabled)) {
542 kfree(buffer.pointer);
543 return -EINVAL;
544 }
545
546 physid = lapic->id;
547
548 kfree(buffer.pointer);
549 buffer.length = ACPI_ALLOCATE_BUFFER;
550 buffer.pointer = NULL;
551
552 tmp_map = cpu_present_map;
553 mp_register_lapic(physid, lapic->flags.enabled);
554
555 /*
556 * If mp_register_lapic successfully generates a new logical cpu
 557	 * number, then the following will get us exactly what was mapped.
558 */
559 cpus_andnot(new_map, cpu_present_map, tmp_map);
560 if (cpus_empty(new_map)) {
 561	 printk("Unable to map lapic to logical cpu number\n");
562 return -EINVAL;
563 }
564
565 cpu = first_cpu(new_map);
566
567 *pcpu = cpu;
568 return 0;
511} 569}
512 570
513EXPORT_SYMBOL(acpi_map_lsapic); 571EXPORT_SYMBOL(acpi_map_lsapic);
514 572
515int acpi_unmap_lsapic(int cpu) 573int acpi_unmap_lsapic(int cpu)
516{ 574{
517 /* TBD */ 575 int i;
518 return -EINVAL; 576
577 for_each_possible_cpu(i) {
578 if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) {
579 x86_acpiid_to_apicid[i] = -1;
580 break;
581 }
582 }
583 x86_cpu_to_apicid[cpu] = -1;
584 cpu_clear(cpu, cpu_present_map);
585 num_processors--;
586
587 return (0);
519} 588}
520 589
521EXPORT_SYMBOL(acpi_unmap_lsapic); 590EXPORT_SYMBOL(acpi_unmap_lsapic);
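
The new acpi_map_lsapic() above relies on a map-diff idiom to learn which logical
CPU number mp_register_lapic() handed out: snapshot cpu_present_map, let the
registration grow it as a side effect, then AND-NOT the two masks and take the
first set bit. A minimal sketch of that idiom, assuming the 2.6.18-era cpumask
API (cpus_andnot, cpus_empty, first_cpu); map_one_cpu() and its callback
parameter are hypothetical names for illustration:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Hypothetical helper mirroring the hunk above: diff the present
 * map before and after registration to find the new logical id. */
static int map_one_cpu(u8 physid, void (*register_lapic)(u8))
{
	cpumask_t before, added;

	before = cpu_present_map;	/* snapshot */
	register_lapic(physid);		/* may grow cpu_present_map */

	cpus_andnot(added, cpu_present_map, before);
	if (cpus_empty(added))
		return -EINVAL;		/* registration added nothing */

	return first_cpu(added);	/* the freshly mapped logical cpu */
}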
@@ -579,6 +648,8 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
579static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) 648static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
580{ 649{
581 struct acpi_table_hpet *hpet_tbl; 650 struct acpi_table_hpet *hpet_tbl;
651 struct resource *hpet_res;
652 resource_size_t res_start;
582 653
583 if (!phys || !size) 654 if (!phys || !size)
584 return -EINVAL; 655 return -EINVAL;
@@ -594,12 +665,26 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
594 "memory.\n"); 665 "memory.\n");
595 return -1; 666 return -1;
596 } 667 }
668
669#define HPET_RESOURCE_NAME_SIZE 9
670 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
671 if (hpet_res) {
672 memset(hpet_res, 0, sizeof(*hpet_res));
673 hpet_res->name = (void *)&hpet_res[1];
674 hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
675 snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE,
676 "HPET %u", hpet_tbl->number);
677 hpet_res->end = (1 * 1024) - 1;
678 }
679
597#ifdef CONFIG_X86_64 680#ifdef CONFIG_X86_64
598 vxtime.hpet_address = hpet_tbl->addr.addrl | 681 vxtime.hpet_address = hpet_tbl->addr.addrl |
599 ((long)hpet_tbl->addr.addrh << 32); 682 ((long)hpet_tbl->addr.addrh << 32);
600 683
601 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", 684 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
602 hpet_tbl->id, vxtime.hpet_address); 685 hpet_tbl->id, vxtime.hpet_address);
686
687 res_start = vxtime.hpet_address;
603#else /* X86 */ 688#else /* X86 */
604 { 689 {
605 extern unsigned long hpet_address; 690 extern unsigned long hpet_address;
@@ -607,9 +692,17 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
607 hpet_address = hpet_tbl->addr.addrl; 692 hpet_address = hpet_tbl->addr.addrl;
608 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", 693 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
609 hpet_tbl->id, hpet_address); 694 hpet_tbl->id, hpet_address);
695
696 res_start = hpet_address;
610 } 697 }
611#endif /* X86 */ 698#endif /* X86 */
612 699
700 if (hpet_res) {
701 hpet_res->start = res_start;
702 hpet_res->end += res_start;
703 insert_resource(&iomem_resource, hpet_res);
704 }
705
613 return 0; 706 return 0;
614} 707}
615#else 708#else
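
The hpet_res code added above uses a single-allocation idiom: the resource
struct and its name string come from one bootmem block, with ->name pointing
just past the struct, so neither needs a separate free path. A sketch of the
same pattern, assuming the in-tree alloc_bootmem()/insert_resource()
interfaces; claim_mmio() is a hypothetical wrapper, not kernel API:

#include <linux/ioport.h>
#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/kernel.h>

#define NAME_SIZE 9	/* room for "HPET %u" plus NUL, as in the hunk */

static struct resource * __init claim_mmio(resource_size_t start,
					   resource_size_t len,
					   unsigned int id)
{
	/* One allocation holds both the struct and its name buffer. */
	struct resource *res = alloc_bootmem(sizeof(*res) + NAME_SIZE);

	if (!res)
		return NULL;
	memset(res, 0, sizeof(*res));
	res->name  = (void *)&res[1];	/* name lives after the struct */
	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
	snprintf((char *)res->name, NAME_SIZE, "HPET %u", id);
	res->start = start;
	res->end   = start + len - 1;
	insert_resource(&iomem_resource, res);
	return res;
}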
@@ -860,8 +953,6 @@ static void __init acpi_process_madt(void)
860 return; 953 return;
861} 954}
862 955
863extern int acpi_force;
864
865#ifdef __i386__ 956#ifdef __i386__
866 957
867static int __init disable_acpi_irq(struct dmi_system_id *d) 958static int __init disable_acpi_irq(struct dmi_system_id *d)
@@ -1163,3 +1254,75 @@ int __init acpi_boot_init(void)
1163 1254
1164 return 0; 1255 return 0;
1165} 1256}
1257
1258static int __init parse_acpi(char *arg)
1259{
1260 if (!arg)
1261 return -EINVAL;
1262
1263 /* "acpi=off" disables both ACPI table parsing and interpreter */
1264 if (strcmp(arg, "off") == 0) {
1265 disable_acpi();
1266 }
1267	 /* acpi=force to override the blacklist */
1268 else if (strcmp(arg, "force") == 0) {
1269 acpi_force = 1;
1270 acpi_ht = 1;
1271 acpi_disabled = 0;
1272 }
1273 /* acpi=strict disables out-of-spec workarounds */
1274 else if (strcmp(arg, "strict") == 0) {
1275 acpi_strict = 1;
1276 }
1277 /* Limit ACPI just to boot-time to enable HT */
1278 else if (strcmp(arg, "ht") == 0) {
1279 if (!acpi_force)
1280 disable_acpi();
1281 acpi_ht = 1;
1282 }
1283 /* "acpi=noirq" disables ACPI interrupt routing */
1284 else if (strcmp(arg, "noirq") == 0) {
1285 acpi_noirq_set();
1286 } else {
1287 /* Core will printk when we return error. */
1288 return -EINVAL;
1289 }
1290 return 0;
1291}
1292early_param("acpi", parse_acpi);
1293
1294/* FIXME: Using pci= for an ACPI parameter is a travesty. */
1295static int __init parse_pci(char *arg)
1296{
1297 if (arg && strcmp(arg, "noacpi") == 0)
1298 acpi_disable_pci();
1299 return 0;
1300}
1301early_param("pci", parse_pci);
1302
1303#ifdef CONFIG_X86_IO_APIC
1304static int __init parse_acpi_skip_timer_override(char *arg)
1305{
1306 acpi_skip_timer_override = 1;
1307 return 0;
1308}
1309early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override);
1310#endif /* CONFIG_X86_IO_APIC */
1311
1312static int __init setup_acpi_sci(char *s)
1313{
1314 if (!s)
1315 return -EINVAL;
1316 if (!strcmp(s, "edge"))
1317 acpi_sci_flags.trigger = 1;
1318 else if (!strcmp(s, "level"))
1319 acpi_sci_flags.trigger = 3;
1320 else if (!strcmp(s, "high"))
1321 acpi_sci_flags.polarity = 1;
1322 else if (!strcmp(s, "low"))
1323 acpi_sci_flags.polarity = 3;
1324 else
1325 return -EINVAL;
1326 return 0;
1327}
1328early_param("acpi_sci", setup_acpi_sci);
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index 1649a175a206..fe799b11ac0a 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -48,7 +48,11 @@ void __init check_acpi_pci(void)
48 int num, slot, func; 48 int num, slot, func;
49 49
50 /* Assume the machine supports type 1. If not it will 50 /* Assume the machine supports type 1. If not it will
51 always read ffffffff and should not have any side effect. */ 51 always read ffffffff and should not have any side effect.
52 Actually a few buggy systems can machine check. Allow the user
 53	 to disable it with a command line option, at least. -AK */
54 if (!early_pci_allowed())
55 return;
52 56
53 /* Poor man's PCI discovery */ 57 /* Poor man's PCI discovery */
54 for (num = 0; num < 32; num++) { 58 for (num = 0; num < 32; num++) {
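
The "poor man's PCI discovery" loop that follows this comment probes devices
with type 1 ("conf1") configuration cycles, the mechanism that
early_pci_allowed() now gates: an address word goes out port 0xCF8 and the
data comes back on 0xCFC. A minimal sketch of such a read, assuming the
kernel's outl(value, port)/inl(port) accessors; conf1_read() is an
illustrative name, not the in-tree helper:

#include <asm/io.h>
#include <linux/types.h>

static u32 conf1_read(unsigned int bus, unsigned int slot,
		      unsigned int func, unsigned int offset)
{
	/* Enable bit | bus | device | function | dword-aligned offset. */
	outl(0x80000000 | (bus << 16) | (slot << 11) | (func << 8)
	     | (offset & ~3), 0xCF8);
	return inl(0xCFC);	/* reads 0xFFFFFFFF if nothing answers */
}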
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 8c844d07862f..90faae5c5d30 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -52,7 +52,18 @@ static cpumask_t timer_bcast_ipi;
52/* 52/*
53 * Knob to control our willingness to enable the local APIC. 53 * Knob to control our willingness to enable the local APIC.
54 */ 54 */
55int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ 55static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
56
57static inline void lapic_disable(void)
58{
59 enable_local_apic = -1;
60 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
61}
62
63static inline void lapic_enable(void)
64{
65 enable_local_apic = 1;
66}
56 67
57/* 68/*
58 * Debug level 69 * Debug level
@@ -586,8 +597,7 @@ void __devinit setup_local_APIC(void)
586 printk("No ESR for 82489DX.\n"); 597 printk("No ESR for 82489DX.\n");
587 } 598 }
588 599
589 if (nmi_watchdog == NMI_LOCAL_APIC) 600 setup_apic_nmi_watchdog(NULL);
590 setup_apic_nmi_watchdog();
591 apic_pm_activate(); 601 apic_pm_activate();
592} 602}
593 603
@@ -1373,3 +1383,18 @@ int __init APIC_init_uniprocessor (void)
1373 1383
1374 return 0; 1384 return 0;
1375} 1385}
1386
1387static int __init parse_lapic(char *arg)
1388{
1389 lapic_enable();
1390 return 0;
1391}
1392early_param("lapic", parse_lapic);
1393
1394static int __init parse_nolapic(char *arg)
1395{
1396 lapic_disable();
1397 return 0;
1398}
1399early_param("nolapic", parse_nolapic);
1400
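
enable_local_apic above is a tri-state knob: -1 force-disables, +1
force-enables, and 0 leaves the decision to detection, which is why
parse_lapic()/parse_nolapic() only ever push it away from zero. A sketch of
how a consumer reads such a knob; should_use_lapic() is hypothetical:

#include <linux/init.h>

static int enable_local_apic __initdata;	/* mirrors the hunk above */

static int __init should_use_lapic(int detected)
{
	if (enable_local_apic < 0)
		return 0;		/* user passed "nolapic" */
	if (enable_local_apic > 0)
		return 1;		/* user passed "lapic" */
	return detected;		/* fall back to hardware probe */
}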
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index e6a2d6b80cda..e4758095d87a 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -22,7 +22,7 @@
22extern void vide(void); 22extern void vide(void);
23__asm__(".align 4\nvide: ret"); 23__asm__(".align 4\nvide: ret");
24 24
25static void __init init_amd(struct cpuinfo_x86 *c) 25static void __cpuinit init_amd(struct cpuinfo_x86 *c)
26{ 26{
27 u32 l, h; 27 u32 l, h;
28 int mbytes = num_physpages >> (20-PAGE_SHIFT); 28 int mbytes = num_physpages >> (20-PAGE_SHIFT);
@@ -246,7 +246,7 @@ static void __init init_amd(struct cpuinfo_x86 *c)
246 num_cache_leaves = 3; 246 num_cache_leaves = 3;
247} 247}
248 248
249static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) 249static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
250{ 250{
251 /* AMD errata T13 (order #21922) */ 251 /* AMD errata T13 (order #21922) */
252 if ((c->x86 == 6)) { 252 if ((c->x86 == 6)) {
@@ -259,7 +259,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
259 return size; 259 return size;
260} 260}
261 261
262static struct cpu_dev amd_cpu_dev __initdata = { 262static struct cpu_dev amd_cpu_dev __cpuinitdata = {
263 .c_vendor = "AMD", 263 .c_vendor = "AMD",
264 .c_ident = { "AuthenticAMD" }, 264 .c_ident = { "AuthenticAMD" },
265 .c_models = { 265 .c_models = {
@@ -275,7 +275,6 @@ static struct cpu_dev amd_cpu_dev __initdata = {
275 }, 275 },
276 }, 276 },
277 .c_init = init_amd, 277 .c_init = init_amd,
278 .c_identify = generic_identify,
279 .c_size_cache = amd_size_cache, 278 .c_size_cache = amd_size_cache,
280}; 279};
281 280
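
The wholesale __init to __cpuinit conversions in this and the following files
change which section the code lands in: with CONFIG_HOTPLUG_CPU the text must
outlive free_initmem(), because a hot-added CPU re-runs it, while without
hotplug it can be discarded like ordinary init code. A sketch of the
annotation pattern, assuming 2.6.18-era <linux/init.h>; the demo vendor entry
is hypothetical and struct cpu_dev mirrors the file-local cpu.h:

#include <linux/init.h>
#include <asm/processor.h>
#include "cpu.h"		/* file-local struct cpu_dev */

/* Runs once per CPU, possibly long after boot-time init memory has
 * been freed, so it must not be plain __init. */
static void __cpuinit demo_init(struct cpuinfo_x86 *c)
{
}

static struct cpu_dev demo_cpu_dev __cpuinitdata = {
	.c_vendor = "Demo",	/* hypothetical vendor */
	.c_init   = demo_init,
};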
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c
index bd75629dd262..8c25047975c0 100644
--- a/arch/i386/kernel/cpu/centaur.c
+++ b/arch/i386/kernel/cpu/centaur.c
@@ -9,7 +9,7 @@
9 9
10#ifdef CONFIG_X86_OOSTORE 10#ifdef CONFIG_X86_OOSTORE
11 11
12static u32 __init power2(u32 x) 12static u32 __cpuinit power2(u32 x)
13{ 13{
14 u32 s=1; 14 u32 s=1;
15 while(s<=x) 15 while(s<=x)
@@ -22,7 +22,7 @@ static u32 __init power2(u32 x)
22 * Set up an actual MCR 22 * Set up an actual MCR
23 */ 23 */
24 24
25static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) 25static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
26{ 26{
27 u32 lo, hi; 27 u32 lo, hi;
28 28
@@ -40,7 +40,7 @@ static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
40 * Shortcut: We know you can't put 4Gig of RAM on a winchip 40 * Shortcut: We know you can't put 4Gig of RAM on a winchip
41 */ 41 */
42 42
43static u32 __init ramtop(void) /* 16388 */ 43static u32 __cpuinit ramtop(void) /* 16388 */
44{ 44{
45 int i; 45 int i;
46 u32 top = 0; 46 u32 top = 0;
@@ -91,7 +91,7 @@ static u32 __init ramtop(void) /* 16388 */
91 * Compute a set of MCR's to give maximum coverage 91 * Compute a set of MCR's to give maximum coverage
92 */ 92 */
93 93
94static int __init centaur_mcr_compute(int nr, int key) 94static int __cpuinit centaur_mcr_compute(int nr, int key)
95{ 95{
96 u32 mem = ramtop(); 96 u32 mem = ramtop();
97 u32 root = power2(mem); 97 u32 root = power2(mem);
@@ -166,7 +166,7 @@ static int __init centaur_mcr_compute(int nr, int key)
166 return ct; 166 return ct;
167} 167}
168 168
169static void __init centaur_create_optimal_mcr(void) 169static void __cpuinit centaur_create_optimal_mcr(void)
170{ 170{
171 int i; 171 int i;
172 /* 172 /*
@@ -189,7 +189,7 @@ static void __init centaur_create_optimal_mcr(void)
189 wrmsr(MSR_IDT_MCR0+i, 0, 0); 189 wrmsr(MSR_IDT_MCR0+i, 0, 0);
190} 190}
191 191
192static void __init winchip2_create_optimal_mcr(void) 192static void __cpuinit winchip2_create_optimal_mcr(void)
193{ 193{
194 u32 lo, hi; 194 u32 lo, hi;
195 int i; 195 int i;
@@ -227,7 +227,7 @@ static void __init winchip2_create_optimal_mcr(void)
227 * Handle the MCR key on the Winchip 2. 227 * Handle the MCR key on the Winchip 2.
228 */ 228 */
229 229
230static void __init winchip2_unprotect_mcr(void) 230static void __cpuinit winchip2_unprotect_mcr(void)
231{ 231{
232 u32 lo, hi; 232 u32 lo, hi;
233 u32 key; 233 u32 key;
@@ -239,7 +239,7 @@ static void __init winchip2_unprotect_mcr(void)
239 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 239 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
240} 240}
241 241
242static void __init winchip2_protect_mcr(void) 242static void __cpuinit winchip2_protect_mcr(void)
243{ 243{
244 u32 lo, hi; 244 u32 lo, hi;
245 245
@@ -257,7 +257,7 @@ static void __init winchip2_protect_mcr(void)
257#define RNG_ENABLED (1 << 3) 257#define RNG_ENABLED (1 << 3)
258#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ 258#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
259 259
260static void __init init_c3(struct cpuinfo_x86 *c) 260static void __cpuinit init_c3(struct cpuinfo_x86 *c)
261{ 261{
262 u32 lo, hi; 262 u32 lo, hi;
263 263
@@ -303,7 +303,7 @@ static void __init init_c3(struct cpuinfo_x86 *c)
303 display_cacheinfo(c); 303 display_cacheinfo(c);
304} 304}
305 305
306static void __init init_centaur(struct cpuinfo_x86 *c) 306static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
307{ 307{
308 enum { 308 enum {
309 ECX8=1<<1, 309 ECX8=1<<1,
@@ -442,7 +442,7 @@ static void __init init_centaur(struct cpuinfo_x86 *c)
442 } 442 }
443} 443}
444 444
445static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) 445static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
446{ 446{
447 /* VIA C3 CPUs (670-68F) need further shifting. */ 447 /* VIA C3 CPUs (670-68F) need further shifting. */
448 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) 448 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
@@ -457,7 +457,7 @@ static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size
457 return size; 457 return size;
458} 458}
459 459
460static struct cpu_dev centaur_cpu_dev __initdata = { 460static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
461 .c_vendor = "Centaur", 461 .c_vendor = "Centaur",
462 .c_ident = { "CentaurHauls" }, 462 .c_ident = { "CentaurHauls" },
463 .c_init = init_centaur, 463 .c_init = init_centaur,
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 70c87de582c7..2799baaadf45 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -36,7 +36,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
36 36
37extern int disable_pse; 37extern int disable_pse;
38 38
39static void default_init(struct cpuinfo_x86 * c) 39static void __cpuinit default_init(struct cpuinfo_x86 * c)
40{ 40{
41 /* Not much we can do here... */ 41 /* Not much we can do here... */
42 /* Check if at least it has cpuid */ 42 /* Check if at least it has cpuid */
@@ -49,7 +49,7 @@ static void default_init(struct cpuinfo_x86 * c)
49 } 49 }
50} 50}
51 51
52static struct cpu_dev default_cpu = { 52static struct cpu_dev __cpuinitdata default_cpu = {
53 .c_init = default_init, 53 .c_init = default_init,
54 .c_vendor = "Unknown", 54 .c_vendor = "Unknown",
55}; 55};
@@ -265,7 +265,7 @@ static void __init early_cpu_detect(void)
265 } 265 }
266} 266}
267 267
268void __cpuinit generic_identify(struct cpuinfo_x86 * c) 268static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
269{ 269{
270 u32 tfms, xlvl; 270 u32 tfms, xlvl;
271 int ebx; 271 int ebx;
@@ -675,7 +675,7 @@ old_gdt:
675#endif 675#endif
676 676
677 /* Clear %fs and %gs. */ 677 /* Clear %fs and %gs. */
678 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); 678 asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
679 679
680 /* Clear all 6 debug registers: */ 680 /* Clear all 6 debug registers: */
681 set_debugreg(0, 0); 681 set_debugreg(0, 0);
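
The %fs/%gs change just above is worth spelling out: the old basic-asm form
zeroed %eax behind the compiler's back, while the new extended-asm form routes
the zero through an input operand, so gcc both picks the register and knows it
is read. Side by side, as an illustrative sketch:

/* BAD: %eax is trashed without being listed as clobbered. */
static inline void clear_fs_gs_bad(void)
{
	asm volatile("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
}

/* GOOD: compiler-chosen register, properly declared as an input. */
static inline void clear_fs_gs_good(void)
{
	asm volatile("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
}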
diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h
index 5a1d4f163e84..2f6432cef6ff 100644
--- a/arch/i386/kernel/cpu/cpu.h
+++ b/arch/i386/kernel/cpu/cpu.h
@@ -24,7 +24,5 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
24extern int get_model_name(struct cpuinfo_x86 *c); 24extern int get_model_name(struct cpuinfo_x86 *c);
25extern void display_cacheinfo(struct cpuinfo_x86 *c); 25extern void display_cacheinfo(struct cpuinfo_x86 *c);
26 26
27extern void generic_identify(struct cpuinfo_x86 * c);
28
29extern void early_intel_workaround(struct cpuinfo_x86 *c); 27extern void early_intel_workaround(struct cpuinfo_x86 *c);
30 28
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index f03b7f94c304..c0c3b59de32c 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -12,7 +12,7 @@
12/* 12/*
13 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU 13 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
14 */ 14 */
15static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) 15static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
16{ 16{
17 unsigned char ccr2, ccr3; 17 unsigned char ccr2, ccr3;
18 unsigned long flags; 18 unsigned long flags;
@@ -52,25 +52,25 @@ static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
52 * Actually since bugs.h doesn't even reference this perhaps someone should 52 * Actually since bugs.h doesn't even reference this perhaps someone should
53 * fix the documentation ??? 53 * fix the documentation ???
54 */ 54 */
55static unsigned char Cx86_dir0_msb __initdata = 0; 55static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
56 56
57static char Cx86_model[][9] __initdata = { 57static char Cx86_model[][9] __cpuinitdata = {
58 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", 58 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
59 "M II ", "Unknown" 59 "M II ", "Unknown"
60}; 60};
61static char Cx486_name[][5] __initdata = { 61static char Cx486_name[][5] __cpuinitdata = {
62 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", 62 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
63 "SRx2", "DRx2" 63 "SRx2", "DRx2"
64}; 64};
65static char Cx486S_name[][4] __initdata = { 65static char Cx486S_name[][4] __cpuinitdata = {
66 "S", "S2", "Se", "S2e" 66 "S", "S2", "Se", "S2e"
67}; 67};
68static char Cx486D_name[][4] __initdata = { 68static char Cx486D_name[][4] __cpuinitdata = {
69 "DX", "DX2", "?", "?", "?", "DX4" 69 "DX", "DX2", "?", "?", "?", "DX4"
70}; 70};
71static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock"; 71static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
72static char cyrix_model_mult1[] __initdata = "12??43"; 72static char cyrix_model_mult1[] __cpuinitdata = "12??43";
73static char cyrix_model_mult2[] __initdata = "12233445"; 73static char cyrix_model_mult2[] __cpuinitdata = "12233445";
74 74
75/* 75/*
76 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old 76 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -82,7 +82,7 @@ static char cyrix_model_mult2[] __initdata = "12233445";
82 82
83extern void calibrate_delay(void) __init; 83extern void calibrate_delay(void) __init;
84 84
85static void __init check_cx686_slop(struct cpuinfo_x86 *c) 85static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
86{ 86{
87 unsigned long flags; 87 unsigned long flags;
88 88
@@ -107,7 +107,7 @@ static void __init check_cx686_slop(struct cpuinfo_x86 *c)
107} 107}
108 108
109 109
110static void __init set_cx86_reorder(void) 110static void __cpuinit set_cx86_reorder(void)
111{ 111{
112 u8 ccr3; 112 u8 ccr3;
113 113
@@ -122,7 +122,7 @@ static void __init set_cx86_reorder(void)
122 setCx86(CX86_CCR3, ccr3); 122 setCx86(CX86_CCR3, ccr3);
123} 123}
124 124
125static void __init set_cx86_memwb(void) 125static void __cpuinit set_cx86_memwb(void)
126{ 126{
127 u32 cr0; 127 u32 cr0;
128 128
@@ -137,7 +137,7 @@ static void __init set_cx86_memwb(void)
137 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); 137 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
138} 138}
139 139
140static void __init set_cx86_inc(void) 140static void __cpuinit set_cx86_inc(void)
141{ 141{
142 unsigned char ccr3; 142 unsigned char ccr3;
143 143
@@ -158,7 +158,7 @@ static void __init set_cx86_inc(void)
158 * Configure later MediaGX and/or Geode processor. 158 * Configure later MediaGX and/or Geode processor.
159 */ 159 */
160 160
161static void __init geode_configure(void) 161static void __cpuinit geode_configure(void)
162{ 162{
163 unsigned long flags; 163 unsigned long flags;
164 u8 ccr3, ccr4; 164 u8 ccr3, ccr4;
@@ -184,14 +184,14 @@ static void __init geode_configure(void)
184 184
185 185
186#ifdef CONFIG_PCI 186#ifdef CONFIG_PCI
187static struct pci_device_id __initdata cyrix_55x0[] = { 187static struct pci_device_id __cpuinitdata cyrix_55x0[] = {
188 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, 188 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
189 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, 189 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
190 { }, 190 { },
191}; 191};
192#endif 192#endif
193 193
194static void __init init_cyrix(struct cpuinfo_x86 *c) 194static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
195{ 195{
196 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; 196 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
197 char *buf = c->x86_model_id; 197 char *buf = c->x86_model_id;
@@ -346,7 +346,7 @@ static void __init init_cyrix(struct cpuinfo_x86 *c)
346/* 346/*
347 * Handle National Semiconductor branded processors 347 * Handle National Semiconductor branded processors
348 */ 348 */
349static void __init init_nsc(struct cpuinfo_x86 *c) 349static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
350{ 350{
351 /* There may be GX1 processors in the wild that are branded 351 /* There may be GX1 processors in the wild that are branded
352 * NSC and not Cyrix. 352 * NSC and not Cyrix.
@@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void)
394 return (unsigned char) (test >> 8) == 0x02; 394 return (unsigned char) (test >> 8) == 0x02;
395} 395}
396 396
397static void cyrix_identify(struct cpuinfo_x86 * c) 397static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
398{ 398{
399 /* Detect Cyrix with disabled CPUID */ 399 /* Detect Cyrix with disabled CPUID */
400 if ( c->x86 == 4 && test_cyrix_52div() ) { 400 if ( c->x86 == 4 && test_cyrix_52div() ) {
@@ -427,10 +427,9 @@ static void cyrix_identify(struct cpuinfo_x86 * c)
427 local_irq_restore(flags); 427 local_irq_restore(flags);
428 } 428 }
429 } 429 }
430 generic_identify(c);
431} 430}
432 431
433static struct cpu_dev cyrix_cpu_dev __initdata = { 432static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
434 .c_vendor = "Cyrix", 433 .c_vendor = "Cyrix",
435 .c_ident = { "CyrixInstead" }, 434 .c_ident = { "CyrixInstead" },
436 .c_init = init_cyrix, 435 .c_init = init_cyrix,
@@ -453,11 +452,10 @@ static int __init cyrix_exit_cpu(void)
453 452
454late_initcall(cyrix_exit_cpu); 453late_initcall(cyrix_exit_cpu);
455 454
456static struct cpu_dev nsc_cpu_dev __initdata = { 455static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
457 .c_vendor = "NSC", 456 .c_vendor = "NSC",
458 .c_ident = { "Geode by NSC" }, 457 .c_ident = { "Geode by NSC" },
459 .c_init = init_nsc, 458 .c_init = init_nsc,
460 .c_identify = generic_identify,
461}; 459};
462 460
463int __init nsc_init_cpu(void) 461int __init nsc_init_cpu(void)
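The removal of `.c_identify = generic_identify` and of the explicit generic_identify(c) calls here (and in intel.c, nexgen.c and transmeta.c below) goes together with generic_identify() becoming static in common.c: the common code now runs it unconditionally for every CPU, and c_identify is reduced to an optional vendor-specific extra pass. A minimal sketch of the implied dispatch (reconstructed from these hunks; identify_cpu() and this_cpu are the common.c names, but the body here is an assumption, not quoted code):

void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
        generic_identify(c);            /* common CPUID probe, always runs */

        if (this_cpu->c_identify)       /* vendor hook is now optional */
                this_cpu->c_identify(c);
}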
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 5a2e270924b1..94a95aa5227e 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -198,7 +198,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
198} 198}
199 199
200 200
201static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) 201static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
202{ 202{
203 /* Intel PIII Tualatin. This comes in two flavours. 203 /* Intel PIII Tualatin. This comes in two flavours.
204 * One has 256kb of cache, the other 512. We have no way 204 * One has 256kb of cache, the other 512. We have no way
@@ -263,7 +263,6 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
263 }, 263 },
264 }, 264 },
265 .c_init = init_intel, 265 .c_init = init_intel,
266 .c_identify = generic_identify,
267 .c_size_cache = intel_size_cache, 266 .c_size_cache = intel_size_cache,
268}; 267};
269 268
diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile
index 30808f3d6715..f1ebe1c1c17a 100644
--- a/arch/i386/kernel/cpu/mcheck/Makefile
+++ b/arch/i386/kernel/cpu/mcheck/Makefile
@@ -1,2 +1,2 @@
1obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o 1obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
2obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o 2obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index b95f1b3d53aa..504434a46011 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -13,6 +13,8 @@
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
15 15
16#include <asm/therm_throt.h>
17
16#include "mce.h" 18#include "mce.h"
17 19
18/* as supported by the P4/Xeon family */ 20/* as supported by the P4/Xeon family */
@@ -44,25 +46,12 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
44/* P4/Xeon Thermal transition interrupt handler */ 46/* P4/Xeon Thermal transition interrupt handler */
45static void intel_thermal_interrupt(struct pt_regs *regs) 47static void intel_thermal_interrupt(struct pt_regs *regs)
46{ 48{
47 u32 l, h; 49 __u64 msr_val;
48 unsigned int cpu = smp_processor_id();
49 static unsigned long next[NR_CPUS];
50 50
51 ack_APIC_irq(); 51 ack_APIC_irq();
52 52
53 if (time_after(next[cpu], jiffies)) 53 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
54 return; 54 therm_throt_process(msr_val & 0x1);
55
56 next[cpu] = jiffies + HZ*5;
57 rdmsr(MSR_IA32_THERM_STATUS, l, h);
58 if (l & 0x1) {
59 printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
60 printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
61 cpu);
62 add_taint(TAINT_MACHINE_CHECK);
63 } else {
64 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
65 }
66} 55}
67 56
68/* Thermal interrupt handler for this CPU setup */ 57/* Thermal interrupt handler for this CPU setup */
@@ -122,10 +111,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
122 111
123 rdmsr (MSR_IA32_MISC_ENABLE, l, h); 112 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
124 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); 113 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
125 114
126 l = apic_read (APIC_LVTTHMR); 115 l = apic_read (APIC_LVTTHMR);
127 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 116 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
128 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); 117 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
118
119 /* enable thermal throttle processing */
120 atomic_set(&therm_throt_en, 1);
129 return; 121 return;
130} 122}
131#endif /* CONFIG_X86_MCE_P4THERMAL */ 123#endif /* CONFIG_X86_MCE_P4THERMAL */
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
new file mode 100644
index 000000000000..4f43047de406
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -0,0 +1,180 @@
1/*
 2 * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
3 *
4 * Thermal throttle event support code (such as syslog messaging and rate
5 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
6 * This allows consistent reporting of CPU thermal throttle events.
7 *
8 * Maintains a counter in /sys that keeps track of the number of thermal
9 * events, such that the user knows how bad the thermal problem might be
10 * (since the logging to syslog and mcelog is rate limited).
11 *
12 * Author: Dmitriy Zavin (dmitriyz@google.com)
13 *
14 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
15 * Inspired by Ross Biro's and Al Borchers' counter code.
16 */
17
18#include <linux/percpu.h>
19#include <linux/sysdev.h>
20#include <linux/cpu.h>
21#include <asm/cpu.h>
22#include <linux/notifier.h>
23#include <asm/therm_throt.h>
24
25/* How long to wait between reporting thermal events */
26#define CHECK_INTERVAL (300 * HZ)
27
28static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
29static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
30atomic_t therm_throt_en = ATOMIC_INIT(0);
31
32#ifdef CONFIG_SYSFS
33#define define_therm_throt_sysdev_one_ro(_name) \
34 static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
35
36#define define_therm_throt_sysdev_show_func(name) \
37static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
38 char *buf) \
39{ \
40 unsigned int cpu = dev->id; \
41 ssize_t ret; \
42 \
43 preempt_disable(); /* CPU hotplug */ \
44 if (cpu_online(cpu)) \
45 ret = sprintf(buf, "%lu\n", \
46 per_cpu(thermal_throttle_##name, cpu)); \
47 else \
48 ret = 0; \
49 preempt_enable(); \
50 \
51 return ret; \
52}
53
54define_therm_throt_sysdev_show_func(count);
55define_therm_throt_sysdev_one_ro(count);
56
57static struct attribute *thermal_throttle_attrs[] = {
58 &attr_count.attr,
59 NULL
60};
61
62static struct attribute_group thermal_throttle_attr_group = {
63 .attrs = thermal_throttle_attrs,
64 .name = "thermal_throttle"
65};
66#endif /* CONFIG_SYSFS */
67
68/***
69 * therm_throt_process - Process thermal throttling event from interrupt
70 * @curr: Whether the condition is current or not (boolean), since the
71 * thermal interrupt normally gets called both when the thermal
72 * event begins and once the event has ended.
73 *
74 * This function is called by the thermal interrupt after the
75 * IRQ has been acknowledged.
76 *
77 * It will take care of rate limiting and printing messages to the syslog.
78 *
79 * Returns: 0 : Event should NOT be further logged, i.e. still in
80 * "timeout" from previous log message.
81 * 1 : Event should be logged further, and a message has been
82 * printed to the syslog.
83 */
84int therm_throt_process(int curr)
85{
86 unsigned int cpu = smp_processor_id();
87 __u64 tmp_jiffs = get_jiffies_64();
88
89 if (curr)
90 __get_cpu_var(thermal_throttle_count)++;
91
92 if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
93 return 0;
94
95 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
96
97 /* if we just entered the thermal event */
98 if (curr) {
99 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
100 "cpu clock throttled (total events = %lu)\n", cpu,
101 __get_cpu_var(thermal_throttle_count));
102
103 add_taint(TAINT_MACHINE_CHECK);
104 } else {
105 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
106 }
107
108 return 1;
109}
110
111#ifdef CONFIG_SYSFS
112/* Add/Remove thermal_throttle interface for CPU device */
113static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev)
114{
115 sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
116 return 0;
117}
118
119#ifdef CONFIG_HOTPLUG_CPU
120static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev)
121{
122 sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
123 return 0;
124}
125
126/* Mutex protecting device creation against CPU hotplug */
127static DEFINE_MUTEX(therm_cpu_lock);
128
129/* Get notified when a cpu comes on/off. Be hotplug friendly. */
130static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
131 unsigned long action,
132 void *hcpu)
133{
134 unsigned int cpu = (unsigned long)hcpu;
135 struct sys_device *sys_dev;
136
137 sys_dev = get_cpu_sysdev(cpu);
138 mutex_lock(&therm_cpu_lock);
139 switch (action) {
140 case CPU_ONLINE:
141 thermal_throttle_add_dev(sys_dev);
142 break;
143 case CPU_DEAD:
144 thermal_throttle_remove_dev(sys_dev);
145 break;
146 }
147 mutex_unlock(&therm_cpu_lock);
148 return NOTIFY_OK;
149}
150
151static struct notifier_block thermal_throttle_cpu_notifier =
152{
153 .notifier_call = thermal_throttle_cpu_callback,
154};
155#endif /* CONFIG_HOTPLUG_CPU */
156
157static __init int thermal_throttle_init_device(void)
158{
159 unsigned int cpu = 0;
160
161 if (!atomic_read(&therm_throt_en))
162 return 0;
163
164 register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
165
166#ifdef CONFIG_HOTPLUG_CPU
167 mutex_lock(&therm_cpu_lock);
168#endif
169 /* connect live CPUs to sysfs */
170 for_each_online_cpu(cpu)
171 thermal_throttle_add_dev(get_cpu_sysdev(cpu));
172#ifdef CONFIG_HOTPLUG_CPU
173 mutex_unlock(&therm_cpu_lock);
174#endif
175
176 return 0;
177}
178
179device_initcall(thermal_throttle_init_device);
180#endif /* CONFIG_SYSFS */
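Because the syslog/mcelog output above is rate limited to one message per CHECK_INTERVAL (five minutes), the per-CPU sysfs counter is the reliable way for userspace to gauge how much throttling really occurs. A small reader sketch; the path follows from the "thermal_throttle" group name and "count" attribute registered above, and error handling is kept minimal:

#include <stdio.h>

/* Return the thermal throttle event count for one CPU, or -1 on error. */
static long read_throttle_count(int cpu)
{
        char path[128];
        long count;
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/devices/system/cpu/cpu%d/thermal_throttle/count", cpu);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (fscanf(f, "%ld", &count) != 1)
                count = -1;
        fclose(f);
        return count;
}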
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c
index ad87fa58058d..8bf23cc80c63 100644
--- a/arch/i386/kernel/cpu/nexgen.c
+++ b/arch/i386/kernel/cpu/nexgen.c
@@ -10,7 +10,7 @@
10 * to have CPUID. (Thanks to Herbert Oppmann) 10 * to have CPUID. (Thanks to Herbert Oppmann)
11 */ 11 */
12 12
13static int __init deep_magic_nexgen_probe(void) 13static int __cpuinit deep_magic_nexgen_probe(void)
14{ 14{
15 int ret; 15 int ret;
16 16
@@ -27,21 +27,20 @@ static int __init deep_magic_nexgen_probe(void)
27 return ret; 27 return ret;
28} 28}
29 29
30static void __init init_nexgen(struct cpuinfo_x86 * c) 30static void __cpuinit init_nexgen(struct cpuinfo_x86 * c)
31{ 31{
32 c->x86_cache_size = 256; /* A few had 1 MB... */ 32 c->x86_cache_size = 256; /* A few had 1 MB... */
33} 33}
34 34
35static void __init nexgen_identify(struct cpuinfo_x86 * c) 35static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c)
36{ 36{
37 /* Detect NexGen with old hypercode */ 37 /* Detect NexGen with old hypercode */
38 if ( deep_magic_nexgen_probe() ) { 38 if ( deep_magic_nexgen_probe() ) {
39 strcpy(c->x86_vendor_id, "NexGenDriven"); 39 strcpy(c->x86_vendor_id, "NexGenDriven");
40 } 40 }
41 generic_identify(c);
42} 41}
43 42
44static struct cpu_dev nexgen_cpu_dev __initdata = { 43static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
45 .c_vendor = "Nexgen", 44 .c_vendor = "Nexgen",
46 .c_ident = { "NexGenDriven" }, 45 .c_ident = { "NexGenDriven" },
47 .c_models = { 46 .c_models = {
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index f54a15268ed7..76aac088a323 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -46,8 +46,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
46 46
47 /* Intel-defined (#2) */ 47 /* Intel-defined (#2) */
48 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 48 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
49 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, 49 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
50 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 50 NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
51 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 51 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
52 52
53 /* VIA/Cyrix/Centaur-defined */ 53 /* VIA/Cyrix/Centaur-defined */
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
index d08d5a2811c8..9317f7414989 100644
--- a/arch/i386/kernel/cpu/rise.c
+++ b/arch/i386/kernel/cpu/rise.c
@@ -5,7 +5,7 @@
5 5
6#include "cpu.h" 6#include "cpu.h"
7 7
8static void __init init_rise(struct cpuinfo_x86 *c) 8static void __cpuinit init_rise(struct cpuinfo_x86 *c)
9{ 9{
10 printk("CPU: Rise iDragon"); 10 printk("CPU: Rise iDragon");
11 if (c->x86_model > 2) 11 if (c->x86_model > 2)
@@ -28,7 +28,7 @@ static void __init init_rise(struct cpuinfo_x86 *c)
28 set_bit(X86_FEATURE_CX8, c->x86_capability); 28 set_bit(X86_FEATURE_CX8, c->x86_capability);
29} 29}
30 30
31static struct cpu_dev rise_cpu_dev __initdata = { 31static struct cpu_dev rise_cpu_dev __cpuinitdata = {
32 .c_vendor = "Rise", 32 .c_vendor = "Rise",
33 .c_ident = { "RiseRiseRise" }, 33 .c_ident = { "RiseRiseRise" },
34 .c_models = { 34 .c_models = {
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index 7214c9b577ab..4056fb7d2cdf 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -5,7 +5,7 @@
5#include <asm/msr.h> 5#include <asm/msr.h>
6#include "cpu.h" 6#include "cpu.h"
7 7
8static void __init init_transmeta(struct cpuinfo_x86 *c) 8static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
9{ 9{
10 unsigned int cap_mask, uk, max, dummy; 10 unsigned int cap_mask, uk, max, dummy;
11 unsigned int cms_rev1, cms_rev2; 11 unsigned int cms_rev1, cms_rev2;
@@ -85,10 +85,9 @@ static void __init init_transmeta(struct cpuinfo_x86 *c)
85#endif 85#endif
86} 86}
87 87
88static void __init transmeta_identify(struct cpuinfo_x86 * c) 88static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c)
89{ 89{
90 u32 xlvl; 90 u32 xlvl;
91 generic_identify(c);
92 91
93 /* Transmeta-defined flags: level 0x80860001 */ 92 /* Transmeta-defined flags: level 0x80860001 */
94 xlvl = cpuid_eax(0x80860000); 93 xlvl = cpuid_eax(0x80860000);
@@ -98,7 +97,7 @@ static void __init transmeta_identify(struct cpuinfo_x86 * c)
98 } 97 }
99} 98}
100 99
101static struct cpu_dev transmeta_cpu_dev __initdata = { 100static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
102 .c_vendor = "Transmeta", 101 .c_vendor = "Transmeta",
103 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 102 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_init = init_transmeta, 103 .c_init = init_transmeta,
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c
index 2cd988f6dc55..1bf3f87e9c5b 100644
--- a/arch/i386/kernel/cpu/umc.c
+++ b/arch/i386/kernel/cpu/umc.c
@@ -5,12 +5,8 @@
5 5
6/* UMC chips appear to be only either 386 or 486, so no special init takes place. 6/* UMC chips appear to be only either 386 or 486, so no special init takes place.
7 */ 7 */
8static void __init init_umc(struct cpuinfo_x86 * c)
9{
10
11}
12 8
13static struct cpu_dev umc_cpu_dev __initdata = { 9static struct cpu_dev umc_cpu_dev __cpuinitdata = {
14 .c_vendor = "UMC", 10 .c_vendor = "UMC",
15 .c_ident = { "UMC UMC UMC" }, 11 .c_ident = { "UMC UMC UMC" },
16 .c_models = { 12 .c_models = {
@@ -21,7 +17,6 @@ static struct cpu_dev umc_cpu_dev __initdata = {
21 } 17 }
22 }, 18 },
23 }, 19 },
24 .c_init = init_umc,
25}; 20};
26 21
27int __init umc_init_cpu(void) 22int __init umc_init_cpu(void)
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 5b96f038367f..67d297dc1003 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -22,6 +22,8 @@
22#include <asm/nmi.h> 22#include <asm/nmi.h>
23#include <asm/hw_irq.h> 23#include <asm/hw_irq.h>
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/kdebug.h>
26
25#include <mach_ipi.h> 27#include <mach_ipi.h>
26 28
27 29
@@ -93,16 +95,25 @@ static void crash_save_self(struct pt_regs *regs)
93#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 95#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
94static atomic_t waiting_for_crash_ipi; 96static atomic_t waiting_for_crash_ipi;
95 97
96static int crash_nmi_callback(struct pt_regs *regs, int cpu) 98static int crash_nmi_callback(struct notifier_block *self,
99 unsigned long val, void *data)
97{ 100{
101 struct pt_regs *regs;
98 struct pt_regs fixed_regs; 102 struct pt_regs fixed_regs;
103 int cpu;
104
105 if (val != DIE_NMI_IPI)
106 return NOTIFY_OK;
107
108 regs = ((struct die_args *)data)->regs;
109 cpu = raw_smp_processor_id();
99 110
100 /* Don't do anything if this handler is invoked on crashing cpu. 111 /* Don't do anything if this handler is invoked on crashing cpu.
101 * Otherwise, system will completely hang. Crashing cpu can get 112 * Otherwise, system will completely hang. Crashing cpu can get
102 * an NMI if system was initially booted with nmi_watchdog parameter. 113 * an NMI if system was initially booted with nmi_watchdog parameter.
103 */ 114 */
104 if (cpu == crashing_cpu) 115 if (cpu == crashing_cpu)
105 return 1; 116 return NOTIFY_STOP;
106 local_irq_disable(); 117 local_irq_disable();
107 118
108 if (!user_mode_vm(regs)) { 119 if (!user_mode_vm(regs)) {
@@ -125,13 +136,18 @@ static void smp_send_nmi_allbutself(void)
125 send_IPI_allbutself(NMI_VECTOR); 136 send_IPI_allbutself(NMI_VECTOR);
126} 137}
127 138
139static struct notifier_block crash_nmi_nb = {
140 .notifier_call = crash_nmi_callback,
141};
142
128static void nmi_shootdown_cpus(void) 143static void nmi_shootdown_cpus(void)
129{ 144{
130 unsigned long msecs; 145 unsigned long msecs;
131 146
132 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); 147 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
133 /* Would it be better to replace the trap vector here? */ 148 /* Would it be better to replace the trap vector here? */
134 set_nmi_callback(crash_nmi_callback); 149 if (register_die_notifier(&crash_nmi_nb))
150 return; /* return what? */
135 /* Ensure the new callback function is set before sending 151 /* Ensure the new callback function is set before sending
136 * out the NMI 152 * out the NMI
137 */ 153 */
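The crash.c conversion is the consumer side of the NMI rework elsewhere in this merge: instead of the single global set_nmi_callback() slot, handlers now chain off the die notifier and filter on DIE_NMI_IPI. The registration pattern in isolation, with illustrative names (my_nmi_event/my_nmi_nb are not from the patch; this assumes <linux/notifier.h> and <asm/kdebug.h>):

static int my_nmi_event(struct notifier_block *self,
                        unsigned long val, void *data)
{
        if (val != DIE_NMI_IPI)         /* not ours: let the chain continue */
                return NOTIFY_OK;

        /* inspect ((struct die_args *)data)->regs as needed */
        return NOTIFY_STOP;             /* handled: stop the notifier chain */
}

static struct notifier_block my_nmi_nb = {
        .notifier_call = my_nmi_event,
};

/* at init time: register_die_notifier(&my_nmi_nb); */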
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 87f9f60b803b..5a63d6fdb70e 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -76,8 +76,15 @@ DF_MASK = 0x00000400
76NT_MASK = 0x00004000 76NT_MASK = 0x00004000
77VM_MASK = 0x00020000 77VM_MASK = 0x00020000
78 78
 79/* These are replacements for paravirtualization */
80#define DISABLE_INTERRUPTS cli
81#define ENABLE_INTERRUPTS sti
82#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
83#define INTERRUPT_RETURN iret
84#define GET_CR0_INTO_EAX movl %cr0, %eax
85
79#ifdef CONFIG_PREEMPT 86#ifdef CONFIG_PREEMPT
80#define preempt_stop cli; TRACE_IRQS_OFF 87#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
81#else 88#else
82#define preempt_stop 89#define preempt_stop
83#define resume_kernel restore_nocheck 90#define resume_kernel restore_nocheck
@@ -176,18 +183,21 @@ VM_MASK = 0x00020000
176 183
177#define RING0_INT_FRAME \ 184#define RING0_INT_FRAME \
178 CFI_STARTPROC simple;\ 185 CFI_STARTPROC simple;\
186 CFI_SIGNAL_FRAME;\
179 CFI_DEF_CFA esp, 3*4;\ 187 CFI_DEF_CFA esp, 3*4;\
180 /*CFI_OFFSET cs, -2*4;*/\ 188 /*CFI_OFFSET cs, -2*4;*/\
181 CFI_OFFSET eip, -3*4 189 CFI_OFFSET eip, -3*4
182 190
183#define RING0_EC_FRAME \ 191#define RING0_EC_FRAME \
184 CFI_STARTPROC simple;\ 192 CFI_STARTPROC simple;\
193 CFI_SIGNAL_FRAME;\
185 CFI_DEF_CFA esp, 4*4;\ 194 CFI_DEF_CFA esp, 4*4;\
186 /*CFI_OFFSET cs, -2*4;*/\ 195 /*CFI_OFFSET cs, -2*4;*/\
187 CFI_OFFSET eip, -3*4 196 CFI_OFFSET eip, -3*4
188 197
189#define RING0_PTREGS_FRAME \ 198#define RING0_PTREGS_FRAME \
190 CFI_STARTPROC simple;\ 199 CFI_STARTPROC simple;\
200 CFI_SIGNAL_FRAME;\
191 CFI_DEF_CFA esp, OLDESP-EBX;\ 201 CFI_DEF_CFA esp, OLDESP-EBX;\
192 /*CFI_OFFSET cs, CS-OLDESP;*/\ 202 /*CFI_OFFSET cs, CS-OLDESP;*/\
193 CFI_OFFSET eip, EIP-OLDESP;\ 203 CFI_OFFSET eip, EIP-OLDESP;\
@@ -233,10 +243,11 @@ ret_from_intr:
233check_userspace: 243check_userspace:
234 movl EFLAGS(%esp), %eax # mix EFLAGS and CS 244 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
235 movb CS(%esp), %al 245 movb CS(%esp), %al
236 testl $(VM_MASK | 3), %eax 246 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
237 jz resume_kernel 247 cmpl $USER_RPL, %eax
248 jb resume_kernel # not returning to v8086 or userspace
238ENTRY(resume_userspace) 249ENTRY(resume_userspace)
239 cli # make sure we don't miss an interrupt 250 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
240 # setting need_resched or sigpending 251 # setting need_resched or sigpending
241 # between sampling and the iret 252 # between sampling and the iret
242 movl TI_flags(%ebp), %ecx 253 movl TI_flags(%ebp), %ecx
@@ -247,7 +258,7 @@ ENTRY(resume_userspace)
247 258
248#ifdef CONFIG_PREEMPT 259#ifdef CONFIG_PREEMPT
249ENTRY(resume_kernel) 260ENTRY(resume_kernel)
250 cli 261 DISABLE_INTERRUPTS
251 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? 262 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
252 jnz restore_nocheck 263 jnz restore_nocheck
253need_resched: 264need_resched:
@@ -267,6 +278,7 @@ need_resched:
267 # sysenter call handler stub 278 # sysenter call handler stub
268ENTRY(sysenter_entry) 279ENTRY(sysenter_entry)
269 CFI_STARTPROC simple 280 CFI_STARTPROC simple
281 CFI_SIGNAL_FRAME
270 CFI_DEF_CFA esp, 0 282 CFI_DEF_CFA esp, 0
271 CFI_REGISTER esp, ebp 283 CFI_REGISTER esp, ebp
272 movl TSS_sysenter_esp0(%esp),%esp 284 movl TSS_sysenter_esp0(%esp),%esp
@@ -275,7 +287,7 @@ sysenter_past_esp:
275 * No need to follow this irqs on/off section: the syscall 287 * No need to follow this irqs on/off section: the syscall
276 * disabled irqs and here we enable it straight after entry: 288 * disabled irqs and here we enable it straight after entry:
277 */ 289 */
278 sti 290 ENABLE_INTERRUPTS
279 pushl $(__USER_DS) 291 pushl $(__USER_DS)
280 CFI_ADJUST_CFA_OFFSET 4 292 CFI_ADJUST_CFA_OFFSET 4
281 /*CFI_REL_OFFSET ss, 0*/ 293 /*CFI_REL_OFFSET ss, 0*/
@@ -320,7 +332,7 @@ sysenter_past_esp:
320 jae syscall_badsys 332 jae syscall_badsys
321 call *sys_call_table(,%eax,4) 333 call *sys_call_table(,%eax,4)
322 movl %eax,EAX(%esp) 334 movl %eax,EAX(%esp)
323 cli 335 DISABLE_INTERRUPTS
324 TRACE_IRQS_OFF 336 TRACE_IRQS_OFF
325 movl TI_flags(%ebp), %ecx 337 movl TI_flags(%ebp), %ecx
326 testw $_TIF_ALLWORK_MASK, %cx 338 testw $_TIF_ALLWORK_MASK, %cx
@@ -330,8 +342,7 @@ sysenter_past_esp:
330 movl OLDESP(%esp), %ecx 342 movl OLDESP(%esp), %ecx
331 xorl %ebp,%ebp 343 xorl %ebp,%ebp
332 TRACE_IRQS_ON 344 TRACE_IRQS_ON
333 sti 345 ENABLE_INTERRUPTS_SYSEXIT
334 sysexit
335 CFI_ENDPROC 346 CFI_ENDPROC
336 347
337 348
@@ -356,7 +367,7 @@ syscall_call:
356 call *sys_call_table(,%eax,4) 367 call *sys_call_table(,%eax,4)
357 movl %eax,EAX(%esp) # store the return value 368 movl %eax,EAX(%esp) # store the return value
358syscall_exit: 369syscall_exit:
359 cli # make sure we don't miss an interrupt 370 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
360 # setting need_resched or sigpending 371 # setting need_resched or sigpending
361 # between sampling and the iret 372 # between sampling and the iret
362 TRACE_IRQS_OFF 373 TRACE_IRQS_OFF
@@ -371,8 +382,8 @@ restore_all:
371 # See comments in process.c:copy_thread() for details. 382 # See comments in process.c:copy_thread() for details.
372 movb OLDSS(%esp), %ah 383 movb OLDSS(%esp), %ah
373 movb CS(%esp), %al 384 movb CS(%esp), %al
374 andl $(VM_MASK | (4 << 8) | 3), %eax 385 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
375 cmpl $((4 << 8) | 3), %eax 386 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
376 CFI_REMEMBER_STATE 387 CFI_REMEMBER_STATE
377 je ldt_ss # returning to user-space with LDT SS 388 je ldt_ss # returning to user-space with LDT SS
378restore_nocheck: 389restore_nocheck:
@@ -381,11 +392,11 @@ restore_nocheck_notrace:
381 RESTORE_REGS 392 RESTORE_REGS
382 addl $4, %esp 393 addl $4, %esp
383 CFI_ADJUST_CFA_OFFSET -4 394 CFI_ADJUST_CFA_OFFSET -4
3841: iret 3951: INTERRUPT_RETURN
385.section .fixup,"ax" 396.section .fixup,"ax"
386iret_exc: 397iret_exc:
387 TRACE_IRQS_ON 398 TRACE_IRQS_ON
388 sti 399 ENABLE_INTERRUPTS
389 pushl $0 # no error code 400 pushl $0 # no error code
390 pushl $do_iret_error 401 pushl $do_iret_error
391 jmp error_code 402 jmp error_code
@@ -409,7 +420,7 @@ ldt_ss:
409 * dosemu and wine happy. */ 420 * dosemu and wine happy. */
410 subl $8, %esp # reserve space for switch16 pointer 421 subl $8, %esp # reserve space for switch16 pointer
411 CFI_ADJUST_CFA_OFFSET 8 422 CFI_ADJUST_CFA_OFFSET 8
412 cli 423 DISABLE_INTERRUPTS
413 TRACE_IRQS_OFF 424 TRACE_IRQS_OFF
414 movl %esp, %eax 425 movl %esp, %eax
415 /* Set up the 16bit stack frame with switch32 pointer on top, 426 /* Set up the 16bit stack frame with switch32 pointer on top,
@@ -419,7 +430,7 @@ ldt_ss:
419 TRACE_IRQS_IRET 430 TRACE_IRQS_IRET
420 RESTORE_REGS 431 RESTORE_REGS
421 lss 20+4(%esp), %esp # switch to 16bit stack 432 lss 20+4(%esp), %esp # switch to 16bit stack
4221: iret 4331: INTERRUPT_RETURN
423.section __ex_table,"a" 434.section __ex_table,"a"
424 .align 4 435 .align 4
425 .long 1b,iret_exc 436 .long 1b,iret_exc
@@ -434,7 +445,7 @@ work_pending:
434 jz work_notifysig 445 jz work_notifysig
435work_resched: 446work_resched:
436 call schedule 447 call schedule
437 cli # make sure we don't miss an interrupt 448 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
438 # setting need_resched or sigpending 449 # setting need_resched or sigpending
439 # between sampling and the iret 450 # between sampling and the iret
440 TRACE_IRQS_OFF 451 TRACE_IRQS_OFF
@@ -490,7 +501,7 @@ syscall_exit_work:
490 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl 501 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
491 jz work_pending 502 jz work_pending
492 TRACE_IRQS_ON 503 TRACE_IRQS_ON
493 sti # could let do_syscall_trace() call 504 ENABLE_INTERRUPTS # could let do_syscall_trace() call
494 # schedule() instead 505 # schedule() instead
495 movl %esp, %eax 506 movl %esp, %eax
496 movl $1, %edx 507 movl $1, %edx
@@ -591,11 +602,9 @@ ENTRY(name) \
591/* The include is where all of the SMP etc. interrupts come from */ 602/* The include is where all of the SMP etc. interrupts come from */
592#include "entry_arch.h" 603#include "entry_arch.h"
593 604
594ENTRY(divide_error) 605KPROBE_ENTRY(page_fault)
595 RING0_INT_FRAME 606 RING0_EC_FRAME
596 pushl $0 # no error code 607 pushl $do_page_fault
597 CFI_ADJUST_CFA_OFFSET 4
598 pushl $do_divide_error
599 CFI_ADJUST_CFA_OFFSET 4 608 CFI_ADJUST_CFA_OFFSET 4
600 ALIGN 609 ALIGN
601error_code: 610error_code:
@@ -645,6 +654,7 @@ error_code:
645 call *%edi 654 call *%edi
646 jmp ret_from_exception 655 jmp ret_from_exception
647 CFI_ENDPROC 656 CFI_ENDPROC
657KPROBE_END(page_fault)
648 658
649ENTRY(coprocessor_error) 659ENTRY(coprocessor_error)
650 RING0_INT_FRAME 660 RING0_INT_FRAME
@@ -669,7 +679,7 @@ ENTRY(device_not_available)
669 pushl $-1 # mark this as an int 679 pushl $-1 # mark this as an int
670 CFI_ADJUST_CFA_OFFSET 4 680 CFI_ADJUST_CFA_OFFSET 4
671 SAVE_ALL 681 SAVE_ALL
672 movl %cr0, %eax 682 GET_CR0_INTO_EAX
673 testl $0x4, %eax # EM (math emulation bit) 683 testl $0x4, %eax # EM (math emulation bit)
674 jne device_not_available_emulate 684 jne device_not_available_emulate
675 preempt_stop 685 preempt_stop
@@ -702,9 +712,15 @@ device_not_available_emulate:
702 jne ok; \ 712 jne ok; \
703label: \ 713label: \
704 movl TSS_sysenter_esp0+offset(%esp),%esp; \ 714 movl TSS_sysenter_esp0+offset(%esp),%esp; \
715 CFI_DEF_CFA esp, 0; \
716 CFI_UNDEFINED eip; \
705 pushfl; \ 717 pushfl; \
718 CFI_ADJUST_CFA_OFFSET 4; \
706 pushl $__KERNEL_CS; \ 719 pushl $__KERNEL_CS; \
707 pushl $sysenter_past_esp 720 CFI_ADJUST_CFA_OFFSET 4; \
721 pushl $sysenter_past_esp; \
722 CFI_ADJUST_CFA_OFFSET 4; \
723 CFI_REL_OFFSET eip, 0
708 724
709KPROBE_ENTRY(debug) 725KPROBE_ENTRY(debug)
710 RING0_INT_FRAME 726 RING0_INT_FRAME
@@ -720,7 +736,8 @@ debug_stack_correct:
720 call do_debug 736 call do_debug
721 jmp ret_from_exception 737 jmp ret_from_exception
722 CFI_ENDPROC 738 CFI_ENDPROC
723 .previous .text 739KPROBE_END(debug)
740
724/* 741/*
725 * NMI is doubly nasty. It can happen _while_ we're handling 742 * NMI is doubly nasty. It can happen _while_ we're handling
726 * a debug fault, and the debug fault hasn't yet been able to 743 * a debug fault, and the debug fault hasn't yet been able to
@@ -729,7 +746,7 @@ debug_stack_correct:
729 * check whether we got an NMI on the debug path where the debug 746 * check whether we got an NMI on the debug path where the debug
730 * fault happened on the sysenter path. 747 * fault happened on the sysenter path.
731 */ 748 */
732ENTRY(nmi) 749KPROBE_ENTRY(nmi)
733 RING0_INT_FRAME 750 RING0_INT_FRAME
734 pushl %eax 751 pushl %eax
735 CFI_ADJUST_CFA_OFFSET 4 752 CFI_ADJUST_CFA_OFFSET 4
@@ -754,6 +771,7 @@ ENTRY(nmi)
754 cmpl $sysenter_entry,12(%esp) 771 cmpl $sysenter_entry,12(%esp)
755 je nmi_debug_stack_check 772 je nmi_debug_stack_check
756nmi_stack_correct: 773nmi_stack_correct:
774 /* We have a RING0_INT_FRAME here */
757 pushl %eax 775 pushl %eax
758 CFI_ADJUST_CFA_OFFSET 4 776 CFI_ADJUST_CFA_OFFSET 4
759 SAVE_ALL 777 SAVE_ALL
@@ -764,9 +782,12 @@ nmi_stack_correct:
764 CFI_ENDPROC 782 CFI_ENDPROC
765 783
766nmi_stack_fixup: 784nmi_stack_fixup:
785 RING0_INT_FRAME
767 FIX_STACK(12,nmi_stack_correct, 1) 786 FIX_STACK(12,nmi_stack_correct, 1)
768 jmp nmi_stack_correct 787 jmp nmi_stack_correct
788
769nmi_debug_stack_check: 789nmi_debug_stack_check:
790 /* We have a RING0_INT_FRAME here */
770 cmpw $__KERNEL_CS,16(%esp) 791 cmpw $__KERNEL_CS,16(%esp)
771 jne nmi_stack_correct 792 jne nmi_stack_correct
772 cmpl $debug,(%esp) 793 cmpl $debug,(%esp)
@@ -777,8 +798,10 @@ nmi_debug_stack_check:
777 jmp nmi_stack_correct 798 jmp nmi_stack_correct
778 799
779nmi_16bit_stack: 800nmi_16bit_stack:
780 RING0_INT_FRAME 801 /* We have a RING0_INT_FRAME here.
781 /* create the pointer to lss back */ 802 *
803 * create the pointer to lss back
804 */
782 pushl %ss 805 pushl %ss
783 CFI_ADJUST_CFA_OFFSET 4 806 CFI_ADJUST_CFA_OFFSET 4
784 pushl %esp 807 pushl %esp
@@ -799,12 +822,13 @@ nmi_16bit_stack:
799 call do_nmi 822 call do_nmi
800 RESTORE_REGS 823 RESTORE_REGS
801 lss 12+4(%esp), %esp # back to 16bit stack 824 lss 12+4(%esp), %esp # back to 16bit stack
8021: iret 8251: INTERRUPT_RETURN
803 CFI_ENDPROC 826 CFI_ENDPROC
804.section __ex_table,"a" 827.section __ex_table,"a"
805 .align 4 828 .align 4
806 .long 1b,iret_exc 829 .long 1b,iret_exc
807.previous 830.previous
831KPROBE_END(nmi)
808 832
809KPROBE_ENTRY(int3) 833KPROBE_ENTRY(int3)
810 RING0_INT_FRAME 834 RING0_INT_FRAME
@@ -816,7 +840,7 @@ KPROBE_ENTRY(int3)
816 call do_int3 840 call do_int3
817 jmp ret_from_exception 841 jmp ret_from_exception
818 CFI_ENDPROC 842 CFI_ENDPROC
819 .previous .text 843KPROBE_END(int3)
820 844
821ENTRY(overflow) 845ENTRY(overflow)
822 RING0_INT_FRAME 846 RING0_INT_FRAME
@@ -881,7 +905,7 @@ KPROBE_ENTRY(general_protection)
881 CFI_ADJUST_CFA_OFFSET 4 905 CFI_ADJUST_CFA_OFFSET 4
882 jmp error_code 906 jmp error_code
883 CFI_ENDPROC 907 CFI_ENDPROC
884 .previous .text 908KPROBE_END(general_protection)
885 909
886ENTRY(alignment_check) 910ENTRY(alignment_check)
887 RING0_EC_FRAME 911 RING0_EC_FRAME
@@ -890,13 +914,14 @@ ENTRY(alignment_check)
890 jmp error_code 914 jmp error_code
891 CFI_ENDPROC 915 CFI_ENDPROC
892 916
893KPROBE_ENTRY(page_fault) 917ENTRY(divide_error)
894 RING0_EC_FRAME 918 RING0_INT_FRAME
895 pushl $do_page_fault 919 pushl $0 # no error code
920 CFI_ADJUST_CFA_OFFSET 4
921 pushl $do_divide_error
896 CFI_ADJUST_CFA_OFFSET 4 922 CFI_ADJUST_CFA_OFFSET 4
897 jmp error_code 923 jmp error_code
898 CFI_ENDPROC 924 CFI_ENDPROC
899 .previous .text
900 925
901#ifdef CONFIG_X86_MCE 926#ifdef CONFIG_X86_MCE
902ENTRY(machine_check) 927ENTRY(machine_check)
@@ -949,6 +974,19 @@ ENTRY(arch_unwind_init_running)
949ENDPROC(arch_unwind_init_running) 974ENDPROC(arch_unwind_init_running)
950#endif 975#endif
951 976
977ENTRY(kernel_thread_helper)
978 pushl $0 # fake return address for unwinder
979 CFI_STARTPROC
980 movl %edx,%eax
981 push %edx
982 CFI_ADJUST_CFA_OFFSET 4
983 call *%ebx
984 push %eax
985 CFI_ADJUST_CFA_OFFSET 4
986 call do_exit
987 CFI_ENDPROC
988ENDPROC(kernel_thread_helper)
989
952.section .rodata,"a" 990.section .rodata,"a"
953#include "syscall_table.S" 991#include "syscall_table.S"
954 992
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index a6b8bd89aa27..be9d883c62ce 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -371,8 +371,65 @@ rp_sidt:
371 addl $8,%edi 371 addl $8,%edi
372 dec %ecx 372 dec %ecx
373 jne rp_sidt 373 jne rp_sidt
374
375.macro set_early_handler handler,trapno
376 lea \handler,%edx
377 movl $(__KERNEL_CS << 16),%eax
378 movw %dx,%ax
379 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
380 lea idt_table,%edi
381 movl %eax,8*\trapno(%edi)
382 movl %edx,8*\trapno+4(%edi)
383.endm
384
385 set_early_handler handler=early_divide_err,trapno=0
386 set_early_handler handler=early_illegal_opcode,trapno=6
387 set_early_handler handler=early_protection_fault,trapno=13
388 set_early_handler handler=early_page_fault,trapno=14
389
374 ret 390 ret
375 391
392early_divide_err:
393 xor %edx,%edx
394 pushl $0 /* fake errcode */
395 jmp early_fault
396
397early_illegal_opcode:
398 movl $6,%edx
399 pushl $0 /* fake errcode */
400 jmp early_fault
401
402early_protection_fault:
403 movl $13,%edx
404 jmp early_fault
405
406early_page_fault:
407 movl $14,%edx
408 jmp early_fault
409
410early_fault:
411 cld
412#ifdef CONFIG_PRINTK
413 movl $(__KERNEL_DS),%eax
414 movl %eax,%ds
415 movl %eax,%es
416 cmpl $2,early_recursion_flag
417 je hlt_loop
418 incl early_recursion_flag
419 movl %cr2,%eax
420 pushl %eax
421 pushl %edx /* trapno */
422 pushl $fault_msg
423#ifdef CONFIG_EARLY_PRINTK
424 call early_printk
425#else
426 call printk
427#endif
428#endif
429hlt_loop:
430 hlt
431 jmp hlt_loop
432
376/* This is the default interrupt "handler" :-) */ 433/* This is the default interrupt "handler" :-) */
377 ALIGN 434 ALIGN
378ignore_int: 435ignore_int:
@@ -386,6 +443,9 @@ ignore_int:
386 movl $(__KERNEL_DS),%eax 443 movl $(__KERNEL_DS),%eax
387 movl %eax,%ds 444 movl %eax,%ds
388 movl %eax,%es 445 movl %eax,%es
446 cmpl $2,early_recursion_flag
447 je hlt_loop
448 incl early_recursion_flag
389 pushl 16(%esp) 449 pushl 16(%esp)
390 pushl 24(%esp) 450 pushl 24(%esp)
391 pushl 32(%esp) 451 pushl 32(%esp)
@@ -431,9 +491,16 @@ ENTRY(stack_start)
431 491
432ready: .byte 0 492ready: .byte 0
433 493
494early_recursion_flag:
495 .long 0
496
434int_msg: 497int_msg:
435 .asciz "Unknown interrupt or fault at EIP %p %p %p\n" 498 .asciz "Unknown interrupt or fault at EIP %p %p %p\n"
436 499
500fault_msg:
501 .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
502 .asciz "Stack: %p %p %p %p %p %p %p %p\n"
503
437/* 504/*
438 * The IDT and GDT 'descriptors' are a strange 48-bit object 505 * The IDT and GDT 'descriptors' are a strange 48-bit object
439 * only used by the lidt and lgdt instructions. They are not 506 * only used by the lidt and lgdt instructions. They are not
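The set_early_handler macro in head.S hand-assembles a 32-bit interrupt gate: %eax ends up holding selector<<16 | offset_low16, and %edx holds offset_high16 | 0x8E00 (present, DPL 0, 32-bit interrupt gate). The same encoding written out in C, as a sketch (the struct and function names are illustrative; __KERNEL_CS comes from <asm/segment.h>):

#include <linux/types.h>

struct idt_gate { u32 lo, hi; };        /* illustrative, not a kernel type */

static struct idt_gate make_early_gate(u32 handler)
{
        struct idt_gate g;

        g.lo = (__KERNEL_CS << 16) | (handler & 0xffff); /* selector : offset 15..0 */
        g.hi = (handler & 0xffff0000) | 0x8e00; /* offset 31..16 : P=1, DPL=0, type 0xE */
        return g;
}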
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index d4756d154f47..ea5f4e7958d8 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -45,6 +45,8 @@ static void end_8259A_irq (unsigned int irq)
45 45
46#define shutdown_8259A_irq disable_8259A_irq 46#define shutdown_8259A_irq disable_8259A_irq
47 47
48static int i8259A_auto_eoi;
49
48static void mask_and_ack_8259A(unsigned int); 50static void mask_and_ack_8259A(unsigned int);
49 51
50unsigned int startup_8259A_irq(unsigned int irq) 52unsigned int startup_8259A_irq(unsigned int irq)
@@ -253,7 +255,7 @@ static void save_ELCR(char *trigger)
253 255
254static int i8259A_resume(struct sys_device *dev) 256static int i8259A_resume(struct sys_device *dev)
255{ 257{
256 init_8259A(0); 258 init_8259A(i8259A_auto_eoi);
257 restore_ELCR(irq_trigger); 259 restore_ELCR(irq_trigger);
258 return 0; 260 return 0;
259} 261}
@@ -301,6 +303,8 @@ void init_8259A(int auto_eoi)
301{ 303{
302 unsigned long flags; 304 unsigned long flags;
303 305
306 i8259A_auto_eoi = auto_eoi;
307
304 spin_lock_irqsave(&i8259A_lock, flags); 308 spin_lock_irqsave(&i8259A_lock, flags);
305 309
306 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 310 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
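The i8259A hunk is a small instance of a general suspend/resume rule: resume should replay whatever mode the driver last programmed rather than a hard-coded default (previously resume forced init_8259A(0) even on systems booted with auto-EOI). The shape of the fix, with illustrative names:

static int last_auto_eoi;               /* cache of the last programmed mode */

static void pic_init(int auto_eoi)
{
        last_auto_eoi = auto_eoi;       /* remember for resume */
        /* ... program the controller ... */
}

static int pic_resume(void)
{
        pic_init(last_auto_eoi);        /* replay saved mode, not a default */
        return 0;
}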
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 4fb32c551fe0..fd0df75cfbda 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -40,6 +40,7 @@
40#include <asm/nmi.h> 40#include <asm/nmi.h>
41 41
42#include <mach_apic.h> 42#include <mach_apic.h>
43#include <mach_apicdef.h>
43 44
44#include "io_ports.h" 45#include "io_ports.h"
45 46
@@ -65,7 +66,7 @@ int sis_apic_bug = -1;
65 */ 66 */
66int nr_ioapic_registers[MAX_IO_APICS]; 67int nr_ioapic_registers[MAX_IO_APICS];
67 68
68int disable_timer_pin_1 __initdata; 69static int disable_timer_pin_1 __initdata;
69 70
70/* 71/*
71 * Rough estimation of how many shared IRQs there are, can 72 * Rough estimation of how many shared IRQs there are, can
@@ -93,6 +94,34 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
93#define vector_to_irq(vector) (vector) 94#define vector_to_irq(vector) (vector)
94#endif 95#endif
95 96
97
98union entry_union {
99 struct { u32 w1, w2; };
100 struct IO_APIC_route_entry entry;
101};
102
103static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
104{
105 union entry_union eu;
106 unsigned long flags;
107 spin_lock_irqsave(&ioapic_lock, flags);
108 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
109 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
110 spin_unlock_irqrestore(&ioapic_lock, flags);
111 return eu.entry;
112}
113
114static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
115{
116 unsigned long flags;
117 union entry_union eu;
118 eu.entry = e;
119 spin_lock_irqsave(&ioapic_lock, flags);
120 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
121 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
122 spin_unlock_irqrestore(&ioapic_lock, flags);
123}
124
96/* 125/*
97 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 126 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
98 * shared ISA-space IRQs, so we have to support them. We are super 127 * shared ISA-space IRQs, so we have to support them. We are super
@@ -200,13 +229,9 @@ static void unmask_IO_APIC_irq (unsigned int irq)
200static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 229static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
201{ 230{
202 struct IO_APIC_route_entry entry; 231 struct IO_APIC_route_entry entry;
203 unsigned long flags;
204 232
205 /* Check delivery_mode to be sure we're not clearing an SMI pin */ 233 /* Check delivery_mode to be sure we're not clearing an SMI pin */
206 spin_lock_irqsave(&ioapic_lock, flags); 234 entry = ioapic_read_entry(apic, pin);
207 *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
208 *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
209 spin_unlock_irqrestore(&ioapic_lock, flags);
210 if (entry.delivery_mode == dest_SMI) 235 if (entry.delivery_mode == dest_SMI)
211 return; 236 return;
212 237
@@ -215,10 +240,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
215 */ 240 */
216 memset(&entry, 0, sizeof(entry)); 241 memset(&entry, 0, sizeof(entry));
217 entry.mask = 1; 242 entry.mask = 1;
218 spin_lock_irqsave(&ioapic_lock, flags); 243 ioapic_write_entry(apic, pin, entry);
219 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
220 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
221 spin_unlock_irqrestore(&ioapic_lock, flags);
222} 244}
223 245
224static void clear_IO_APIC (void) 246static void clear_IO_APIC (void)
@@ -1283,9 +1305,8 @@ static void __init setup_IO_APIC_irqs(void)
1283 if (!apic && (irq < 16)) 1305 if (!apic && (irq < 16))
1284 disable_8259A_irq(irq); 1306 disable_8259A_irq(irq);
1285 } 1307 }
1308 ioapic_write_entry(apic, pin, entry);
1286 spin_lock_irqsave(&ioapic_lock, flags); 1309 spin_lock_irqsave(&ioapic_lock, flags);
1287 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1288 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1289 set_native_irq_info(irq, TARGET_CPUS); 1310 set_native_irq_info(irq, TARGET_CPUS);
1290 spin_unlock_irqrestore(&ioapic_lock, flags); 1311 spin_unlock_irqrestore(&ioapic_lock, flags);
1291 } 1312 }
@@ -1301,7 +1322,6 @@ static void __init setup_IO_APIC_irqs(void)
1301static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) 1322static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
1302{ 1323{
1303 struct IO_APIC_route_entry entry; 1324 struct IO_APIC_route_entry entry;
1304 unsigned long flags;
1305 1325
1306 memset(&entry,0,sizeof(entry)); 1326 memset(&entry,0,sizeof(entry));
1307 1327
@@ -1331,10 +1351,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
1331 /* 1351 /*
1332 * Add it to the IO-APIC irq-routing table: 1352 * Add it to the IO-APIC irq-routing table:
1333 */ 1353 */
1334 spin_lock_irqsave(&ioapic_lock, flags); 1354 ioapic_write_entry(apic, pin, entry);
1335 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1336 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1337 spin_unlock_irqrestore(&ioapic_lock, flags);
1338 1355
1339 enable_8259A_irq(0); 1356 enable_8259A_irq(0);
1340} 1357}
@@ -1444,10 +1461,7 @@ void __init print_IO_APIC(void)
1444 for (i = 0; i <= reg_01.bits.entries; i++) { 1461 for (i = 0; i <= reg_01.bits.entries; i++) {
1445 struct IO_APIC_route_entry entry; 1462 struct IO_APIC_route_entry entry;
1446 1463
1447 spin_lock_irqsave(&ioapic_lock, flags); 1464 entry = ioapic_read_entry(apic, i);
1448 *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
1449 *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
1450 spin_unlock_irqrestore(&ioapic_lock, flags);
1451 1465
1452 printk(KERN_DEBUG " %02x %03X %02X ", 1466 printk(KERN_DEBUG " %02x %03X %02X ",
1453 i, 1467 i,
@@ -1666,10 +1680,7 @@ static void __init enable_IO_APIC(void)
1666 /* See if any of the pins is in ExtINT mode */ 1680 /* See if any of the pins is in ExtINT mode */
1667 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1681 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1668 struct IO_APIC_route_entry entry; 1682 struct IO_APIC_route_entry entry;
1669 spin_lock_irqsave(&ioapic_lock, flags); 1683 entry = ioapic_read_entry(apic, pin);
1670 *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1671 *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1672 spin_unlock_irqrestore(&ioapic_lock, flags);
1673 1684
1674 1685
1675 /* If the interrupt line is enabled and in ExtInt mode 1686 /* If the interrupt line is enabled and in ExtInt mode
@@ -1726,7 +1737,6 @@ void disable_IO_APIC(void)
1726 */ 1737 */
1727 if (ioapic_i8259.pin != -1) { 1738 if (ioapic_i8259.pin != -1) {
1728 struct IO_APIC_route_entry entry; 1739 struct IO_APIC_route_entry entry;
1729 unsigned long flags;
1730 1740
1731 memset(&entry, 0, sizeof(entry)); 1741 memset(&entry, 0, sizeof(entry));
1732 entry.mask = 0; /* Enabled */ 1742 entry.mask = 0; /* Enabled */
@@ -1743,12 +1753,7 @@ void disable_IO_APIC(void)
1743 /* 1753 /*
1744 * Add it to the IO-APIC irq-routing table: 1754 * Add it to the IO-APIC irq-routing table:
1745 */ 1755 */
1746 spin_lock_irqsave(&ioapic_lock, flags); 1756 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1747 io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
1748 *(((int *)&entry)+1));
1749 io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
1750 *(((int *)&entry)+0));
1751 spin_unlock_irqrestore(&ioapic_lock, flags);
1752 } 1757 }
1753 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 1758 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1754} 1759}
@@ -2213,17 +2218,13 @@ static inline void unlock_ExtINT_logic(void)
2213 int apic, pin, i; 2218 int apic, pin, i;
2214 struct IO_APIC_route_entry entry0, entry1; 2219 struct IO_APIC_route_entry entry0, entry1;
2215 unsigned char save_control, save_freq_select; 2220 unsigned char save_control, save_freq_select;
2216 unsigned long flags;
2217 2221
2218 pin = find_isa_irq_pin(8, mp_INT); 2222 pin = find_isa_irq_pin(8, mp_INT);
2219 apic = find_isa_irq_apic(8, mp_INT); 2223 apic = find_isa_irq_apic(8, mp_INT);
2220 if (pin == -1) 2224 if (pin == -1)
2221 return; 2225 return;
2222 2226
2223 spin_lock_irqsave(&ioapic_lock, flags); 2227 entry0 = ioapic_read_entry(apic, pin);
2224 *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2225 *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2226 spin_unlock_irqrestore(&ioapic_lock, flags);
2227 clear_IO_APIC_pin(apic, pin); 2228 clear_IO_APIC_pin(apic, pin);
2228 2229
2229 memset(&entry1, 0, sizeof(entry1)); 2230 memset(&entry1, 0, sizeof(entry1));
@@ -2236,10 +2237,7 @@ static inline void unlock_ExtINT_logic(void)
2236 entry1.trigger = 0; 2237 entry1.trigger = 0;
2237 entry1.vector = 0; 2238 entry1.vector = 0;
2238 2239
2239 spin_lock_irqsave(&ioapic_lock, flags); 2240 ioapic_write_entry(apic, pin, entry1);
2240 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2241 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2242 spin_unlock_irqrestore(&ioapic_lock, flags);
2243 2241
2244 save_control = CMOS_READ(RTC_CONTROL); 2242 save_control = CMOS_READ(RTC_CONTROL);
2245 save_freq_select = CMOS_READ(RTC_FREQ_SELECT); 2243 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -2258,10 +2256,7 @@ static inline void unlock_ExtINT_logic(void)
2258 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); 2256 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2259 clear_IO_APIC_pin(apic, pin); 2257 clear_IO_APIC_pin(apic, pin);
2260 2258
2261 spin_lock_irqsave(&ioapic_lock, flags); 2259 ioapic_write_entry(apic, pin, entry0);
2262 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2263 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2264 spin_unlock_irqrestore(&ioapic_lock, flags);
2265} 2260}
2266 2261
2267int timer_uses_ioapic_pin_0; 2262int timer_uses_ioapic_pin_0;
@@ -2461,17 +2456,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2461{ 2456{
2462 struct IO_APIC_route_entry *entry; 2457 struct IO_APIC_route_entry *entry;
2463 struct sysfs_ioapic_data *data; 2458 struct sysfs_ioapic_data *data;
2464 unsigned long flags;
2465 int i; 2459 int i;
2466 2460
2467 data = container_of(dev, struct sysfs_ioapic_data, dev); 2461 data = container_of(dev, struct sysfs_ioapic_data, dev);
2468 entry = data->entry; 2462 entry = data->entry;
2469 spin_lock_irqsave(&ioapic_lock, flags); 2463 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2470 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { 2464 entry[i] = ioapic_read_entry(dev->id, i);
2471 *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
2472 *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
2473 }
2474 spin_unlock_irqrestore(&ioapic_lock, flags);
2475 2465
2476 return 0; 2466 return 0;
2477} 2467}
@@ -2493,11 +2483,9 @@ static int ioapic_resume(struct sys_device *dev)
2493 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; 2483 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
2494 io_apic_write(dev->id, 0, reg_00.raw); 2484 io_apic_write(dev->id, 0, reg_00.raw);
2495 } 2485 }
2496 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2497 io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
2498 io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
2499 }
2500 spin_unlock_irqrestore(&ioapic_lock, flags); 2486 spin_unlock_irqrestore(&ioapic_lock, flags);
2487 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2488 ioapic_write_entry(dev->id, i, entry[i]);
2501 2489
2502 return 0; 2490 return 0;
2503} 2491}
@@ -2694,9 +2682,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
2694 if (!ioapic && (irq < 16)) 2682 if (!ioapic && (irq < 16))
2695 disable_8259A_irq(irq); 2683 disable_8259A_irq(irq);
2696 2684
2685 ioapic_write_entry(ioapic, pin, entry);
2697 spin_lock_irqsave(&ioapic_lock, flags); 2686 spin_lock_irqsave(&ioapic_lock, flags);
2698 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
2699 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2700 set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); 2687 set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
2701 spin_unlock_irqrestore(&ioapic_lock, flags); 2688 spin_unlock_irqrestore(&ioapic_lock, flags);
2702 2689
@@ -2704,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
2704} 2691}
2705 2692
2706#endif /* CONFIG_ACPI */ 2693#endif /* CONFIG_ACPI */
2694
2695static int __init parse_disable_timer_pin_1(char *arg)
2696{
2697 disable_timer_pin_1 = 1;
2698 return 0;
2699}
2700early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
2701
2702static int __init parse_enable_timer_pin_1(char *arg)
2703{
2704 disable_timer_pin_1 = -1;
2705 return 0;
2706}
2707early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
2708
2709static int __init parse_noapic(char *arg)
2710{
2711 /* disable IO-APIC */
2712 disable_ioapic_setup();
2713 return 0;
2714}
2715early_param("noapic", parse_noapic);
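All of io_apic.c's open-coded `*(((int *)&entry) + 0/1)` sequences collapse into the two accessors introduced at the top of the file; the union gives a well-defined two-word view of struct IO_APIC_route_entry and keeps both 32-bit halves of each read or write inside one ioapic_lock critical section. The trick in isolation, as a sketch with generic names (the anonymous struct inside a union is a GNU C extension, used the same way by the patch):

#include <linux/types.h>

union route_words {
        struct { u32 w1, w2; };         /* the two register-sized halves */
        u64 entry;                      /* the logical 64-bit route entry */
};

/* read_reg stands in for io_apic_read(); the real helpers also take
 * ioapic_lock around the pair of reads. */
static u64 read_route(u32 (*read_reg)(int reg), int pin)
{
        union route_words u;

        u.w1 = read_reg(0x10 + 2 * pin);        /* low word */
        u.w2 = read_reg(0x11 + 2 * pin);        /* high word */
        return u.entry;
}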
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index 6b1ae6ba76f0..91966bafb3dc 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/kexec.h>
 #include <linux/delay.h>
+#include <linux/init.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -20,70 +21,13 @@
 #include <asm/system.h>
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L2_ATTR (_PAGE_PRESENT)
-
-#define LEVEL0_SIZE (1UL << 12UL)
-
-#ifndef CONFIG_X86_PAE
-#define LEVEL1_SIZE (1UL << 22UL)
-static u32 pgtable_level1[1024] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
-	unsigned long level1_index, level2_index;
-	u32 *pgtable_level2;
-
-	/* Find the current page table */
-	pgtable_level2 = __va(read_cr3());
-
-	/* Find the indexes of the physical address to identity map */
-	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-	level2_index = address / LEVEL1_SIZE;
-
-	/* Identity map the page table entry */
-	pgtable_level1[level1_index] = address | L0_ATTR;
-	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-
-	/* Flush the tlb so the new mapping takes effect.
-	 * Global tlb entries are not flushed but that is not an issue.
-	 */
-	load_cr3(pgtable_level2);
-}
-
-#else
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-static u64 pgtable_level1[512] PAGE_ALIGNED;
-static u64 pgtable_level2[512] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
-	unsigned long level1_index, level2_index, level3_index;
-	u64 *pgtable_level3;
-
-	/* Find the current page table */
-	pgtable_level3 = __va(read_cr3());
-
-	/* Find the indexes of the physical address to identity map */
-	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-	level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
-	level3_index = address / LEVEL2_SIZE;
-
-	/* Identity map the page table entry */
-	pgtable_level1[level1_index] = address | L0_ATTR;
-	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-	set_64bit(&pgtable_level3[level3_index],
-		  __pa(pgtable_level2) | L2_ATTR);
-
-	/* Flush the tlb so the new mapping takes effect.
-	 * Global tlb entries are not flushed but that is not an issue.
-	 */
-	load_cr3(pgtable_level3);
-}
+static u32 kexec_pgd[1024] PAGE_ALIGNED;
+#ifdef CONFIG_X86_PAE
+static u32 kexec_pmd0[1024] PAGE_ALIGNED;
+static u32 kexec_pmd1[1024] PAGE_ALIGNED;
 #endif
+static u32 kexec_pte0[1024] PAGE_ALIGNED;
+static u32 kexec_pte1[1024] PAGE_ALIGNED;
 
 static void set_idt(void *newidt, __u16 limit)
 {
@@ -127,16 +71,6 @@ static void load_segments(void)
 #undef __STR
 }
 
-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
-					unsigned long indirection_page,
-					unsigned long reboot_code_buffer,
-					unsigned long start_address,
-					unsigned int has_pae) ATTRIB_NORET;
-
-extern const unsigned char relocate_new_kernel[];
-extern void relocate_new_kernel_end(void);
-extern const unsigned int relocate_new_kernel_size;
-
 /*
  * A architecture hook called to validate the
  * proposed image and prepare the control pages
@@ -169,25 +103,29 @@ void machine_kexec_cleanup(struct kimage *image)
  */
NORET_TYPE void machine_kexec(struct kimage *image)
 {
-	unsigned long page_list;
-	unsigned long reboot_code_buffer;
-
-	relocate_new_kernel_t rnk;
+	unsigned long page_list[PAGES_NR];
+	void *control_page;
 
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 
-	/* Compute some offsets */
-	reboot_code_buffer = page_to_pfn(image->control_code_page)
-								<< PAGE_SHIFT;
-	page_list = image->head;
-
-	/* Set up an identity mapping for the reboot_code_buffer */
-	identity_map_page(reboot_code_buffer);
-
-	/* copy it out */
-	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
-						relocate_new_kernel_size);
+	control_page = page_address(image->control_code_page);
+	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+
+	page_list[PA_CONTROL_PAGE] = __pa(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+	page_list[PA_PGD] = __pa(kexec_pgd);
+	page_list[VA_PGD] = (unsigned long)kexec_pgd;
+#ifdef CONFIG_X86_PAE
+	page_list[PA_PMD_0] = __pa(kexec_pmd0);
+	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
+	page_list[PA_PMD_1] = __pa(kexec_pmd1);
+	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
+#endif
+	page_list[PA_PTE_0] = __pa(kexec_pte0);
+	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
+	page_list[PA_PTE_1] = __pa(kexec_pte1);
+	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
 
 	/* The segment registers are funny things, they have both a
 	 * visible and an invisible part. Whenever the visible part is
@@ -206,6 +144,28 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	set_idt(phys_to_virt(0),0);
 
 	/* now call it */
-	rnk = (relocate_new_kernel_t) reboot_code_buffer;
-	(*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
+	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+			image->start, cpu_has_pae);
+}
+
+/* crashkernel=size@addr specifies the location to reserve for
+ * a crash kernel.  By reserving this memory we guarantee
+ * that linux never sets it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+static int __init parse_crashkernel(char *arg)
+{
+	unsigned long size, base;
+	size = memparse(arg, &arg);
+	if (*arg == '@') {
+		base = memparse(arg+1, &arg);
+		/* FIXME: Do I want a sanity check
+		 * to validate the memory range?
+		 */
+		crashk_res.start = base;
+		crashk_res.end   = base + size - 1;
+	}
+	return 0;
 }
+early_param("crashkernel", parse_crashkernel);
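
[Editor's note: parse_crashkernel() above leans on memparse(), which understands K/M/G suffixes and advances the parse position. A self-contained userspace sketch of the same "size@base" parsing; memparse_sketch is a simplified stand-in for illustration, not the kernel implementation:

	#include <stdio.h>
	#include <stdlib.h>

	/* Simplified stand-in for the kernel's memparse(): parse a number
	 * with an optional K/M/G suffix and advance *retptr past it. */
	static unsigned long memparse_sketch(const char *ptr, char **retptr)
	{
		unsigned long val = strtoul(ptr, retptr, 0);

		switch (**retptr) {
		case 'G': case 'g': val <<= 10; /* fall through */
		case 'M': case 'm': val <<= 10; /* fall through */
		case 'K': case 'k': val <<= 10; (*retptr)++;
		}
		return val;
	}

	int main(void)
	{
		/* crashkernel=64M@16M reserves 64 MB starting at 16 MB */
		char *arg = "64M@16M", *p;
		unsigned long size = memparse_sketch(arg, &p);

		if (*p == '@') {
			unsigned long base = memparse_sketch(p + 1, &p);
			printf("reserve %lu bytes at 0x%lx\n", size, base);
		}
		return 0;
	}
]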
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
index cd5456f14af4..eb57a851789d 100644
--- a/arch/i386/kernel/mca.c
+++ b/arch/i386/kernel/mca.c
@@ -42,6 +42,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/mca.h>
+#include <linux/kprobes.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <linux/proc_fs.h>
@@ -414,7 +415,8 @@ subsys_initcall(mca_init);
 
 /*--------------------------------------------------------------------*/
 
-static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
+static __kprobes void
+mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
 {
 	int slot = mca_dev->slot;
 
@@ -444,7 +446,7 @@ static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
 
 /*--------------------------------------------------------------------*/
 
-static int mca_handle_nmi_callback(struct device *dev, void *data)
+static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
 {
 	struct mca_device *mca_dev = to_mca_device(dev);
 	unsigned char pos5;
@@ -462,7 +464,7 @@ static int mca_handle_nmi_callback(struct device *dev, void *data)
 	return 0;
 }
 
-void mca_handle_nmi(void)
+void __kprobes mca_handle_nmi(void)
 {
 	/* First try - scan the various adapters and see if a specific
 	 * adapter was responsible for the error.
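
[Editor's note: the __kprobes annotations above keep the MCA NMI path off-limits to kprobes, since a probe hit inside an NMI handler could recurse into the probe machinery. As an assumption about kernels of this vintage (not shown in this patch), the marker works by placing functions in a dedicated section that kprobe registration refuses to touch; a rough compile-only sketch:

	/* Roughly how __kprobes was defined around this time
	 * (see include/linux/kprobes.h; treat as an assumption). */
	#define __kprobes __attribute__((__section__(".kprobes.text")))

	/* Registration can then reject addresses inside that section;
	 * the linker provides the section boundary symbols. */
	static int in_kprobes_section(unsigned long addr)
	{
		extern char __kprobes_text_start[], __kprobes_text_end[];

		return addr >= (unsigned long)__kprobes_text_start &&
		       addr <  (unsigned long)__kprobes_text_end;
	}
]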
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index a70b5fa0ef06..442aaf8c77eb 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -30,6 +30,7 @@
 #include <asm/io_apic.h>
 
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 #include <mach_mpparse.h>
 #include <bios_ebda.h>
 
@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
 /* Internal processor count */
-static unsigned int __devinitdata num_processors;
+unsigned int __cpuinitdata num_processors;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
@@ -228,12 +229,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m)
 
 	mpc_oem_bus_info(m, str, translation_table[mpc_record]);
 
+#if MAX_MP_BUSSES < 256
 	if (m->mpc_busid >= MAX_MP_BUSSES) {
 		printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
 			" is too large, max. supported is %d\n",
 			m->mpc_busid, str, MAX_MP_BUSSES - 1);
 		return;
 	}
+#endif
 
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
@@ -293,19 +296,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
 		m->mpc_irqtype, m->mpc_irqflag & 3,
 		(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
 		m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
-	/*
-	 * Well it seems all SMP boards in existence
-	 * use ExtINT/LVT1 == LINT0 and
-	 * NMI/LVT2 == LINT1 - the following check
-	 * will show us if this assumptions is false.
-	 * Until then we do not have to add baggage.
-	 */
-	if ((m->mpc_irqtype == mp_ExtINT) &&
-		(m->mpc_destapiclint != 0))
-		BUG();
-	if ((m->mpc_irqtype == mp_NMI) &&
-		(m->mpc_destapiclint != 1))
-		BUG();
 }
 
 #ifdef CONFIG_X86_NUMAQ
@@ -822,8 +812,7 @@ int es7000_plat;
 
 #ifdef CONFIG_ACPI
 
-void __init mp_register_lapic_address (
-	u64 address)
+void __init mp_register_lapic_address(u64 address)
 {
 	mp_lapic_addr = (unsigned long) address;
 
@@ -835,13 +824,10 @@ void __init mp_register_lapic_address (
 	Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
 }
 
-
-void __devinit mp_register_lapic (
-	u8 id,
-	u8 enabled)
+void __devinit mp_register_lapic (u8 id, u8 enabled)
 {
 	struct mpc_config_processor processor;
 	int boot_cpu = 0;
 
 	if (MAX_APICS - id <= 0) {
 		printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
@@ -878,11 +864,9 @@ static struct mp_ioapic_routing {
 	u32 pin_programmed[4];
 } mp_ioapic_routing[MAX_IO_APICS];
 
-
-static int mp_find_ioapic (
-	int gsi)
+static int mp_find_ioapic (int gsi)
 {
 	int i = 0;
 
 	/* Find the IOAPIC that manages this GSI. */
 	for (i = 0; i < nr_ioapics; i++) {
@@ -895,15 +879,11 @@ static int mp_find_ioapic (
 
 	return -1;
 }
-
 
-void __init mp_register_ioapic (
-	u8 id,
-	u32 address,
-	u32 gsi_base)
+void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
 {
 	int idx = 0;
 	int tmpid;
 
 	if (nr_ioapics >= MAX_IO_APICS) {
 		printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
@@ -949,16 +929,10 @@ void __init mp_register_ioapic (
 		mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
 		mp_ioapic_routing[idx].gsi_base,
 		mp_ioapic_routing[idx].gsi_end);
-
-	return;
 }
 
-
-void __init mp_override_legacy_irq (
-	u8 bus_irq,
-	u8 polarity,
-	u8 trigger,
-	u32 gsi)
+void __init
+mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 {
 	struct mpc_config_intsrc intsrc;
 	int ioapic = -1;
@@ -996,15 +970,13 @@ void __init mp_override_legacy_irq (
 	mp_irqs[mp_irq_entries] = intsrc;
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
 		panic("Max # of irq sources exceeded!\n");
-
-	return;
 }
 
 void __init mp_config_acpi_legacy_irqs (void)
 {
 	struct mpc_config_intsrc intsrc;
 	int i = 0;
 	int ioapic = -1;
 
 	/*
	 * Fabricate the legacy ISA bus (bus #31).
@@ -1073,12 +1045,12 @@ void __init mp_config_acpi_legacy_irqs (void)
 
 #define MAX_GSI_NUM	4096
 
-int mp_register_gsi (u32 gsi, int triggering, int polarity)
+int mp_register_gsi(u32 gsi, int triggering, int polarity)
 {
 	int ioapic = -1;
 	int ioapic_pin = 0;
 	int idx, bit = 0;
 	static int pci_irq = 16;
 	/*
 	 * Mapping between Global System Interrups, which
 	 * represent all possible interrupts, and IRQs
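
[Editor's note: the new #if MAX_MP_BUSSES < 256 guard exists because mpc_busid is a u8 in struct mpc_config_bus: once MAX_MP_BUSSES is 256 or larger the range check can never fire, and compilers may warn about an always-false comparison. A compilable sketch of the guard's shape; the 32 here is a hypothetical configuration value, not from this patch:

	#define MAX_MP_BUSSES 32	/* hypothetical; kernel configs vary */

	static int busid_ok(unsigned char busid)
	{
	#if MAX_MP_BUSSES < 256
		/* meaningful only while a u8 can actually exceed the limit */
		if (busid >= MAX_MP_BUSSES)
			return 0;
	#endif
		/* with MAX_MP_BUSSES >= 256 the test above would be
		 * always-false (busid is 0..255), so it is compiled out */
		return 1;
	}
]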
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index acb351478e42..dbda706fdd14 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -21,83 +21,174 @@
 #include <linux/sysdev.h>
 #include <linux/sysctl.h>
 #include <linux/percpu.h>
+#include <linux/dmi.h>
+#include <linux/kprobes.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
+#include <asm/kdebug.h>
 #include <asm/intel_arch_perfmon.h>
 
 #include "mach_traps.h"
 
-unsigned int nmi_watchdog = NMI_NONE;
-extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
-static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
-static unsigned int nmi_p4_cccr_val;
-extern void show_registers(struct pt_regs *regs);
+/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
+ * evtsel_nmi_owner tracks the ownership of the event selection
+ * - different performance counters/ event selection may be reserved for
+ *   different subsystems this reservation system just tries to coordinate
+ *   things a little
+ */
+static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
+static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
 
-/*
- * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
- * - it may be reserved by some other driver, or not
- * - when not reserved by some other driver, it may be used for
- *   the NMI watchdog, or not
- *
- * This is maintained separately from nmi_active because the NMI
- * watchdog may also be driven from the I/O APIC timer.
+/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
+ * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
  */
-static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
-static unsigned int lapic_nmi_owner;
-#define LAPIC_NMI_WATCHDOG	(1<<0)
-#define LAPIC_NMI_RESERVED	(1<<1)
+#define NMI_MAX_COUNTER_BITS 66
 
 /* nmi_active:
- * +1: the lapic NMI watchdog is active, but can be disabled
- *  0: the lapic NMI watchdog has not been set up, and cannot
+ * >0: the lapic NMI watchdog is active, but can be disabled
+ * <0: the lapic NMI watchdog has not been set up, and cannot
  *     be enabled
- * -1: the lapic NMI watchdog is disabled, but can be enabled
+ *  0: the lapic NMI watchdog is disabled, but can be enabled
  */
-int nmi_active;
+atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */
 
-#define K7_EVNTSEL_ENABLE	(1 << 22)
-#define K7_EVNTSEL_INT		(1 << 20)
-#define K7_EVNTSEL_OS		(1 << 17)
-#define K7_EVNTSEL_USR		(1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
-#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+unsigned int nmi_watchdog = NMI_DEFAULT;
+static unsigned int nmi_hz = HZ;
 
-#define P6_EVNTSEL0_ENABLE	(1 << 22)
-#define P6_EVNTSEL_INT		(1 << 20)
-#define P6_EVNTSEL_OS		(1 << 17)
-#define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
-#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+struct nmi_watchdog_ctlblk {
+	int enabled;
+	u64 check_bit;
+	unsigned int cccr_msr;
+	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
+	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
+};
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
 
-#define MSR_P4_MISC_ENABLE	0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
-#define MSR_P4_PERFCTR0		0x300
-#define MSR_P4_CCCR0		0x360
-#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
-#define P4_ESCR_OS		(1<<3)
-#define P4_ESCR_USR		(1<<2)
-#define P4_CCCR_OVF_PMI0	(1<<26)
-#define P4_CCCR_OVF_PMI1	(1<<27)
-#define P4_CCCR_THRESHOLD(N)	((N)<<20)
-#define P4_CCCR_COMPLEMENT	(1<<19)
-#define P4_CCCR_COMPARE		(1<<18)
-#define P4_CCCR_REQUIRED	(3<<16)
-#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
-#define P4_CCCR_ENABLE		(1<<12)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
-   CRU_ESCR0 (with any non-null event selector) through a complemented
-   max threshold. [IA32-Vol3, Section 14.9.9] */
-#define MSR_P4_IQ_COUNTER0	0x30C
-#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
-#define P4_NMI_IQ_CCCR0	\
-	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
-	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+/* local prototypes */
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
 
-#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+extern void show_registers(struct pt_regs *regs);
+extern int unknown_nmi_panic;
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
+{
+	/* returns the bit offset of the performance counter register */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return (msr - MSR_K7_PERFCTR0);
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+
+		switch (boot_cpu_data.x86) {
+		case 6:
+			return (msr - MSR_P6_PERFCTR0);
+		case 15:
+			return (msr - MSR_P4_BPU_PERFCTR0);
+		}
+	}
+	return 0;
+}
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
+{
+	/* returns the bit offset of the event selection register */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return (msr - MSR_K7_EVNTSEL0);
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+
+		switch (boot_cpu_data.x86) {
+		case 6:
+			return (msr - MSR_P6_EVNTSEL0);
+		case 15:
+			return (msr - MSR_P4_BSU_ESCR0);
+		}
+	}
+	return 0;
+}
+
+/* checks for a bit availability (hack for oprofile) */
+int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
+{
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+/* checks the an msr for availability */
+int avail_to_resrv_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+		return 1;
+	return 0;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
+		return 1;
+	return 0;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
+}
+
+static __cpuinit inline int nmi_known_cpu(void)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return 1;
+		else
+			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+	}
+	return 0;
+}
 
 #ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
@@ -125,7 +216,18 @@ static int __init check_nmi_watchdog(void)
 	unsigned int *prev_nmi_count;
 	int cpu;
 
-	if (nmi_watchdog == NMI_NONE)
+	/* Enable NMI watchdog for newer systems.
+	   Actually it should be safe for most systems before 2004 too except
+	   for some IBM systems that corrupt registers when NMI happens
+	   during SMM. Unfortunately we don't have more exact information
+	   on these and use this coarse check. */
+	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
+		nmi_watchdog = NMI_LOCAL_APIC;
+
+	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
+		return 0;
+
+	if (!atomic_read(&nmi_active))
 		return 0;
 
 	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
@@ -149,25 +251,45 @@ static int __init check_nmi_watchdog(void)
 		if (!cpu_isset(cpu, cpu_callin_map))
 			continue;
 #endif
+		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+			continue;
 		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-			endflag = 1;
 			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
 				cpu,
 				prev_nmi_count[cpu],
 				nmi_count(cpu));
-			nmi_active = 0;
-			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
-			kfree(prev_nmi_count);
-			return -1;
+			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+			atomic_dec(&nmi_active);
 		}
 	}
+	if (!atomic_read(&nmi_active)) {
+		kfree(prev_nmi_count);
+		atomic_set(&nmi_active, -1);
+		return -1;
+	}
 	endflag = 1;
 	printk("OK.\n");
 
 	/* now that we know it works we can reduce NMI frequency to
 	   something more reasonable; makes a difference in some configs */
-	if (nmi_watchdog == NMI_LOCAL_APIC)
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
 		nmi_hz = 1;
+		/*
+		 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+		 * are writable, with higher bits sign extending from bit 31.
+		 * So, we can only program the counter with 31 bit values and
+		 * 32nd bit should be 1, for 33.. to be 1.
+		 * Find the appropriate nmi_hz
+		 */
+		if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
+			((u64)cpu_khz * 1000) > 0x7fffffffULL) {
+			u64 count = (u64)cpu_khz * 1000;
+			do_div(count, 0x7fffffffUL);
+			nmi_hz = count + 1;
+		}
+	}
 
 	kfree(prev_nmi_count);
 	return 0;
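
[Editor's note: the nmi_hz recalculation above can be checked with concrete numbers: with only 31 writable counter bits, the per-tick count cpu_khz*1000/nmi_hz must stay below 0x7fffffff. A standalone sketch of the arithmetic; the 3 GHz figure is hypothetical:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long cpu_khz = 3000000ULL;  /* hypothetical 3 GHz CPU */
		unsigned long long count = cpu_khz * 1000; /* cycles per second */
		unsigned int nmi_hz = 1;

		if (count > 0x7fffffffULL)
			nmi_hz = count / 0x7fffffffULL + 1;  /* -> 2 here */

		/* 3e9 / 2 = 1.5e9 cycles, which fits in 31 bits */
		printf("nmi_hz=%u, counter period=%llu cycles\n",
		       nmi_hz, count / nmi_hz);
		return 0;
	}
]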
@@ -181,124 +303,70 @@ static int __init setup_nmi_watchdog(char *str)
 
 	get_option(&str, &nmi);
 
-	if (nmi >= NMI_INVALID)
+	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
 		return 0;
-	if (nmi == NMI_NONE)
-		nmi_watchdog = nmi;
 	/*
 	 * If any other x86 CPU has a local APIC, then
 	 * please test the NMI stuff there and send me the
 	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
 	 */
-	if ((nmi == NMI_LOCAL_APIC) &&
-		(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-		(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
-		nmi_watchdog = nmi;
-	if ((nmi == NMI_LOCAL_APIC) &&
-		(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
-		(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
-		nmi_watchdog = nmi;
-	/*
-	 * We can enable the IO-APIC watchdog
-	 * unconditionally.
-	 */
-	if (nmi == NMI_IO_APIC) {
-		nmi_active = 1;
-		nmi_watchdog = nmi;
-	}
+	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
+		return 0;  /* no lapic support */
+	nmi_watchdog = nmi;
 	return 1;
 }
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
-static void disable_intel_arch_watchdog(void);
-
 static void disable_lapic_nmi_watchdog(void)
 {
-	if (nmi_active <= 0)
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
 		return;
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
-		break;
-	case X86_VENDOR_INTEL:
-		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-			disable_intel_arch_watchdog();
-			break;
-		}
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 0xd)
-				break;
-
-			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
-			break;
-		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				break;
-
-			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
-			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
-			break;
-		}
-		break;
-	}
-	nmi_active = -1;
-	/* tell do_nmi() and others that we're not active any more */
-	nmi_watchdog = 0;
+
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+
+	BUG_ON(atomic_read(&nmi_active) != 0);
 }
 
 static void enable_lapic_nmi_watchdog(void)
 {
-	if (nmi_active < 0) {
-		nmi_watchdog = NMI_LOCAL_APIC;
-		setup_apic_nmi_watchdog();
-	}
-}
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
 
-int reserve_lapic_nmi(void)
-{
-	unsigned int old_owner;
+	/* are we already enabled */
+	if (atomic_read(&nmi_active) != 0)
+		return;
 
-	spin_lock(&lapic_nmi_owner_lock);
-	old_owner = lapic_nmi_owner;
-	lapic_nmi_owner |= LAPIC_NMI_RESERVED;
-	spin_unlock(&lapic_nmi_owner_lock);
-	if (old_owner & LAPIC_NMI_RESERVED)
-		return -EBUSY;
-	if (old_owner & LAPIC_NMI_WATCHDOG)
-		disable_lapic_nmi_watchdog();
-	return 0;
-}
+	/* are we lapic aware */
+	if (nmi_known_cpu() <= 0)
+		return;
 
-void release_lapic_nmi(void)
-{
-	unsigned int new_owner;
-
-	spin_lock(&lapic_nmi_owner_lock);
-	new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
-	lapic_nmi_owner = new_owner;
-	spin_unlock(&lapic_nmi_owner_lock);
-	if (new_owner & LAPIC_NMI_WATCHDOG)
-		enable_lapic_nmi_watchdog();
+	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+	touch_nmi_watchdog();
 }
 
 void disable_timer_nmi_watchdog(void)
 {
-	if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
+	BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
 		return;
 
-	unset_nmi_callback();
-	nmi_active = -1;
-	nmi_watchdog = NMI_NONE;
+	disable_irq(0);
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+
+	BUG_ON(atomic_read(&nmi_active) != 0);
 }
 
 void enable_timer_nmi_watchdog(void)
 {
-	if (nmi_active < 0) {
-		nmi_watchdog = NMI_IO_APIC;
+	BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+	if (atomic_read(&nmi_active) == 0) {
 		touch_nmi_watchdog();
-		nmi_active = 1;
+		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+		enable_irq(0);
 	}
 }
 
@@ -308,15 +376,20 @@ static int nmi_pm_active; /* nmi_active before suspend */
 
 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
 {
-	nmi_pm_active = nmi_active;
-	disable_lapic_nmi_watchdog();
+	/* only CPU0 goes here, other CPUs should be offline */
+	nmi_pm_active = atomic_read(&nmi_active);
+	stop_apic_nmi_watchdog(NULL);
+	BUG_ON(atomic_read(&nmi_active) != 0);
 	return 0;
 }
 
 static int lapic_nmi_resume(struct sys_device *dev)
 {
-	if (nmi_pm_active > 0)
-		enable_lapic_nmi_watchdog();
+	/* only CPU0 goes here, other CPUs should be offline */
+	if (nmi_pm_active > 0) {
+		setup_apic_nmi_watchdog(NULL);
+		touch_nmi_watchdog();
+	}
 	return 0;
 }
 
@@ -336,7 +409,13 @@ static int __init init_lapic_nmi_sysfs(void)
 {
 	int error;
 
-	if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
+	/* should really be a BUG_ON but b/c this is an
+	 * init call, it just doesn't work.  -dcz
+	 */
+	if (nmi_watchdog != NMI_LOCAL_APIC)
+		return 0;
+
+	if ( atomic_read(&nmi_active) < 0 )
 		return 0;
 
 	error = sysdev_class_register(&nmi_sysclass);
@@ -354,138 +433,269 @@ late_initcall(init_lapic_nmi_sysfs);
  * Original code written by Keith Owens.
  */
 
-static void clear_msr_range(unsigned int base, unsigned int n)
-{
-	unsigned int i;
-
-	for(i = 0; i < n; ++i)
-		wrmsr(base+i, 0, 0);
-}
-
-static void write_watchdog_counter(const char *descr)
+static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
 {
 	u64 count = (u64)cpu_khz * 1000;
 
 	do_div(count, nmi_hz);
 	if(descr)
 		Dprintk("setting %s to -0x%08Lx\n", descr, count);
-	wrmsrl(nmi_perfctr_msr, 0 - count);
+	wrmsrl(perfctr_msr, 0 - count);
 }
 
-static void setup_k7_watchdog(void)
+/* Note that these events don't tick when the CPU idles. This means
+   the frequency varies with CPU load. */
+
+#define K7_EVNTSEL_ENABLE	(1 << 22)
+#define K7_EVNTSEL_INT		(1 << 20)
+#define K7_EVNTSEL_OS		(1 << 17)
+#define K7_EVNTSEL_USR		(1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
+#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+static int setup_k7_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = MSR_K7_PERFCTR0;
+	evntsel_msr = MSR_K7_EVNTSEL0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
-	nmi_perfctr_msr = MSR_K7_PERFCTR0;
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
 
-	clear_msr_range(MSR_K7_EVNTSEL0, 4);
-	clear_msr_range(MSR_K7_PERFCTR0, 4);
+	wrmsrl(perfctr_msr, 0UL);
 
 	evntsel = K7_EVNTSEL_INT
 		| K7_EVNTSEL_OS
 		| K7_EVNTSEL_USR
 		| K7_NMI_EVENT;
 
-	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-	write_watchdog_counter("K7_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
-	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL<<63;
+	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
+}
+
+static void stop_k7_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
 
-static void setup_p6_watchdog(void)
+#define P6_EVNTSEL0_ENABLE	(1 << 22)
+#define P6_EVNTSEL_INT		(1 << 20)
+#define P6_EVNTSEL_OS		(1 << 17)
+#define P6_EVNTSEL_USR		(1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+static int setup_p6_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = MSR_P6_PERFCTR0;
+	evntsel_msr = MSR_P6_EVNTSEL0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
-	nmi_perfctr_msr = MSR_P6_PERFCTR0;
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
 
-	clear_msr_range(MSR_P6_EVNTSEL0, 2);
-	clear_msr_range(MSR_P6_PERFCTR0, 2);
+	wrmsrl(perfctr_msr, 0UL);
 
 	evntsel = P6_EVNTSEL_INT
 		| P6_EVNTSEL_OS
 		| P6_EVNTSEL_USR
 		| P6_NMI_EVENT;
 
-	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-	write_watchdog_counter("P6_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= P6_EVNTSEL0_ENABLE;
-	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL<<39;
+	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
+}
+
+static void stop_p6_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
 
+/* Note that these events don't tick when the CPU idles. This means
+   the frequency varies with CPU load. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
+#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
+#define P4_ESCR_OS		(1<<3)
+#define P4_ESCR_USR		(1<<2)
+#define P4_CCCR_OVF_PMI0	(1<<26)
+#define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_THRESHOLD(N)	((N)<<20)
+#define P4_CCCR_COMPLEMENT	(1<<19)
+#define P4_CCCR_COMPARE		(1<<18)
+#define P4_CCCR_REQUIRED	(3<<16)
+#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
+#define P4_CCCR_ENABLE		(1<<12)
+#define P4_CCCR_OVF		(1<<31)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+
 static int setup_p4_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+	unsigned int evntsel, cccr_val;
 	unsigned int misc_enable, dummy;
+	unsigned int ht_num;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
-	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
 	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
 		return 0;
 
-	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
-	nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2)
-		nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+	/* detect which hyperthread we are on */
+	if (smp_num_siblings == 2) {
+		unsigned int ebx, apicid;
+
+		ebx = cpuid_ebx(1);
+		apicid = (ebx >> 24) & 0xff;
+		ht_num = apicid & 1;
+	} else
 #endif
+		ht_num = 0;
 
-	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
-		clear_msr_range(0x3F1, 2);
-	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
-	   docs doesn't fully define it, so leave it alone for now. */
-	if (boot_cpu_data.x86_model >= 0x3) {
-		/* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
-		clear_msr_range(0x3A0, 26);
-		clear_msr_range(0x3BC, 3);
+	/* performance counters are shared resources
+	 * assign each hyperthread its own set
+	 * (re-use the ESCR0 register, seems safe
+	 * and keeps the cccr_val the same)
+	 */
+	if (!ht_num) {
+		/* logical cpu 0 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR0;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR0;
+		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
 	} else {
-		clear_msr_range(0x3A0, 31);
+		/* logical cpu 1 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR1;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR1;
+		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
 	}
-	clear_msr_range(0x3C0, 6);
-	clear_msr_range(0x3C8, 6);
-	clear_msr_range(0x3E0, 2);
-	clear_msr_range(MSR_P4_CCCR0, 18);
-	clear_msr_range(MSR_P4_PERFCTR0, 18);
-
-	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
-	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
-	write_watchdog_counter("P4_IQ_COUNTER0");
+
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
+
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
+
+	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+		| P4_ESCR_OS
+		| P4_ESCR_USR;
+
+	cccr_val |= P4_CCCR_THRESHOLD(15)
+		 | P4_CCCR_COMPLEMENT
+		 | P4_CCCR_COMPARE
+		 | P4_CCCR_REQUIRED;
+
+	wrmsr(evntsel_msr, evntsel, 0);
+	wrmsr(cccr_msr, cccr_val, 0);
+	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+	cccr_val |= P4_CCCR_ENABLE;
+	wrmsr(cccr_msr, cccr_val, 0);
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = cccr_msr;
+	wd->check_bit = 1ULL<<39;
 	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
 }
 
-static void disable_intel_arch_watchdog(void)
+static void stop_p4_watchdog(void)
 {
-	unsigned ebx;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
-	/*
-	 * Check whether the Architectural PerfMon supports
-	 * Unhalted Core Cycles Event or not.
-	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
-	 */
-	ebx = cpuid_ebx(10);
-	if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-		wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+	wrmsr(wd->cccr_msr, 0, 0);
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
 
+#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
 static int setup_intel_arch_watchdog(void)
 {
+	unsigned int ebx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
-	unsigned ebx;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
 	/*
 	 * Check whether the Architectural PerfMon supports
 	 * Unhalted Core Cycles Event or not.
-	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
+	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
 	 */
-	ebx = cpuid_ebx(10);
-	if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-		return 0;
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		goto fail;
+
+	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
 
-	nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
-	clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
-	clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
+
+	wrmsrl(perfctr_msr, 0UL);
 
 	evntsel = ARCH_PERFMON_EVENTSEL_INT
 		| ARCH_PERFMON_EVENTSEL_OS
@@ -493,51 +703,145 @@ static int setup_intel_arch_watchdog(void)
 		| ARCH_PERFMON_NMI_EVENT_SEL
 		| ARCH_PERFMON_NMI_EVENT_UMASK;
 
-	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
-	write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
 }
 
-void setup_apic_nmi_watchdog (void)
-{
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
-			return;
-		setup_k7_watchdog();
-		break;
-	case X86_VENDOR_INTEL:
-		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-			if (!setup_intel_arch_watchdog())
-				return;
-			break;
-		}
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 0xd)
-				return;
-
-			setup_p6_watchdog();
-			break;
-		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				return;
-
-			if (!setup_p4_watchdog())
-				return;
-			break;
-		default:
-			return;
-		}
-		break;
-	default:
-		return;
-	}
-	lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
-	nmi_active = 1;
+static void stop_intel_arch_watchdog(void)
+{
+	unsigned int ebx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
+	 */
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		return;
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
+}
+
+void setup_apic_nmi_watchdog (void *unused)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/* only support LOCAL and IO APICs for now */
+	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+	    (nmi_watchdog != NMI_IO_APIC))
+		return;
+
+	if (wd->enabled == 1)
+		return;
+
+	/* cheap hack to support suspend/resume */
+	/* if cpu0 is not active neither should the other cpus */
+	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
+		return;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
+				return;
+			if (!setup_k7_watchdog())
+				return;
+			break;
+		case X86_VENDOR_INTEL:
+			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+				if (!setup_intel_arch_watchdog())
+					return;
+				break;
+			}
+			switch (boot_cpu_data.x86) {
+			case 6:
+				if (boot_cpu_data.x86_model > 0xd)
+					return;
+
+				if (!setup_p6_watchdog())
+					return;
+				break;
+			case 15:
+				if (boot_cpu_data.x86_model > 0x4)
+					return;
+
+				if (!setup_p4_watchdog())
+					return;
+				break;
+			default:
+				return;
+			}
+			break;
+		default:
+			return;
+		}
+	}
+	wd->enabled = 1;
+	atomic_inc(&nmi_active);
+}
+
+void stop_apic_nmi_watchdog(void *unused)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/* only support LOCAL and IO APICs for now */
+	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+	    (nmi_watchdog != NMI_IO_APIC))
+		return;
+
+	if (wd->enabled == 0)
+		return;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			stop_k7_watchdog();
+			break;
+		case X86_VENDOR_INTEL:
+			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+				stop_intel_arch_watchdog();
+				break;
+			}
+			switch (boot_cpu_data.x86) {
+			case 6:
+				if (boot_cpu_data.x86_model > 0xd)
+					break;
+				stop_p6_watchdog();
+				break;
+			case 15:
+				if (boot_cpu_data.x86_model > 0x4)
+					break;
+				stop_p4_watchdog();
+				break;
+			}
+			break;
+		default:
+			return;
+		}
+	}
+	wd->enabled = 0;
+	atomic_dec(&nmi_active);
 }
 
 /*
@@ -579,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
 
 extern void die_nmi(struct pt_regs *, const char *msg);
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 {
 
 	/*
@@ -588,11 +892,23 @@ void nmi_watchdog_tick (struct pt_regs * regs)
  * smp_processor_id().
  */
 	unsigned int sum;
+	int touched = 0;
 	int cpu = smp_processor_id();
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	u64 dummy;
+	int rc=0;
+
+	/* check for other users first */
+	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+			== NOTIFY_STOP) {
+		rc = 1;
+		touched = 1;
+	}
 
 	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
-	if (last_irq_sums[cpu] == sum) {
+	/* if the apic timer isn't firing, this cpu isn't doing much */
+	if (!touched && last_irq_sums[cpu] == sum) {
 		/*
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
@@ -607,27 +923,59 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 		last_irq_sums[cpu] = sum;
 		alert_counter[cpu] = 0;
 	}
-	if (nmi_perfctr_msr) {
-		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
-			/*
-			 * P4 quirks:
-			 * - An overflown perfctr will assert its interrupt
-			 * until the OVF flag in its CCCR is cleared.
-			 * - LVTPC is masked on interrupt and must be
-			 * unmasked by the LVTPC handler.
-			 */
-			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
-			apic_write(APIC_LVTPC, APIC_DM_NMI);
-		}
-		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
-			nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
-			/* Only P6 based Pentium M need to re-unmask
-			 * the apic vector but it doesn't hurt
-			 * other P6 variant */
-			apic_write(APIC_LVTPC, APIC_DM_NMI);
-		}
-		write_watchdog_counter(NULL);
-	}
+	/* see if the nmi watchdog went off */
+	if (wd->enabled) {
+		if (nmi_watchdog == NMI_LOCAL_APIC) {
+			rdmsrl(wd->perfctr_msr, dummy);
+			if (dummy & wd->check_bit){
+				/* this wasn't a watchdog timer interrupt */
+				goto done;
+			}
+
+			/* only Intel P4 uses the cccr msr */
+			if (wd->cccr_msr != 0) {
+				/*
+				 * P4 quirks:
+				 * - An overflown perfctr will assert its interrupt
+				 * until the OVF flag in its CCCR is cleared.
+				 * - LVTPC is masked on interrupt and must be
+				 * unmasked by the LVTPC handler.
+				 */
+				rdmsrl(wd->cccr_msr, dummy);
+				dummy &= ~P4_CCCR_OVF;
+				wrmsrl(wd->cccr_msr, dummy);
+				apic_write(APIC_LVTPC, APIC_DM_NMI);
+			}
+			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+				/* P6 based Pentium M need to re-unmask
+				 * the apic vector but it doesn't hurt
+				 * other P6 variant.
+				 * ArchPerfom/Core Duo also needs this */
+				apic_write(APIC_LVTPC, APIC_DM_NMI);
+			}
+			/* start the cycle over again */
+			write_watchdog_counter(wd->perfctr_msr, NULL);
+			rc = 1;
+		} else if (nmi_watchdog == NMI_IO_APIC) {
+			/* don't know how to accurately check for this.
+			 * just assume it was a watchdog timer interrupt
+			 * This matches the old behaviour.
+			 */
+			rc = 1;
+		}
+	}
+done:
+	return rc;
+}
+
+int do_nmi_callback(struct pt_regs * regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+	if (unknown_nmi_panic)
+		return unknown_nmi_panic_callback(regs, cpu);
+#endif
+	return 0;
 }
 
 #ifdef CONFIG_SYSCTL
@@ -637,36 +985,46 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 	unsigned char reason = get_nmi_reason();
 	char buf[64];
 
-	if (!(reason & 0xc0)) {
-		sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-		die_nmi(regs, buf);
-	}
+	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+	die_nmi(regs, buf);
 	return 0;
 }
 
 /*
- * proc handler for /proc/sys/kernel/unknown_nmi_panic
+ * proc handler for /proc/sys/kernel/nmi
 */
-int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 			void __user *buffer, size_t *length, loff_t *ppos)
 {
 	int old_state;
 
-	old_state = unknown_nmi_panic;
+	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+	old_state = nmi_watchdog_enabled;
 	proc_dointvec(table, write, file, buffer, length, ppos);
-	if (!!old_state == !!unknown_nmi_panic)
+	if (!!old_state == !!nmi_watchdog_enabled)
 		return 0;
 
-	if (unknown_nmi_panic) {
-		if (reserve_lapic_nmi() < 0) {
-			unknown_nmi_panic = 0;
-			return -EBUSY;
-		} else {
-			set_nmi_callback(unknown_nmi_panic_callback);
-		}
+	if (atomic_read(&nmi_active) < 0) {
+		printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
+		return -EIO;
+	}
+
+	if (nmi_watchdog == NMI_DEFAULT) {
+		if (nmi_known_cpu() > 0)
+			nmi_watchdog = NMI_LOCAL_APIC;
+		else
+			nmi_watchdog = NMI_IO_APIC;
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		if (nmi_watchdog_enabled)
+			enable_lapic_nmi_watchdog();
+		else
+			disable_lapic_nmi_watchdog();
 	} else {
-		release_lapic_nmi();
-		unset_nmi_callback();
+		printk( KERN_WARNING
+			"NMI watchdog doesn't know what hardware to touch\n");
+		return -EIO;
 	}
 	return 0;
 }
@@ -675,7 +1033,11 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
 
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(reserve_lapic_nmi);
-EXPORT_SYMBOL(release_lapic_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
+EXPORT_SYMBOL(reserve_perfctr_nmi);
+EXPORT_SYMBOL(release_perfctr_nmi);
+EXPORT_SYMBOL(reserve_evntsel_nmi);
+EXPORT_SYMBOL(release_evntsel_nmi);
 EXPORT_SYMBOL(disable_timer_nmi_watchdog);
 EXPORT_SYMBOL(enable_timer_nmi_watchdog);
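
[Editor's note: the reserve_/release_ exports above implement a simple ownership protocol: one bit per counter or event-select register, claimed atomically so that the NMI watchdog and oprofile cannot program the same MSR. A runnable userspace sketch of the same claim/release idiom; the kernel uses test_and_set_bit on a per-CPU bitmask, and the GCC builtins below are stand-ins:

	#include <stdio.h>

	static unsigned long perfctr_owner;	/* per-CPU in the real code */

	static int reserve_counter(unsigned int bit)
	{
		/* nonzero if the bit was clear and we just claimed it */
		return !(__sync_fetch_and_or(&perfctr_owner, 1UL << bit)
			 & (1UL << bit));
	}

	static void release_counter(unsigned int bit)
	{
		__sync_fetch_and_and(&perfctr_owner, ~(1UL << bit));
	}

	int main(void)
	{
		printf("first claim: %d\n", reserve_counter(3));   /* 1 */
		printf("second claim: %d\n", reserve_counter(3));  /* 0: busy */
		release_counter(3);
		printf("after release: %d\n", reserve_counter(3)); /* 1 */
		return 0;
	}
]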
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 8657c739656a..8c190ca7ae44 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -37,6 +37,7 @@
37#include <linux/kallsyms.h> 37#include <linux/kallsyms.h>
38#include <linux/ptrace.h> 38#include <linux/ptrace.h>
39#include <linux/random.h> 39#include <linux/random.h>
40#include <linux/personality.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
@@ -320,15 +321,6 @@ void show_regs(struct pt_regs * regs)
320 * the "args". 321 * the "args".
321 */ 322 */
322extern void kernel_thread_helper(void); 323extern void kernel_thread_helper(void);
323__asm__(".section .text\n"
324 ".align 4\n"
325 "kernel_thread_helper:\n\t"
326 "movl %edx,%eax\n\t"
327 "pushl %edx\n\t"
328 "call *%ebx\n\t"
329 "pushl %eax\n\t"
330 "call do_exit\n"
331 ".previous");
332 324
333/* 325/*
334 * Create a kernel thread 326 * Create a kernel thread
@@ -346,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
346 regs.xes = __USER_DS; 338 regs.xes = __USER_DS;
347 regs.orig_eax = -1; 339 regs.orig_eax = -1;
348 regs.eip = (unsigned long) kernel_thread_helper; 340 regs.eip = (unsigned long) kernel_thread_helper;
349 regs.xcs = __KERNEL_CS; 341 regs.xcs = __KERNEL_CS | get_kernel_rpl();
350 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; 342 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
351 343
352 /* Ok, create the new process.. */ 344 /* Ok, create the new process.. */
@@ -905,7 +897,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
905 897
906unsigned long arch_align_stack(unsigned long sp) 898unsigned long arch_align_stack(unsigned long sp)
907{ 899{
908 if (randomize_va_space) 900 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
909 sp -= get_random_int() % 8192; 901 sp -= get_random_int() % 8192;
910 return sp & ~0xf; 902 return sp & ~0xf;
911} 903}
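The arch_align_stack() change above keeps the existing randomization but gates it on the ADDR_NO_RANDOMIZE personality flag. The underlying trick, sketched here as plain userspace C, is to subtract up to 8 KiB of jitter and then round back down to a 16-byte boundary so ABI stack alignment still holds:

    #include <stdio.h>
    #include <stdlib.h>

    /* sketch of the alignment trick; the kernel additionally checks the
     * ADDR_NO_RANDOMIZE personality flag and uses get_random_int() */
    static unsigned long align_stack(unsigned long sp)
    {
        sp -= rand() % 8192;    /* up to 8 KiB of jitter */
        return sp & ~0xfUL;     /* round down to 16-byte boundary */
    }

    int main(void)
    {
        unsigned long sp = 0xbfffe000UL;
        printf("%#lx -> %#lx\n", sp, align_stack(sp));
        return 0;
    }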
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index d3db03f4085d..775f50e9395b 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -185,17 +185,17 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_
185 return addr; 185 return addr;
186} 186}
187 187
188static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs) 188static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
189{ 189{
190 int i, copied; 190 int i, copied;
191 unsigned char opcode[16]; 191 unsigned char opcode[15];
192 unsigned long addr = convert_eip_to_linear(child, regs); 192 unsigned long addr = convert_eip_to_linear(child, regs);
193 193
194 copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); 194 copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
195 for (i = 0; i < copied; i++) { 195 for (i = 0; i < copied; i++) {
196 switch (opcode[i]) { 196 switch (opcode[i]) {
197 /* popf */ 197 /* popf and iret */
198 case 0x9d: 198 case 0x9d: case 0xcf:
199 return 1; 199 return 1;
200 /* opcode and address size prefixes */ 200 /* opcode and address size prefixes */
201 case 0x66: case 0x67: 201 case 0x66: case 0x67:
@@ -247,7 +247,7 @@ static void set_singlestep(struct task_struct *child)
247 * don't mark it as being "us" that set it, so that we 247 * don't mark it as being "us" that set it, so that we
248 * won't clear it by hand later. 248 * won't clear it by hand later.
249 */ 249 */
250 if (is_at_popf(child, regs)) 250 if (is_setting_trap_flag(child, regs))
251 return; 251 return;
252 252
253 child->ptrace |= PT_DTRACE; 253 child->ptrace |= PT_DTRACE;
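The renamed helper now treats iret (0xcf) like popf (0x9d): both can set EFLAGS.TF themselves, so the single-stepping code must not claim credit for the resulting trap. A self-contained sketch of the byte scan, restricted to the two prefix bytes visible in this hunk:

    #include <stdio.h>

    /* walk the instruction bytes, skipping size prefixes, and report
     * whether the instruction is popf or iret */
    static int sets_trap_flag(const unsigned char *opcode, int len)
    {
        int i;

        for (i = 0; i < len; i++) {
            switch (opcode[i]) {
            case 0x9d: case 0xcf:       /* popf, iret */
                return 1;
            case 0x66: case 0x67:       /* operand/address size prefixes */
                continue;               /* keep scanning past prefixes */
            default:
                return 0;               /* some other instruction */
            }
        }
        return 0;
    }

    int main(void)
    {
        unsigned char popf_with_prefix[] = { 0x66, 0x9d };
        unsigned char mov[] = { 0x89, 0xd8 };

        printf("%d %d\n", sets_trap_flag(popf_with_prefix, 2),
               sets_trap_flag(mov, 2));   /* prints "1 0" */
        return 0;
    }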
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
index d312616effa1..f151d6fae462 100644
--- a/arch/i386/kernel/relocate_kernel.S
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -7,16 +7,138 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h>
11#include <asm/kexec.h>
12
13/*
14 * Must be relocatable PIC code callable as a C function
15 */
16
17#define PTR(x) (x << 2)
18#define PAGE_ALIGNED (1 << PAGE_SHIFT)
19#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
20#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
21
22 .text
23 .align PAGE_ALIGNED
24 .globl relocate_kernel
25relocate_kernel:
26 movl 8(%esp), %ebp /* list of pages */
27
28#ifdef CONFIG_X86_PAE
29 /* map the control page at its virtual address */
30
31 movl PTR(VA_PGD)(%ebp), %edi
32 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
33 andl $0xc0000000, %eax
34 shrl $27, %eax
35 addl %edi, %eax
36
37 movl PTR(PA_PMD_0)(%ebp), %edx
38 orl $PAE_PGD_ATTR, %edx
39 movl %edx, (%eax)
40
41 movl PTR(VA_PMD_0)(%ebp), %edi
42 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
43 andl $0x3fe00000, %eax
44 shrl $18, %eax
45 addl %edi, %eax
46
47 movl PTR(PA_PTE_0)(%ebp), %edx
48 orl $PAGE_ATTR, %edx
49 movl %edx, (%eax)
50
51 movl PTR(VA_PTE_0)(%ebp), %edi
52 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
53 andl $0x001ff000, %eax
54 shrl $9, %eax
55 addl %edi, %eax
56
57 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
58 orl $PAGE_ATTR, %edx
59 movl %edx, (%eax)
60
61 /* identity map the control page at its physical address */
62
63 movl PTR(VA_PGD)(%ebp), %edi
64 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
65 andl $0xc0000000, %eax
66 shrl $27, %eax
67 addl %edi, %eax
68
69 movl PTR(PA_PMD_1)(%ebp), %edx
70 orl $PAE_PGD_ATTR, %edx
71 movl %edx, (%eax)
72
73 movl PTR(VA_PMD_1)(%ebp), %edi
74 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
75 andl $0x3fe00000, %eax
76 shrl $18, %eax
77 addl %edi, %eax
78
79 movl PTR(PA_PTE_1)(%ebp), %edx
80 orl $PAGE_ATTR, %edx
81 movl %edx, (%eax)
82
83 movl PTR(VA_PTE_1)(%ebp), %edi
84 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
85 andl $0x001ff000, %eax
86 shrl $9, %eax
87 addl %edi, %eax
88
89 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
90 orl $PAGE_ATTR, %edx
91 movl %edx, (%eax)
92#else
93 /* map the control page at its virtual address */
94
95 movl PTR(VA_PGD)(%ebp), %edi
96 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
97 andl $0xffc00000, %eax
98 shrl $20, %eax
99 addl %edi, %eax
100
101 movl PTR(PA_PTE_0)(%ebp), %edx
102 orl $PAGE_ATTR, %edx
103 movl %edx, (%eax)
104
105 movl PTR(VA_PTE_0)(%ebp), %edi
106 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
107 andl $0x003ff000, %eax
108 shrl $10, %eax
109 addl %edi, %eax
110
111 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
112 orl $PAGE_ATTR, %edx
113 movl %edx, (%eax)
114
115 /* identity map the control page at its physical address */
116
117 movl PTR(VA_PGD)(%ebp), %edi
118 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
119 andl $0xffc00000, %eax
120 shrl $20, %eax
121 addl %edi, %eax
122
123 movl PTR(PA_PTE_1)(%ebp), %edx
124 orl $PAGE_ATTR, %edx
125 movl %edx, (%eax)
126
127 movl PTR(VA_PTE_1)(%ebp), %edi
128 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
129 andl $0x003ff000, %eax
130 shrl $10, %eax
131 addl %edi, %eax
132
133 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
134 orl $PAGE_ATTR, %edx
135 movl %edx, (%eax)
136#endif
10 137
11 /*
12 * Must be relocatable PIC code callable as a C function, that once
13 * it starts cannot use the previous process's stack.
14 */
15 .globl relocate_new_kernel
16relocate_new_kernel: 138relocate_new_kernel:
17 /* read the arguments and say goodbye to the stack */ 139 /* read the arguments and say goodbye to the stack */
18 movl 4(%esp), %ebx /* page_list */ 140 movl 4(%esp), %ebx /* page_list */
19 movl 8(%esp), %ebp /* reboot_code_buffer */ 141 movl 8(%esp), %ebp /* list of pages */
20 movl 12(%esp), %edx /* start address */ 142 movl 12(%esp), %edx /* start address */
21 movl 16(%esp), %ecx /* cpu_has_pae */ 143 movl 16(%esp), %ecx /* cpu_has_pae */
22 144
@@ -24,11 +146,26 @@ relocate_new_kernel:
24 pushl $0 146 pushl $0
25 popfl 147 popfl
26 148
27 /* set a new stack at the bottom of our page... */ 149 /* get physical address of control page now */
28 lea 4096(%ebp), %esp 150 /* this is impossible after page table switch */
151 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
29 152
30 /* store the parameters back on the stack */ 153 /* switch to new set of page tables */
31 pushl %edx /* store the start address */ 154 movl PTR(PA_PGD)(%ebp), %eax
155 movl %eax, %cr3
156
157 /* setup a new stack at the end of the physical control page */
158 lea 4096(%edi), %esp
159
160 /* jump to identity mapped page */
161 movl %edi, %eax
162 addl $(identity_mapped - relocate_kernel), %eax
163 pushl %eax
164 ret
165
166identity_mapped:
167 /* store the start address on the stack */
168 pushl %edx
32 169
33 /* Set cr0 to a known state: 170 /* Set cr0 to a known state:
34 * 31 0 == Paging disabled 171 * 31 0 == Paging disabled
@@ -113,8 +250,3 @@ relocate_new_kernel:
113 xorl %edi, %edi 250 xorl %edi, %edi
114 xorl %ebp, %ebp 251 xorl %ebp, %ebp
115 ret 252 ret
116relocate_new_kernel_end:
117
118 .globl relocate_new_kernel_size
119relocate_new_kernel_size:
120 .long relocate_new_kernel_end - relocate_new_kernel
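The mask-and-shift pairs in the non-PAE path above are just page-table indexing with the multiply-by-4 entry size folded in: (va & 0xffc00000) >> 20 is the byte offset of the page-directory slot, i.e. (va >> 22) * 4, and (va & 0x003ff000) >> 10 is the byte offset of the page-table slot. A quick userspace check of that arithmetic:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long va = 0xc0123456UL;

        /* byte offsets of the pgd/pte slots, as computed in the asm */
        unsigned long pgd_off = (va & 0xffc00000UL) >> 20;
        unsigned long pte_off = (va & 0x003ff000UL) >> 10;

        /* equivalent index-times-entry-size forms */
        assert(pgd_off == (va >> 22) * 4);
        assert(pte_off == ((va >> 12) & 0x3ff) * 4);

        printf("pgd slot offset %#lx, pte slot offset %#lx\n",
               pgd_off, pte_off);
        return 0;
    }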
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
deleted file mode 100644
index 98352c374c76..000000000000
--- a/arch/i386/kernel/semaphore.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * i386 semaphore implementation.
3 *
4 * (C) Copyright 1999 Linus Torvalds
5 *
6 * Portions Copyright 1999 Red Hat, Inc.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
14 */
15#include <asm/semaphore.h>
16
17/*
18 * The semaphore operations have a special calling sequence that
19 * allow us to do a simpler in-line version of them. These routines
20 * need to convert that sequence back into the C sequence when
21 * there is contention on the semaphore.
22 *
23 * %eax contains the semaphore pointer on entry. Save the C-clobbered
24 * registers (%eax, %edx and %ecx) except %eax which is either a return
25 * value or just clobbered..
26 */
27asm(
28".section .sched.text\n"
29".align 4\n"
30".globl __down_failed\n"
31"__down_failed:\n\t"
32#if defined(CONFIG_FRAME_POINTER)
33 "pushl %ebp\n\t"
34 "movl %esp,%ebp\n\t"
35#endif
36 "pushl %edx\n\t"
37 "pushl %ecx\n\t"
38 "call __down\n\t"
39 "popl %ecx\n\t"
40 "popl %edx\n\t"
41#if defined(CONFIG_FRAME_POINTER)
42 "movl %ebp,%esp\n\t"
43 "popl %ebp\n\t"
44#endif
45 "ret"
46);
47
48asm(
49".section .sched.text\n"
50".align 4\n"
51".globl __down_failed_interruptible\n"
52"__down_failed_interruptible:\n\t"
53#if defined(CONFIG_FRAME_POINTER)
54 "pushl %ebp\n\t"
55 "movl %esp,%ebp\n\t"
56#endif
57 "pushl %edx\n\t"
58 "pushl %ecx\n\t"
59 "call __down_interruptible\n\t"
60 "popl %ecx\n\t"
61 "popl %edx\n\t"
62#if defined(CONFIG_FRAME_POINTER)
63 "movl %ebp,%esp\n\t"
64 "popl %ebp\n\t"
65#endif
66 "ret"
67);
68
69asm(
70".section .sched.text\n"
71".align 4\n"
72".globl __down_failed_trylock\n"
73"__down_failed_trylock:\n\t"
74#if defined(CONFIG_FRAME_POINTER)
75 "pushl %ebp\n\t"
76 "movl %esp,%ebp\n\t"
77#endif
78 "pushl %edx\n\t"
79 "pushl %ecx\n\t"
80 "call __down_trylock\n\t"
81 "popl %ecx\n\t"
82 "popl %edx\n\t"
83#if defined(CONFIG_FRAME_POINTER)
84 "movl %ebp,%esp\n\t"
85 "popl %ebp\n\t"
86#endif
87 "ret"
88);
89
90asm(
91".section .sched.text\n"
92".align 4\n"
93".globl __up_wakeup\n"
94"__up_wakeup:\n\t"
95 "pushl %edx\n\t"
96 "pushl %ecx\n\t"
97 "call __up\n\t"
98 "popl %ecx\n\t"
99 "popl %edx\n\t"
100 "ret"
101);
102
103/*
104 * rw spinlock fallbacks
105 */
106#if defined(CONFIG_SMP)
107asm(
108".section .sched.text\n"
109".align 4\n"
110".globl __write_lock_failed\n"
111"__write_lock_failed:\n\t"
112 LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
113"1: rep; nop\n\t"
114 "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
115 "jne 1b\n\t"
116 LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
117 "jnz __write_lock_failed\n\t"
118 "ret"
119);
120
121asm(
122".section .sched.text\n"
123".align 4\n"
124".globl __read_lock_failed\n"
125"__read_lock_failed:\n\t"
126 LOCK_PREFIX "incl (%eax)\n"
127"1: rep; nop\n\t"
128 "cmpl $1,(%eax)\n\t"
129 "js 1b\n\t"
130 LOCK_PREFIX "decl (%eax)\n\t"
131 "js __read_lock_failed\n\t"
132 "ret"
133);
134#endif
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 16d99444cf66..76a524b4c90f 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -90,18 +90,6 @@ EXPORT_SYMBOL(boot_cpu_data);
90 90
91unsigned long mmu_cr4_features; 91unsigned long mmu_cr4_features;
92 92
93#ifdef CONFIG_ACPI
94 int acpi_disabled = 0;
95#else
96 int acpi_disabled = 1;
97#endif
98EXPORT_SYMBOL(acpi_disabled);
99
100#ifdef CONFIG_ACPI
101int __initdata acpi_force = 0;
102extern acpi_interrupt_flags acpi_sci_flags;
103#endif
104
105/* for MCA, but anyone else can use it if they want */ 93/* for MCA, but anyone else can use it if they want */
106unsigned int machine_id; 94unsigned int machine_id;
107#ifdef CONFIG_MCA 95#ifdef CONFIG_MCA
@@ -149,7 +137,6 @@ EXPORT_SYMBOL(ist_info);
149struct e820map e820; 137struct e820map e820;
150 138
151extern void early_cpu_init(void); 139extern void early_cpu_init(void);
152extern void generic_apic_probe(char *);
153extern int root_mountflags; 140extern int root_mountflags;
154 141
155unsigned long saved_videomode; 142unsigned long saved_videomode;
@@ -701,238 +688,132 @@ static inline void copy_edd(void)
701} 688}
702#endif 689#endif
703 690
704static void __init parse_cmdline_early (char ** cmdline_p) 691static int __initdata user_defined_memmap = 0;
705{
706 char c = ' ', *to = command_line, *from = saved_command_line;
707 int len = 0;
708 int userdef = 0;
709 692
710 /* Save unparsed command line copy for /proc/cmdline */ 693/*
711 saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; 694 * "mem=nopentium" disables the 4MB page tables.
695 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
696 * to <mem>, overriding the bios size.
697 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
698 * <start> to <start>+<mem>, overriding the bios size.
699 *
700 * HPA tells me bootloaders need to parse mem=, so no new
701 * option should be mem= [also see Documentation/i386/boot.txt]
702 */
703static int __init parse_mem(char *arg)
704{
705 if (!arg)
706 return -EINVAL;
712 707
713 for (;;) { 708 if (strcmp(arg, "nopentium") == 0) {
714 if (c != ' ') 709 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
715 goto next_char; 710 disable_pse = 1;
716 /* 711 } else {
717 * "mem=nopentium" disables the 4MB page tables. 712 /* If the user specifies memory size, we
718 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM 713 * limit the BIOS-provided memory map to
719 * to <mem>, overriding the bios size. 714 * that size. exactmap can be used to specify
720 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from 715 * the exact map. mem=number can be used to
721 * <start> to <start>+<mem>, overriding the bios size. 716 * trim the existing memory map.
722 *
723 * HPA tells me bootloaders need to parse mem=, so no new
724 * option should be mem= [also see Documentation/i386/boot.txt]
725 */ 717 */
726 if (!memcmp(from, "mem=", 4)) { 718 unsigned long long mem_size;
727 if (to != command_line)
728 to--;
729 if (!memcmp(from+4, "nopentium", 9)) {
730 from += 9+4;
731 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
732 disable_pse = 1;
733 } else {
734 /* If the user specifies memory size, we
735 * limit the BIOS-provided memory map to
736 * that size. exactmap can be used to specify
737 * the exact map. mem=number can be used to
738 * trim the existing memory map.
739 */
740 unsigned long long mem_size;
741 719
742 mem_size = memparse(from+4, &from); 720 mem_size = memparse(arg, &arg);
743 limit_regions(mem_size); 721 limit_regions(mem_size);
744 userdef=1; 722 user_defined_memmap = 1;
745 } 723 }
746 } 724 return 0;
747 725}
748 else if (!memcmp(from, "memmap=", 7)) { 726early_param("mem", parse_mem);
749 if (to != command_line)
750 to--;
751 if (!memcmp(from+7, "exactmap", 8)) {
752#ifdef CONFIG_CRASH_DUMP
753 /* If we are doing a crash dump, we
754 * still need to know the real mem
755 * size before original memory map is
756 * reset.
757 */
758 find_max_pfn();
759 saved_max_pfn = max_pfn;
760#endif
761 from += 8+7;
762 e820.nr_map = 0;
763 userdef = 1;
764 } else {
765 /* If the user specifies memory size, we
766 * limit the BIOS-provided memory map to
767 * that size. exactmap can be used to specify
768 * the exact map. mem=number can be used to
769 * trim the existing memory map.
770 */
771 unsigned long long start_at, mem_size;
772
773 mem_size = memparse(from+7, &from);
774 if (*from == '@') {
775 start_at = memparse(from+1, &from);
776 add_memory_region(start_at, mem_size, E820_RAM);
777 } else if (*from == '#') {
778 start_at = memparse(from+1, &from);
779 add_memory_region(start_at, mem_size, E820_ACPI);
780 } else if (*from == '$') {
781 start_at = memparse(from+1, &from);
782 add_memory_region(start_at, mem_size, E820_RESERVED);
783 } else {
784 limit_regions(mem_size);
785 userdef=1;
786 }
787 }
788 }
789
790 else if (!memcmp(from, "noexec=", 7))
791 noexec_setup(from + 7);
792 727
728static int __init parse_memmap(char *arg)
729{
730 if (!arg)
731 return -EINVAL;
793 732
794#ifdef CONFIG_X86_SMP 733 if (strcmp(arg, "exactmap") == 0) {
795 /* 734#ifdef CONFIG_CRASH_DUMP
796 * If the BIOS enumerates physical processors before logical, 735 /* If we are doing a crash dump, we
797 * maxcpus=N at enumeration-time can be used to disable HT. 736 * still need to know the real mem
737 * size before original memory map is
738 * reset.
798 */ 739 */
799 else if (!memcmp(from, "maxcpus=", 8)) { 740 find_max_pfn();
800 extern unsigned int maxcpus; 741 saved_max_pfn = max_pfn;
801
802 maxcpus = simple_strtoul(from + 8, NULL, 0);
803 }
804#endif 742#endif
805 743 e820.nr_map = 0;
806#ifdef CONFIG_ACPI 744 user_defined_memmap = 1;
807 /* "acpi=off" disables both ACPI table parsing and interpreter */ 745 } else {
808 else if (!memcmp(from, "acpi=off", 8)) { 746 /* If the user specifies memory size, we
809 disable_acpi(); 747 * limit the BIOS-provided memory map to
810 } 748 * that size. exactmap can be used to specify
811 749 * the exact map. mem=number can be used to
812 /* acpi=force to over-ride black-list */ 750 * trim the existing memory map.
813 else if (!memcmp(from, "acpi=force", 10)) { 751 */
814 acpi_force = 1; 752 unsigned long long start_at, mem_size;
815 acpi_ht = 1; 753
816 acpi_disabled = 0; 754 mem_size = memparse(arg, &arg);
817 } 755 if (*arg == '@') {
818 756 start_at = memparse(arg+1, &arg);
819 /* acpi=strict disables out-of-spec workarounds */ 757 add_memory_region(start_at, mem_size, E820_RAM);
820 else if (!memcmp(from, "acpi=strict", 11)) { 758 } else if (*arg == '#') {
821 acpi_strict = 1; 759 start_at = memparse(arg+1, &arg);
822 } 760 add_memory_region(start_at, mem_size, E820_ACPI);
823 761 } else if (*arg == '$') {
824 /* Limit ACPI just to boot-time to enable HT */ 762 start_at = memparse(arg+1, &arg);
825 else if (!memcmp(from, "acpi=ht", 7)) { 763 add_memory_region(start_at, mem_size, E820_RESERVED);
826 if (!acpi_force) 764 } else {
827 disable_acpi(); 765 limit_regions(mem_size);
828 acpi_ht = 1; 766 user_defined_memmap = 1;
829 }
830
831 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
832 else if (!memcmp(from, "pci=noacpi", 10)) {
833 acpi_disable_pci();
834 }
835 /* "acpi=noirq" disables ACPI interrupt routing */
836 else if (!memcmp(from, "acpi=noirq", 10)) {
837 acpi_noirq_set();
838 } 767 }
768 }
769 return 0;
770}
771early_param("memmap", parse_memmap);
839 772
840 else if (!memcmp(from, "acpi_sci=edge", 13)) 773#ifdef CONFIG_PROC_VMCORE
841 acpi_sci_flags.trigger = 1; 774/* elfcorehdr= specifies the location of elf core header
842 775 * stored by the crashed kernel.
843 else if (!memcmp(from, "acpi_sci=level", 14)) 776 */
844 acpi_sci_flags.trigger = 3; 777static int __init parse_elfcorehdr(char *arg)
845 778{
846 else if (!memcmp(from, "acpi_sci=high", 13)) 779 if (!arg)
847 acpi_sci_flags.polarity = 1; 780 return -EINVAL;
848
849 else if (!memcmp(from, "acpi_sci=low", 12))
850 acpi_sci_flags.polarity = 3;
851
852#ifdef CONFIG_X86_IO_APIC
853 else if (!memcmp(from, "acpi_skip_timer_override", 24))
854 acpi_skip_timer_override = 1;
855
856 if (!memcmp(from, "disable_timer_pin_1", 19))
857 disable_timer_pin_1 = 1;
858 if (!memcmp(from, "enable_timer_pin_1", 18))
859 disable_timer_pin_1 = -1;
860 781
861 /* disable IO-APIC */ 782 elfcorehdr_addr = memparse(arg, &arg);
862 else if (!memcmp(from, "noapic", 6)) 783 return 0;
863 disable_ioapic_setup(); 784}
864#endif /* CONFIG_X86_IO_APIC */ 785early_param("elfcorehdr", parse_elfcorehdr);
865#endif /* CONFIG_ACPI */ 786#endif /* CONFIG_PROC_VMCORE */
866 787
867#ifdef CONFIG_X86_LOCAL_APIC 788/*
868 /* enable local APIC */ 789 * highmem=size forces highmem to be exactly 'size' bytes.
869 else if (!memcmp(from, "lapic", 5)) 790 * This works even on boxes that have no highmem otherwise.
870 lapic_enable(); 791 * This also works to reduce highmem size on bigger boxes.
792 */
793static int __init parse_highmem(char *arg)
794{
795 if (!arg)
796 return -EINVAL;
871 797
872 /* disable local APIC */ 798 highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
873 else if (!memcmp(from, "nolapic", 6)) 799 return 0;
874 lapic_disable(); 800}
875#endif /* CONFIG_X86_LOCAL_APIC */ 801early_param("highmem", parse_highmem);
876 802
877#ifdef CONFIG_KEXEC 803/*
878 /* crashkernel=size@addr specifies the location to reserve for 804 * vmalloc=size forces the vmalloc area to be exactly 'size'
879 * a crash kernel. By reserving this memory we guarantee 805 * bytes. This can be used to increase (or decrease) the
880 * that linux never sets it up as a DMA target. 806 * vmalloc area - the default is 128m.
881 * Useful for holding code to do something appropriate 807 */
882 * after a kernel panic. 808static int __init parse_vmalloc(char *arg)
883 */ 809{
884 else if (!memcmp(from, "crashkernel=", 12)) { 810 if (!arg)
885 unsigned long size, base; 811 return -EINVAL;
886 size = memparse(from+12, &from);
887 if (*from == '@') {
888 base = memparse(from+1, &from);
889 /* FIXME: Do I want a sanity check
890 * to validate the memory range?
891 */
892 crashk_res.start = base;
893 crashk_res.end = base + size - 1;
894 }
895 }
896#endif
897#ifdef CONFIG_PROC_VMCORE
898 /* elfcorehdr= specifies the location of elf core header
899 * stored by the crashed kernel.
900 */
901 else if (!memcmp(from, "elfcorehdr=", 11))
902 elfcorehdr_addr = memparse(from+11, &from);
903#endif
904 812
905 /* 813 __VMALLOC_RESERVE = memparse(arg, &arg);
906 * highmem=size forces highmem to be exactly 'size' bytes. 814 return 0;
907 * This works even on boxes that have no highmem otherwise.
908 * This also works to reduce highmem size on bigger boxes.
909 */
910 else if (!memcmp(from, "highmem=", 8))
911 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
912
913 /*
914 * vmalloc=size forces the vmalloc area to be exactly 'size'
915 * bytes. This can be used to increase (or decrease) the
916 * vmalloc area - the default is 128m.
917 */
918 else if (!memcmp(from, "vmalloc=", 8))
919 __VMALLOC_RESERVE = memparse(from+8, &from);
920
921 next_char:
922 c = *(from++);
923 if (!c)
924 break;
925 if (COMMAND_LINE_SIZE <= ++len)
926 break;
927 *(to++) = c;
928 }
929 *to = '\0';
930 *cmdline_p = command_line;
931 if (userdef) {
932 printk(KERN_INFO "user-defined physical RAM map:\n");
933 print_memory_map("user");
934 }
935} 815}
816early_param("vmalloc", parse_vmalloc);
936 817
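The rewrite above replaces one hand-rolled scanner over the raw command line with a per-option early_param() parser that receives only the value string. A toy userspace rendering of the pattern; memparse_sim is a simplified stand-in for the kernel's memparse() (number plus optional K/M suffix), not its full grammar:

    #include <stdio.h>
    #include <stdlib.h>

    /* toy stand-in for memparse(): number with optional K/M suffix */
    static unsigned long long memparse_sim(const char *s, char **retptr)
    {
        unsigned long long v = strtoull(s, retptr, 0);

        switch (**retptr) {
        case 'M': case 'm': v <<= 20; (*retptr)++; break;
        case 'K': case 'k': v <<= 10; (*retptr)++; break;
        }
        return v;
    }

    /* shaped like the new parse_mem(): gets just the value string */
    static int parse_mem(char *arg)
    {
        char *end;

        if (!arg)
            return -1;
        printf("limit memory to %llu bytes\n", memparse_sim(arg, &end));
        return 0;
    }

    int main(void)
    {
        char arg[] = "512M";
        return parse_mem(arg);
    }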
937/* 818/*
938 * reservetop=size reserves a hole at the top of the kernel address space which 819 * reservetop=size reserves a hole at the top of the kernel address space which
@@ -1189,6 +1070,14 @@ static unsigned long __init setup_memory(void)
1189 } 1070 }
1190 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 1071 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1191 pages_to_mb(highend_pfn - highstart_pfn)); 1072 pages_to_mb(highend_pfn - highstart_pfn));
1073 num_physpages = highend_pfn;
1074 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
1075#else
1076 num_physpages = max_low_pfn;
1077 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
1078#endif
1079#ifdef CONFIG_FLATMEM
1080 max_mapnr = num_physpages;
1192#endif 1081#endif
1193 printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 1082 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1194 pages_to_mb(max_low_pfn)); 1083 pages_to_mb(max_low_pfn));
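The added lines compute high_memory from the last lowmem byte rather than the first out-of-range one: __va() is applied to max_low_pfn * PAGE_SIZE - 1 and the 1 is added back afterwards, presumably to keep the argument inside the mapped range. A sketch with the i386 default 3G/1G split, where va() is a stand-in for __va():

    #include <stdio.h>

    #define PAGE_SHIFT  12
    #define PAGE_SIZE   (1UL << PAGE_SHIFT)
    #define PAGE_OFFSET 0xc0000000UL             /* i386 default split */

    /* stand-in for __va(): lowmem virtual alias of a physical address */
    static unsigned long va(unsigned long phys) { return PAGE_OFFSET + phys; }

    int main(void)
    {
        unsigned long max_low_pfn = 0x38000;     /* 896 MB of lowmem */
        unsigned long high_memory = va(max_low_pfn * PAGE_SIZE - 1) + 1;

        printf("high_memory = %#lx\n", high_memory);  /* 0xf8000000 */
        return 0;
    }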
@@ -1518,17 +1407,15 @@ void __init setup_arch(char **cmdline_p)
1518 data_resource.start = virt_to_phys(_etext); 1407 data_resource.start = virt_to_phys(_etext);
1519 data_resource.end = virt_to_phys(_edata)-1; 1408 data_resource.end = virt_to_phys(_edata)-1;
1520 1409
1521 parse_cmdline_early(cmdline_p); 1410 parse_early_param();
1522 1411
1523#ifdef CONFIG_EARLY_PRINTK 1412 if (user_defined_memmap) {
1524 { 1413 printk(KERN_INFO "user-defined physical RAM map:\n");
1525 char *s = strstr(*cmdline_p, "earlyprintk="); 1414 print_memory_map("user");
1526 if (s) {
1527 setup_early_printk(strchr(s, '=') + 1);
1528 printk("early console enabled\n");
1529 }
1530 } 1415 }
1531#endif 1416
1417 strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
1418 *cmdline_p = command_line;
1532 1419
1533 max_low_pfn = setup_memory(); 1420 max_low_pfn = setup_memory();
1534 1421
@@ -1557,7 +1444,7 @@ void __init setup_arch(char **cmdline_p)
1557 dmi_scan_machine(); 1444 dmi_scan_machine();
1558 1445
1559#ifdef CONFIG_X86_GENERICARCH 1446#ifdef CONFIG_X86_GENERICARCH
1560 generic_apic_probe(*cmdline_p); 1447 generic_apic_probe();
1561#endif 1448#endif
1562 if (efi_enabled) 1449 if (efi_enabled)
1563 efi_map_memmap(); 1450 efi_map_memmap();
@@ -1569,9 +1456,11 @@ void __init setup_arch(char **cmdline_p)
1569 acpi_boot_table_init(); 1456 acpi_boot_table_init();
1570#endif 1457#endif
1571 1458
1459#ifdef CONFIG_PCI
1572#ifdef CONFIG_X86_IO_APIC 1460#ifdef CONFIG_X86_IO_APIC
1573 check_acpi_pci(); /* Checks more than just ACPI actually */ 1461 check_acpi_pci(); /* Checks more than just ACPI actually */
1574#endif 1462#endif
1463#endif
1575 1464
1576#ifdef CONFIG_ACPI 1465#ifdef CONFIG_ACPI
1577 acpi_boot_init(); 1466 acpi_boot_init();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index efe07990e7fc..020d873b7d21 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -177,6 +177,9 @@ static void __devinit smp_store_cpu_info(int id)
177 */ 177 */
178 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { 178 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
179 179
180 if (num_possible_cpus() == 1)
181 goto valid_k7;
182
180 /* Athlon 660/661 is valid. */ 183 /* Athlon 660/661 is valid. */
181 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) 184 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
182 goto valid_k7; 185 goto valid_k7;
@@ -1376,7 +1379,8 @@ int __cpu_disable(void)
1376 */ 1379 */
1377 if (cpu == 0) 1380 if (cpu == 0)
1378 return -EBUSY; 1381 return -EBUSY;
1379 1382 if (nmi_watchdog == NMI_LOCAL_APIC)
1383 stop_apic_nmi_watchdog(NULL);
1380 clear_local_APIC(); 1384 clear_local_APIC();
1381 /* Allow any queued timer interrupts to get serviced */ 1385 /* Allow any queued timer interrupts to get serviced */
1382 local_irq_enable(); 1386 local_irq_enable();
@@ -1490,3 +1494,16 @@ void __init smp_intr_init(void)
1490 /* IPI for generic function call */ 1494 /* IPI for generic function call */
1491 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 1495 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1492} 1496}
1497
1498/*
1499 * If the BIOS enumerates physical processors before logical,
1500 * maxcpus=N at enumeration-time can be used to disable HT.
1501 */
1502static int __init parse_maxcpus(char *arg)
1503{
1504 extern unsigned int maxcpus;
1505
1506 maxcpus = simple_strtoul(arg, NULL, 0);
1507 return 0;
1508}
1509early_param("maxcpus", parse_maxcpus);
diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c
deleted file mode 100644
index e62a037ab399..000000000000
--- a/arch/i386/kernel/stacktrace.c
+++ /dev/null
@@ -1,98 +0,0 @@
1/*
2 * arch/i386/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/stacktrace.h>
10
11static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
12{
13 return p > (void *)tinfo &&
14 p < (void *)tinfo + THREAD_SIZE - 3;
15}
16
17/*
18 * Save stack-backtrace addresses into a stack_trace buffer:
19 */
20static inline unsigned long
21save_context_stack(struct stack_trace *trace, unsigned int skip,
22 struct thread_info *tinfo, unsigned long *stack,
23 unsigned long ebp)
24{
25 unsigned long addr;
26
27#ifdef CONFIG_FRAME_POINTER
28 while (valid_stack_ptr(tinfo, (void *)ebp)) {
29 addr = *(unsigned long *)(ebp + 4);
30 if (!skip)
31 trace->entries[trace->nr_entries++] = addr;
32 else
33 skip--;
34 if (trace->nr_entries >= trace->max_entries)
35 break;
36 /*
37 * break out of recursive entries (such as
38 * end_of_stack_stop_unwind_function):
39 */
40 if (ebp == *(unsigned long *)ebp)
41 break;
42
43 ebp = *(unsigned long *)ebp;
44 }
45#else
46 while (valid_stack_ptr(tinfo, stack)) {
47 addr = *stack++;
48 if (__kernel_text_address(addr)) {
49 if (!skip)
50 trace->entries[trace->nr_entries++] = addr;
51 else
52 skip--;
53 if (trace->nr_entries >= trace->max_entries)
54 break;
55 }
56 }
57#endif
58
59 return ebp;
60}
61
62/*
63 * Save stack-backtrace addresses into a stack_trace buffer.
64 * If all_contexts is set, all contexts (hardirq, softirq and process)
65 * are saved. If not set then only the current context is saved.
66 */
67void save_stack_trace(struct stack_trace *trace,
68 struct task_struct *task, int all_contexts,
69 unsigned int skip)
70{
71 unsigned long ebp;
72 unsigned long *stack = &ebp;
73
74 WARN_ON(trace->nr_entries || !trace->max_entries);
75
76 if (!task || task == current) {
77 /* Grab ebp right from our regs: */
78 asm ("movl %%ebp, %0" : "=r" (ebp));
79 } else {
80 /* ebp is the last reg pushed by switch_to(): */
81 ebp = *(unsigned long *) task->thread.esp;
82 }
83
84 while (1) {
85 struct thread_info *context = (struct thread_info *)
86 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
87
88 ebp = save_context_stack(trace, skip, context, stack, ebp);
89 stack = (unsigned long *)context->previous_esp;
90 if (!all_contexts || !stack ||
91 trace->nr_entries >= trace->max_entries)
92 break;
93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94 if (trace->nr_entries >= trace->max_entries)
95 break;
96 }
97}
98
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index dd63d4775398..7e639f78b0b9 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -317,3 +317,4 @@ ENTRY(sys_call_table)
317 .long sys_tee /* 315 */ 317 .long sys_tee /* 315 */
318 .long sys_vmsplice 318 .long sys_vmsplice
319 .long sys_move_pages 319 .long sys_move_pages
320 .long sys_getcpu
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 1302e4ab3c4f..86944acfb647 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -130,18 +130,33 @@ static int set_rtc_mmss(unsigned long nowtime)
130 130
131int timer_ack; 131int timer_ack;
132 132
133#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
134unsigned long profile_pc(struct pt_regs *regs) 133unsigned long profile_pc(struct pt_regs *regs)
135{ 134{
136 unsigned long pc = instruction_pointer(regs); 135 unsigned long pc = instruction_pointer(regs);
137 136
138 if (!user_mode_vm(regs) && in_lock_functions(pc)) 137#ifdef CONFIG_SMP
138 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
139#ifdef CONFIG_FRAME_POINTER
139 return *(unsigned long *)(regs->ebp + 4); 140 return *(unsigned long *)(regs->ebp + 4);
140 141#else
142 unsigned long *sp;
143 if ((regs->xcs & 3) == 0)
144 sp = (unsigned long *)&regs->esp;
145 else
146 sp = (unsigned long *)regs->esp;
147 /* Return address is either directly at stack pointer
148 or above a saved eflags. Eflags has bits 22-31 zero,
149 kernel addresses don't. */
150 if (sp[0] >> 22)
151 return sp[0];
152 if (sp[1] >> 22)
153 return sp[1];
154#endif
155 }
156#endif
141 return pc; 157 return pc;
142} 158}
143EXPORT_SYMBOL(profile_pc); 159EXPORT_SYMBOL(profile_pc);
144#endif
145 160
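The new non-frame-pointer fallback leans on the invariant spelled out in the comment: a saved EFLAGS word has bits 22-31 clear, while i386 kernel text addresses (at or above PAGE_OFFSET) never do, so a single shift distinguishes a return address from saved flags on the stack. Illustrated in isolation:

    #include <stdio.h>

    /* a saved EFLAGS has bits 22-31 clear; kernel addresses don't */
    static int looks_like_kernel_address(unsigned long val)
    {
        return (val >> 22) != 0;
    }

    int main(void)
    {
        unsigned long eflags  = 0x00000246UL;    /* typical saved EFLAGS */
        unsigned long retaddr = 0xc0123456UL;    /* kernel text address */

        printf("%d %d\n", looks_like_kernel_address(eflags),
               looks_like_kernel_address(retaddr));   /* prints "0 1" */
        return 0;
    }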
146/* 161/*
147 * This is the same as the above, except we _also_ save the current 162 * This is the same as the above, except we _also_ save the current
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c
index e2e281d4bcc8..07d6da36a825 100644
--- a/arch/i386/kernel/topology.c
+++ b/arch/i386/kernel/topology.c
@@ -28,6 +28,7 @@
28#include <linux/init.h> 28#include <linux/init.h>
29#include <linux/smp.h> 29#include <linux/smp.h>
30#include <linux/nodemask.h> 30#include <linux/nodemask.h>
31#include <linux/mmzone.h>
31#include <asm/cpu.h> 32#include <asm/cpu.h>
32 33
33static struct i386_cpu cpu_devices[NR_CPUS]; 34static struct i386_cpu cpu_devices[NR_CPUS];
@@ -55,34 +56,18 @@ EXPORT_SYMBOL(arch_register_cpu);
55EXPORT_SYMBOL(arch_unregister_cpu); 56EXPORT_SYMBOL(arch_unregister_cpu);
56#endif /*CONFIG_HOTPLUG_CPU*/ 57#endif /*CONFIG_HOTPLUG_CPU*/
57 58
58
59
60#ifdef CONFIG_NUMA
61#include <linux/mmzone.h>
62
63static int __init topology_init(void) 59static int __init topology_init(void)
64{ 60{
65 int i; 61 int i;
66 62
63#ifdef CONFIG_NUMA
67 for_each_online_node(i) 64 for_each_online_node(i)
68 register_one_node(i); 65 register_one_node(i);
66#endif /* CONFIG_NUMA */
69 67
70 for_each_present_cpu(i) 68 for_each_present_cpu(i)
71 arch_register_cpu(i); 69 arch_register_cpu(i);
72 return 0; 70 return 0;
73} 71}
74 72
75#else /* !CONFIG_NUMA */
76
77static int __init topology_init(void)
78{
79 int i;
80
81 for_each_present_cpu(i)
82 arch_register_cpu(i);
83 return 0;
84}
85
86#endif /* CONFIG_NUMA */
87
88subsys_initcall(topology_init); 73subsys_initcall(topology_init);
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 4fcc6690be99..21aa1cd57773 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -51,6 +51,7 @@
51#include <asm/smp.h> 51#include <asm/smp.h>
52#include <asm/arch_hooks.h> 52#include <asm/arch_hooks.h>
53#include <asm/kdebug.h> 53#include <asm/kdebug.h>
54#include <asm/stacktrace.h>
54 55
55#include <linux/module.h> 56#include <linux/module.h>
56 57
@@ -118,26 +119,16 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
118 p < (void *)tinfo + THREAD_SIZE - 3; 119 p < (void *)tinfo + THREAD_SIZE - 3;
119} 120}
120 121
121/*
122 * Print one address/symbol entries per line.
123 */
124static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
125{
126 printk(" [<%08lx>] ", addr);
127
128 print_symbol("%s\n", addr);
129}
130
131static inline unsigned long print_context_stack(struct thread_info *tinfo, 122static inline unsigned long print_context_stack(struct thread_info *tinfo,
132 unsigned long *stack, unsigned long ebp, 123 unsigned long *stack, unsigned long ebp,
133 char *log_lvl) 124 struct stacktrace_ops *ops, void *data)
134{ 125{
135 unsigned long addr; 126 unsigned long addr;
136 127
137#ifdef CONFIG_FRAME_POINTER 128#ifdef CONFIG_FRAME_POINTER
138 while (valid_stack_ptr(tinfo, (void *)ebp)) { 129 while (valid_stack_ptr(tinfo, (void *)ebp)) {
139 addr = *(unsigned long *)(ebp + 4); 130 addr = *(unsigned long *)(ebp + 4);
140 print_addr_and_symbol(addr, log_lvl); 131 ops->address(data, addr);
141 /* 132 /*
142 * break out of recursive entries (such as 133 * break out of recursive entries (such as
143 * end_of_stack_stop_unwind_function): 134 * end_of_stack_stop_unwind_function):
@@ -150,30 +141,37 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
150 while (valid_stack_ptr(tinfo, stack)) { 141 while (valid_stack_ptr(tinfo, stack)) {
151 addr = *stack++; 142 addr = *stack++;
152 if (__kernel_text_address(addr)) 143 if (__kernel_text_address(addr))
153 print_addr_and_symbol(addr, log_lvl); 144 ops->address(data, addr);
154 } 145 }
155#endif 146#endif
156 return ebp; 147 return ebp;
157} 148}
158 149
150struct ops_and_data {
151 struct stacktrace_ops *ops;
152 void *data;
153};
154
159static asmlinkage int 155static asmlinkage int
160show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) 156dump_trace_unwind(struct unwind_frame_info *info, void *data)
161{ 157{
158 struct ops_and_data *oad = (struct ops_and_data *)data;
162 int n = 0; 159 int n = 0;
163 160
164 while (unwind(info) == 0 && UNW_PC(info)) { 161 while (unwind(info) == 0 && UNW_PC(info)) {
165 n++; 162 n++;
166 print_addr_and_symbol(UNW_PC(info), log_lvl); 163 oad->ops->address(oad->data, UNW_PC(info));
167 if (arch_unw_user_mode(info)) 164 if (arch_unw_user_mode(info))
168 break; 165 break;
169 } 166 }
170 return n; 167 return n;
171} 168}
172 169
173static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 170void dump_trace(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *stack, char *log_lvl) 171 unsigned long *stack,
172 struct stacktrace_ops *ops, void *data)
175{ 173{
176 unsigned long ebp; 174 unsigned long ebp = 0;
177 175
178 if (!task) 176 if (!task)
179 task = current; 177 task = current;
@@ -181,54 +179,116 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
181 if (call_trace >= 0) { 179 if (call_trace >= 0) {
182 int unw_ret = 0; 180 int unw_ret = 0;
183 struct unwind_frame_info info; 181 struct unwind_frame_info info;
182 struct ops_and_data oad = { .ops = ops, .data = data };
184 183
185 if (regs) { 184 if (regs) {
186 if (unwind_init_frame_info(&info, task, regs) == 0) 185 if (unwind_init_frame_info(&info, task, regs) == 0)
187 unw_ret = show_trace_unwind(&info, log_lvl); 186 unw_ret = dump_trace_unwind(&info, &oad);
188 } else if (task == current) 187 } else if (task == current)
189 unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); 188 unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
190 else { 189 else {
191 if (unwind_init_blocked(&info, task) == 0) 190 if (unwind_init_blocked(&info, task) == 0)
192 unw_ret = show_trace_unwind(&info, log_lvl); 191 unw_ret = dump_trace_unwind(&info, &oad);
193 } 192 }
194 if (unw_ret > 0) { 193 if (unw_ret > 0) {
195 if (call_trace == 1 && !arch_unw_user_mode(&info)) { 194 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
196 print_symbol("DWARF2 unwinder stuck at %s\n", 195 ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
197 UNW_PC(&info)); 196 UNW_PC(&info));
198 if (UNW_SP(&info) >= PAGE_OFFSET) { 197 if (UNW_SP(&info) >= PAGE_OFFSET) {
199 printk("Leftover inexact backtrace:\n"); 198 ops->warning(data, "Leftover inexact backtrace:\n");
200 stack = (void *)UNW_SP(&info); 199 stack = (void *)UNW_SP(&info);
200 if (!stack)
201 return;
202 ebp = UNW_FP(&info);
201 } else 203 } else
202 printk("Full inexact backtrace again:\n"); 204 ops->warning(data, "Full inexact backtrace again:\n");
203 } else if (call_trace >= 1) 205 } else if (call_trace >= 1)
204 return; 206 return;
205 else 207 else
206 printk("Full inexact backtrace again:\n"); 208 ops->warning(data, "Full inexact backtrace again:\n");
207 } else 209 } else
208 printk("Inexact backtrace:\n"); 210 ops->warning(data, "Inexact backtrace:\n");
211 }
212 if (!stack) {
213 unsigned long dummy;
214 stack = &dummy;
215 if (task && task != current)
216 stack = (unsigned long *)task->thread.esp;
209 } 217 }
210 218
211 if (task == current) { 219#ifdef CONFIG_FRAME_POINTER
212 /* Grab ebp right from our regs */ 220 if (!ebp) {
213 asm ("movl %%ebp, %0" : "=r" (ebp) : ); 221 if (task == current) {
214 } else { 222 /* Grab ebp right from our regs */
215 /* ebp is the last reg pushed by switch_to */ 223 asm ("movl %%ebp, %0" : "=r" (ebp) : );
216 ebp = *(unsigned long *) task->thread.esp; 224 } else {
225 /* ebp is the last reg pushed by switch_to */
226 ebp = *(unsigned long *) task->thread.esp;
227 }
217 } 228 }
229#endif
218 230
219 while (1) { 231 while (1) {
220 struct thread_info *context; 232 struct thread_info *context;
221 context = (struct thread_info *) 233 context = (struct thread_info *)
222 ((unsigned long)stack & (~(THREAD_SIZE - 1))); 234 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
223 ebp = print_context_stack(context, stack, ebp, log_lvl); 235 ebp = print_context_stack(context, stack, ebp, ops, data);
236 /* Should be after the line below, but somewhere
237 in early boot context comes out corrupted and we
238 can't reference it -AK */
239 if (ops->stack(data, "IRQ") < 0)
240 break;
224 stack = (unsigned long*)context->previous_esp; 241 stack = (unsigned long*)context->previous_esp;
225 if (!stack) 242 if (!stack)
226 break; 243 break;
227 printk("%s =======================\n", log_lvl);
228 } 244 }
229} 245}
246EXPORT_SYMBOL(dump_trace);
247
248static void
249print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
250{
251 printk(data);
252 print_symbol(msg, symbol);
253 printk("\n");
254}
255
256static void print_trace_warning(void *data, char *msg)
257{
258 printk("%s%s\n", (char *)data, msg);
259}
260
261static int print_trace_stack(void *data, char *name)
262{
263 return 0;
264}
265
266/*
267 * Print one address/symbol entries per line.
268 */
269static void print_trace_address(void *data, unsigned long addr)
270{
271 printk("%s [<%08lx>] ", (char *)data, addr);
272 print_symbol("%s\n", addr);
273}
274
275static struct stacktrace_ops print_trace_ops = {
276 .warning = print_trace_warning,
277 .warning_symbol = print_trace_warning_symbol,
278 .stack = print_trace_stack,
279 .address = print_trace_address,
280};
281
282static void
283show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
284 unsigned long * stack, char *log_lvl)
285{
286 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
287 printk("%s =======================\n", log_lvl);
288}
230 289
231void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) 290void show_trace(struct task_struct *task, struct pt_regs *regs,
291 unsigned long * stack)
232{ 292{
233 show_trace_log_lvl(task, regs, stack, ""); 293 show_trace_log_lvl(task, regs, stack, "");
234} 294}
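The show_trace/dump_trace split above turns the stack walker into a producer that feeds a struct stacktrace_ops, so a consumer can do something other than printk each address. A sketch of a counting consumer against a stubbed-out walker; fake_dump_trace stands in for the real dump_trace(), which needs task/regs context:

    #include <stdio.h>

    /* mirrors the four hooks introduced in this patch */
    struct stacktrace_ops {
        void (*warning)(void *data, char *msg);
        void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
        int  (*stack)(void *data, char *name);
        void (*address)(void *data, unsigned long addr);
    };

    static void count_warning(void *data, char *msg)
    { (void)data; (void)msg; }
    static void count_warning_symbol(void *data, char *msg, unsigned long sym)
    { (void)data; (void)msg; (void)sym; }
    static int count_stack(void *data, char *name)
    { (void)data; (void)name; return 0; }
    static void count_address(void *data, unsigned long addr)
    {
        ++*(int *)data;                 /* just count frames */
        (void)addr;
    }

    static struct stacktrace_ops count_ops = {
        .warning        = count_warning,
        .warning_symbol = count_warning_symbol,
        .stack          = count_stack,
        .address        = count_address,
    };

    /* stub walker: feed a few fake return addresses to the consumer */
    static void fake_dump_trace(struct stacktrace_ops *ops, void *data)
    {
        unsigned long frames[] = { 0xc0101234UL, 0xc0105678UL, 0xc0109abcUL };
        unsigned int i;

        for (i = 0; i < sizeof(frames) / sizeof(frames[0]); i++)
            ops->address(data, frames[i]);
    }

    int main(void)
    {
        int depth = 0;

        fake_dump_trace(&count_ops, &depth);
        printf("captured %d frames\n", depth);
        return 0;
    }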
@@ -291,8 +351,9 @@ void show_registers(struct pt_regs *regs)
291 ss = regs->xss & 0xffff; 351 ss = regs->xss & 0xffff;
292 } 352 }
293 print_modules(); 353 print_modules();
294 printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" 354 printk(KERN_EMERG "CPU: %d\n"
295 "EFLAGS: %08lx (%s %.*s) \n", 355 KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
356 KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
296 smp_processor_id(), 0xffff & regs->xcs, regs->eip, 357 smp_processor_id(), 0xffff & regs->xcs, regs->eip,
297 print_tainted(), regs->eflags, system_utsname.release, 358 print_tainted(), regs->eflags, system_utsname.release,
298 (int)strcspn(system_utsname.version, " "), 359 (int)strcspn(system_utsname.version, " "),
@@ -634,18 +695,24 @@ gp_in_kernel:
634 } 695 }
635} 696}
636 697
637static void mem_parity_error(unsigned char reason, struct pt_regs * regs) 698static __kprobes void
699mem_parity_error(unsigned char reason, struct pt_regs * regs)
638{ 700{
639 printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " 701 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
640 "to continue\n"); 702 "CPU %d.\n", reason, smp_processor_id());
641 printk(KERN_EMERG "You probably have a hardware problem with your RAM " 703 printk(KERN_EMERG "You probably have a hardware problem with your RAM "
642 "chips\n"); 704 "chips\n");
705 if (panic_on_unrecovered_nmi)
706 panic("NMI: Not continuing");
707
708 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
643 709
644 /* Clear and disable the memory parity error line. */ 710 /* Clear and disable the memory parity error line. */
645 clear_mem_error(reason); 711 clear_mem_error(reason);
646} 712}
647 713
648static void io_check_error(unsigned char reason, struct pt_regs * regs) 714static __kprobes void
715io_check_error(unsigned char reason, struct pt_regs * regs)
649{ 716{
650 unsigned long i; 717 unsigned long i;
651 718
@@ -661,7 +728,8 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs)
661 outb(reason, 0x61); 728 outb(reason, 0x61);
662} 729}
663 730
664static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) 731static __kprobes void
732unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
665{ 733{
666#ifdef CONFIG_MCA 734#ifdef CONFIG_MCA
667 /* Might actually be able to figure out what the guilty party 735 /* Might actually be able to figure out what the guilty party
@@ -671,15 +739,18 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
671 return; 739 return;
672 } 740 }
673#endif 741#endif
674 printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", 742 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
675 reason, smp_processor_id()); 743 "CPU %d.\n", reason, smp_processor_id());
676 printk("Dazed and confused, but trying to continue\n"); 744 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
677 printk("Do you have a strange power saving mode enabled?\n"); 745 if (panic_on_unrecovered_nmi)
746 panic("NMI: Not continuing");
747
748 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
678} 749}
679 750
680static DEFINE_SPINLOCK(nmi_print_lock); 751static DEFINE_SPINLOCK(nmi_print_lock);
681 752
682void die_nmi (struct pt_regs *regs, const char *msg) 753void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
683{ 754{
684 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == 755 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
685 NOTIFY_STOP) 756 NOTIFY_STOP)
@@ -711,7 +782,7 @@ void die_nmi (struct pt_regs *regs, const char *msg)
711 do_exit(SIGSEGV); 782 do_exit(SIGSEGV);
712} 783}
713 784
714static void default_do_nmi(struct pt_regs * regs) 785static __kprobes void default_do_nmi(struct pt_regs * regs)
715{ 786{
716 unsigned char reason = 0; 787 unsigned char reason = 0;
717 788
@@ -728,12 +799,12 @@ static void default_do_nmi(struct pt_regs * regs)
728 * Ok, so this is none of the documented NMI sources, 799 * Ok, so this is none of the documented NMI sources,
729 * so it must be the NMI watchdog. 800 * so it must be the NMI watchdog.
730 */ 801 */
731 if (nmi_watchdog) { 802 if (nmi_watchdog_tick(regs, reason))
732 nmi_watchdog_tick(regs);
733 return; 803 return;
734 } 804 if (!do_nmi_callback(regs, smp_processor_id()))
735#endif 805#endif
736 unknown_nmi_error(reason, regs); 806 unknown_nmi_error(reason, regs);
807
737 return; 808 return;
738 } 809 }
739 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 810 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -749,14 +820,7 @@ static void default_do_nmi(struct pt_regs * regs)
749 reassert_nmi(); 820 reassert_nmi();
750} 821}
751 822
752static int dummy_nmi_callback(struct pt_regs * regs, int cpu) 823fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
753{
754 return 0;
755}
756
757static nmi_callback_t nmi_callback = dummy_nmi_callback;
758
759fastcall void do_nmi(struct pt_regs * regs, long error_code)
760{ 824{
761 int cpu; 825 int cpu;
762 826
@@ -766,25 +830,11 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
766 830
767 ++nmi_count(cpu); 831 ++nmi_count(cpu);
768 832
769 if (!rcu_dereference(nmi_callback)(regs, cpu)) 833 default_do_nmi(regs);
770 default_do_nmi(regs);
771 834
772 nmi_exit(); 835 nmi_exit();
773} 836}
774 837
775void set_nmi_callback(nmi_callback_t callback)
776{
777 vmalloc_sync_all();
778 rcu_assign_pointer(nmi_callback, callback);
779}
780EXPORT_SYMBOL_GPL(set_nmi_callback);
781
782void unset_nmi_callback(void)
783{
784 nmi_callback = dummy_nmi_callback;
785}
786EXPORT_SYMBOL_GPL(unset_nmi_callback);
787
788#ifdef CONFIG_KPROBES 838#ifdef CONFIG_KPROBES
789fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) 839fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
790{ 840{
@@ -1124,20 +1174,6 @@ void __init trap_init_f00f_bug(void)
1124} 1174}
1125#endif 1175#endif
1126 1176
1127#define _set_gate(gate_addr,type,dpl,addr,seg) \
1128do { \
1129 int __d0, __d1; \
1130 __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
1131 "movw %4,%%dx\n\t" \
1132 "movl %%eax,%0\n\t" \
1133 "movl %%edx,%1" \
1134 :"=m" (*((long *) (gate_addr))), \
1135 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
1136 :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
1137 "3" ((char *) (addr)),"2" ((seg) << 16)); \
1138} while (0)
1139
1140
1141/* 1177/*
1142 * This needs to use 'idt_table' rather than 'idt', and 1178 * This needs to use 'idt_table' rather than 'idt', and
1143 * thus use the _nonmapped_ version of the IDT, as the 1179 * thus use the _nonmapped_ version of the IDT, as the
@@ -1146,7 +1182,7 @@ do { \
1146 */ 1182 */
1147void set_intr_gate(unsigned int n, void *addr) 1183void set_intr_gate(unsigned int n, void *addr)
1148{ 1184{
1149 _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); 1185 _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS);
1150} 1186}
1151 1187
1152/* 1188/*
@@ -1154,22 +1190,22 @@ void set_intr_gate(unsigned int n, void *addr)
1154 */ 1190 */
1155static inline void set_system_intr_gate(unsigned int n, void *addr) 1191static inline void set_system_intr_gate(unsigned int n, void *addr)
1156{ 1192{
1157 _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); 1193 _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS);
1158} 1194}
1159 1195
1160static void __init set_trap_gate(unsigned int n, void *addr) 1196static void __init set_trap_gate(unsigned int n, void *addr)
1161{ 1197{
1162 _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); 1198 _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS);
1163} 1199}
1164 1200
1165static void __init set_system_gate(unsigned int n, void *addr) 1201static void __init set_system_gate(unsigned int n, void *addr)
1166{ 1202{
1167 _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); 1203 _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS);
1168} 1204}
1169 1205
1170static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) 1206static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
1171{ 1207{
1172 _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); 1208 _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
1173} 1209}
1174 1210
1175 1211
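The removed _set_gate() asm macro encoded an i386 gate descriptor: the low word holds the segment selector in its top half and handler bits 0-15 in the bottom, and the high word holds handler bits 16-31 plus the present bit (0x8000), the DPL and the type. The same encoding in plain C, for reference:

    #include <stdio.h>

    static void encode_gate(unsigned int type, unsigned int dpl,
                            unsigned long addr, unsigned int seg,
                            unsigned long gate[2])
    {
        /* low word: selector | handler bits 0-15 */
        gate[0] = ((unsigned long)seg << 16) | (addr & 0xffffUL);
        /* high word: handler bits 16-31 | present | DPL | type */
        gate[1] = (addr & 0xffff0000UL) | 0x8000UL | (dpl << 13) | (type << 8);
    }

    int main(void)
    {
        unsigned long gate[2];

        encode_gate(14, 0, 0xc0101000UL, 0x60, gate); /* intr gate, ring 0 */
        printf("%#010lx %#010lx\n", gate[0], gate[1]);
        return 0;
    }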
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 7e0d8dab2075..b8fa0a8b2e47 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -192,7 +192,7 @@ int recalibrate_cpu_khz(void)
192 192
193EXPORT_SYMBOL(recalibrate_cpu_khz); 193EXPORT_SYMBOL(recalibrate_cpu_khz);
194 194
195void tsc_init(void) 195void __init tsc_init(void)
196{ 196{
197 if (!cpu_has_tsc || tsc_disable) 197 if (!cpu_has_tsc || tsc_disable)
198 return; 198 return;
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
index 914933e9ec3d..d86a548b8d54 100644
--- a/arch/i386/lib/Makefile
+++ b/arch/i386/lib/Makefile
@@ -4,6 +4,6 @@
4 4
5 5
6lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \ 6lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
7 bitops.o 7 bitops.o semaphore.o
8 8
9lib-$(CONFIG_X86_USE_3DNOW) += mmx.o 9lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff --git a/arch/i386/lib/semaphore.S b/arch/i386/lib/semaphore.S
new file mode 100644
index 000000000000..01f80b5c45d2
--- /dev/null
+++ b/arch/i386/lib/semaphore.S
@@ -0,0 +1,217 @@
1/*
2 * i386 semaphore implementation.
3 *
4 * (C) Copyright 1999 Linus Torvalds
5 *
6 * Portions Copyright 1999 Red Hat, Inc.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
14 */
15
16#include <linux/config.h>
17#include <linux/linkage.h>
18#include <asm/rwlock.h>
19#include <asm/alternative-asm.i>
20#include <asm/frame.i>
21#include <asm/dwarf2.h>
22
23/*
24 * The semaphore operations have a special calling sequence that
25 * allow us to do a simpler in-line version of them. These routines
26 * need to convert that sequence back into the C sequence when
27 * there is contention on the semaphore.
28 *
29 * %eax contains the semaphore pointer on entry. Save the C-clobbered
30 * registers (%eax, %edx and %ecx) except %eax which is either a return
31 * value or just clobbered..
32 */
33 .section .sched.text
34ENTRY(__down_failed)
35 CFI_STARTPROC
36 FRAME
37 pushl %edx
38 CFI_ADJUST_CFA_OFFSET 4
39 CFI_REL_OFFSET edx,0
40 pushl %ecx
41 CFI_ADJUST_CFA_OFFSET 4
42 CFI_REL_OFFSET ecx,0
43 call __down
44 popl %ecx
45 CFI_ADJUST_CFA_OFFSET -4
46 CFI_RESTORE ecx
47 popl %edx
48 CFI_ADJUST_CFA_OFFSET -4
49 CFI_RESTORE edx
50 ENDFRAME
51 ret
52 CFI_ENDPROC
53 END(__down_failed)
54
55ENTRY(__down_failed_interruptible)
56 CFI_STARTPROC
57 FRAME
58 pushl %edx
59 CFI_ADJUST_CFA_OFFSET 4
60 CFI_REL_OFFSET edx,0
61 pushl %ecx
62 CFI_ADJUST_CFA_OFFSET 4
63 CFI_REL_OFFSET ecx,0
64 call __down_interruptible
65 popl %ecx
66 CFI_ADJUST_CFA_OFFSET -4
67 CFI_RESTORE ecx
68 popl %edx
69 CFI_ADJUST_CFA_OFFSET -4
70 CFI_RESTORE edx
71 ENDFRAME
72 ret
73 CFI_ENDPROC
74 END(__down_failed_interruptible)
75
76ENTRY(__down_failed_trylock)
77 CFI_STARTPROC
78 FRAME
79 pushl %edx
80 CFI_ADJUST_CFA_OFFSET 4
81 CFI_REL_OFFSET edx,0
82 pushl %ecx
83 CFI_ADJUST_CFA_OFFSET 4
84 CFI_REL_OFFSET ecx,0
85 call __down_trylock
86 popl %ecx
87 CFI_ADJUST_CFA_OFFSET -4
88 CFI_RESTORE ecx
89 popl %edx
90 CFI_ADJUST_CFA_OFFSET -4
91 CFI_RESTORE edx
92 ENDFRAME
93 ret
94 CFI_ENDPROC
95 END(__down_failed_trylock)
96
97ENTRY(__up_wakeup)
98 CFI_STARTPROC
99 FRAME
100 pushl %edx
101 CFI_ADJUST_CFA_OFFSET 4
102 CFI_REL_OFFSET edx,0
103 pushl %ecx
104 CFI_ADJUST_CFA_OFFSET 4
105 CFI_REL_OFFSET ecx,0
106 call __up
107 popl %ecx
108 CFI_ADJUST_CFA_OFFSET -4
109 CFI_RESTORE ecx
110 popl %edx
111 CFI_ADJUST_CFA_OFFSET -4
112 CFI_RESTORE edx
113 ENDFRAME
114 ret
115 CFI_ENDPROC
116 END(__up_wakeup)
117
118/*
119 * rw spinlock fallbacks
120 */
121#ifdef CONFIG_SMP
122ENTRY(__write_lock_failed)
123 CFI_STARTPROC simple
124 FRAME
1252: LOCK_PREFIX
126 addl $ RW_LOCK_BIAS,(%eax)
1271: rep; nop
128 cmpl $ RW_LOCK_BIAS,(%eax)
129 jne 1b
130 LOCK_PREFIX
131 subl $ RW_LOCK_BIAS,(%eax)
132 jnz 2b
133 ENDFRAME
134 ret
135 CFI_ENDPROC
136 END(__write_lock_failed)
137
138ENTRY(__read_lock_failed)
139 CFI_STARTPROC
140 FRAME
1412: LOCK_PREFIX
142 incl (%eax)
1431: rep; nop
144 cmpl $1,(%eax)
145 js 1b
146 LOCK_PREFIX
147 decl (%eax)
148 js 2b
149 ENDFRAME
150 ret
151 CFI_ENDPROC
152 END(__read_lock_failed)
153
154#endif
155
156/* Fix up special calling conventions */
157ENTRY(call_rwsem_down_read_failed)
158 CFI_STARTPROC
159 push %ecx
160 CFI_ADJUST_CFA_OFFSET 4
161 CFI_REL_OFFSET ecx,0
162 push %edx
163 CFI_ADJUST_CFA_OFFSET 4
164 CFI_REL_OFFSET edx,0
165 call rwsem_down_read_failed
166 pop %edx
167 CFI_ADJUST_CFA_OFFSET -4
168 pop %ecx
169 CFI_ADJUST_CFA_OFFSET -4
170 ret
171 CFI_ENDPROC
172 END(call_rwsem_down_read_failed)
173
174ENTRY(call_rwsem_down_write_failed)
175 CFI_STARTPROC
176 push %ecx
177 CFI_ADJUST_CFA_OFFSET 4
178 CFI_REL_OFFSET ecx,0
179 calll rwsem_down_write_failed
180 pop %ecx
181 CFI_ADJUST_CFA_OFFSET -4
182 ret
183 CFI_ENDPROC
184 END(call_rwsem_down_write_failed)
185
186ENTRY(call_rwsem_wake)
187 CFI_STARTPROC
188 decw %dx /* do nothing if still outstanding active readers */
189 jnz 1f
190 push %ecx
191 CFI_ADJUST_CFA_OFFSET 4
192 CFI_REL_OFFSET ecx,0
193 call rwsem_wake
194 pop %ecx
195 CFI_ADJUST_CFA_OFFSET -4
1961: ret
197 CFI_ENDPROC
198 END(call_rwsem_wake)
199
200/* Fix up special calling conventions */
201ENTRY(call_rwsem_downgrade_wake)
202 CFI_STARTPROC
203 push %ecx
204 CFI_ADJUST_CFA_OFFSET 4
205 CFI_REL_OFFSET ecx,0
206 push %edx
207 CFI_ADJUST_CFA_OFFSET 4
208 CFI_REL_OFFSET edx,0
209 call rwsem_downgrade_wake
210 pop %edx
211 CFI_ADJUST_CFA_OFFSET -4
212 pop %ecx
213 CFI_ADJUST_CFA_OFFSET -4
214 ret
215 CFI_ENDPROC
216 END(call_rwsem_downgrade_wake)
217
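__write_lock_failed above implements the biased rwlock scheme: the lock word rests at RW_LOCK_BIAS, each reader subtracts 1, and a writer claims the entire bias in one atomic subtract; on contention it gives the bias back and spins until the word reads RW_LOCK_BIAS again, then retries. A userspace sketch with C11 atomics (spinning, no fairness, purely illustrative):

    #include <stdatomic.h>
    #include <stdio.h>

    #define RW_LOCK_BIAS 0x01000000

    static atomic_int lock = RW_LOCK_BIAS;

    static void write_lock(void)
    {
        /* success only when the old value was the full bias,
         * i.e. no readers and no other writer */
        while (atomic_fetch_sub(&lock, RW_LOCK_BIAS) != RW_LOCK_BIAS) {
            atomic_fetch_add(&lock, RW_LOCK_BIAS);   /* undo our attempt */
            while (atomic_load(&lock) != RW_LOCK_BIAS)
                ;                                    /* spin: readers active */
        }
    }

    static void write_unlock(void)
    {
        atomic_fetch_add(&lock, RW_LOCK_BIAS);
    }

    int main(void)
    {
        write_lock();
        puts("write lock held");
        write_unlock();
        return 0;
    }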
diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c
index ef7a6e6fcb9f..33d9f93557ba 100644
--- a/arch/i386/mach-generic/bigsmp.c
+++ b/arch/i386/mach-generic/bigsmp.c
@@ -5,6 +5,7 @@
5#define APIC_DEFINITION 1 5#define APIC_DEFINITION 1
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/cpumask.h> 7#include <linux/cpumask.h>
8#include <asm/smp.h>
8#include <asm/mpspec.h> 9#include <asm/mpspec.h>
9#include <asm/genapic.h> 10#include <asm/genapic.h>
10#include <asm/fixmap.h> 11#include <asm/fixmap.h>
diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c
index 845cdd0b3593..aa144d82334d 100644
--- a/arch/i386/mach-generic/es7000.c
+++ b/arch/i386/mach-generic/es7000.c
@@ -4,6 +4,7 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
7#include <asm/mpspec.h> 8#include <asm/mpspec.h>
8#include <asm/genapic.h> 9#include <asm/genapic.h>
9#include <asm/fixmap.h> 10#include <asm/fixmap.h>
diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c
index bcd1bcfaa723..94b1fd9cbe3c 100644
--- a/arch/i386/mach-generic/probe.c
+++ b/arch/i386/mach-generic/probe.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/ctype.h> 10#include <linux/ctype.h>
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/errno.h>
12#include <asm/fixmap.h> 13#include <asm/fixmap.h>
13#include <asm/mpspec.h> 14#include <asm/mpspec.h>
14#include <asm/apicdef.h> 15#include <asm/apicdef.h>
@@ -29,7 +30,24 @@ struct genapic *apic_probe[] __initdata = {
29 NULL, 30 NULL,
30}; 31};
31 32
32static int cmdline_apic; 33static int cmdline_apic __initdata;
34static int __init parse_apic(char *arg)
35{
36 int i;
37
38 if (!arg)
39 return -EINVAL;
40
41 for (i = 0; apic_probe[i]; i++) {
42 if (!strcmp(apic_probe[i]->name, arg)) {
43 genapic = apic_probe[i];
44 cmdline_apic = 1;
45 return 0;
46 }
47 }
48 return -ENOENT;
49}
50early_param("apic", parse_apic);
33 51
34void __init generic_bigsmp_probe(void) 52void __init generic_bigsmp_probe(void)
35{ 53{
@@ -48,40 +66,20 @@ void __init generic_bigsmp_probe(void)
48 } 66 }
49} 67}
50 68
51void __init generic_apic_probe(char *command_line) 69void __init generic_apic_probe(void)
52{ 70{
53 char *s; 71 if (!cmdline_apic) {
54 int i; 72 int i;
55 int changed = 0; 73 for (i = 0; apic_probe[i]; i++) {
56 74 if (apic_probe[i]->probe()) {
57 s = strstr(command_line, "apic=");
58 if (s && (s == command_line || isspace(s[-1]))) {
59 char *p = strchr(s, ' '), old;
60 if (!p)
61 p = strchr(s, '\0');
62 old = *p;
63 *p = 0;
64 for (i = 0; !changed && apic_probe[i]; i++) {
65 if (!strcmp(apic_probe[i]->name, s+5)) {
66 changed = 1;
67 genapic = apic_probe[i]; 75 genapic = apic_probe[i];
76 break;
68 } 77 }
69 } 78 }
70 if (!changed) 79 /* Not visible without early console */
71 printk(KERN_ERR "Unknown genapic `%s' specified.\n", s); 80 if (!apic_probe[i])
72 *p = old; 81 panic("Didn't find an APIC driver");
73 cmdline_apic = changed;
74 }
75 for (i = 0; !changed && apic_probe[i]; i++) {
76 if (apic_probe[i]->probe()) {
77 changed = 1;
78 genapic = apic_probe[i];
79 }
80 } 82 }
81 /* Not visible without early console */
82 if (!changed)
83 panic("Didn't find an APIC driver");
84
85 printk(KERN_INFO "Using APIC driver %s\n", genapic->name); 83 printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
86} 84}
87 85
@@ -119,7 +117,9 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
119 return 0; 117 return 0;
120} 118}
121 119
120#ifdef CONFIG_SMP
122int hard_smp_processor_id(void) 121int hard_smp_processor_id(void)
123{ 122{
124 return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); 123 return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
125} 124}
125#endif
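
    Annotation: the early_param() used by the new parse_apic() above
    registers the handler for the very first command-line pass, long
    before normal __setup() handlers run. A simplified sketch of the
    mechanism (the real definitions live in include/linux/init.h and
    go through __setup_param(); field names follow that header):

        struct obs_kernel_param {
            const char *str;            /* parameter name, e.g. "apic" */
            int (*setup_func)(char *);  /* handler, receives the value */
            int early;                  /* picked up by parse_early_param() */
        };

        /* Drop a descriptor into a dedicated linker section that the
         * early boot code walks. */
        #define early_param(str, fn)                                      \
            static const struct obs_kernel_param __setup_##fn             \
                __attribute__((__used__, __section__(".init.setup")))     \
                = { str, fn, 1 }
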
diff --git a/arch/i386/mach-generic/summit.c b/arch/i386/mach-generic/summit.c
index b73501ddd653..f7e5d66648dc 100644
--- a/arch/i386/mach-generic/summit.c
+++ b/arch/i386/mach-generic/summit.c
@@ -4,6 +4,7 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
7#include <asm/mpspec.h> 8#include <asm/mpspec.h>
8#include <asm/genapic.h> 9#include <asm/genapic.h>
9#include <asm/fixmap.h> 10#include <asm/fixmap.h>
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index fb5d8b747de4..941d1a5ebabb 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -322,6 +322,11 @@ unsigned long __init setup_memory(void)
322 highstart_pfn = system_max_low_pfn; 322 highstart_pfn = system_max_low_pfn;
323 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 323 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
324 pages_to_mb(highend_pfn - highstart_pfn)); 324 pages_to_mb(highend_pfn - highstart_pfn));
325 num_physpages = highend_pfn;
326 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
327#else
328 num_physpages = system_max_low_pfn;
329 high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1;
325#endif 330#endif
326 printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 331 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
327 pages_to_mb(system_max_low_pfn)); 332 pages_to_mb(system_max_low_pfn));
diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c
index de03c5430abc..0ce4f22a2635 100644
--- a/arch/i386/mm/extable.c
+++ b/arch/i386/mm/extable.c
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
11 const struct exception_table_entry *fixup; 11 const struct exception_table_entry *fixup;
12 12
13#ifdef CONFIG_PNPBIOS 13#ifdef CONFIG_PNPBIOS
14 if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3))) 14 if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
15 { 15 {
16 extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; 16 extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
17 extern u32 pnp_bios_is_utter_crap; 17 extern u32 pnp_bios_is_utter_crap;
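
    Annotation: the SEGMENT_IS_PNP_CODE() macro presumably wraps the
    very expression it replaces here. A sketch of the idea; the base
    index below is illustrative, not the real GDT layout:

        /* An x86 segment selector: bits 0-1 RPL, bit 2 TI, bits 3-15
         * index. Masking with ~15 ignores RPL, TI and the low index
         * bit, so one compare matches a pair of adjacent GDT entries. */
        #define GDT_ENTRY_PNPBIOS_BASE  18   /* illustrative value only */
        #define SEGMENT_IS_PNP_CODE(x)  (((x) & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3))
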
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index f7279468323a..5e17a3f43b41 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -27,21 +27,24 @@
27#include <asm/uaccess.h> 27#include <asm/uaccess.h>
28#include <asm/desc.h> 28#include <asm/desc.h>
29#include <asm/kdebug.h> 29#include <asm/kdebug.h>
30#include <asm/segment.h>
30 31
31extern void die(const char *,struct pt_regs *,long); 32extern void die(const char *,struct pt_regs *,long);
32 33
33#ifdef CONFIG_KPROBES 34static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
34ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); 35
35int register_page_fault_notifier(struct notifier_block *nb) 36int register_page_fault_notifier(struct notifier_block *nb)
36{ 37{
37 vmalloc_sync_all(); 38 vmalloc_sync_all();
38 return atomic_notifier_chain_register(&notify_page_fault_chain, nb); 39 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
39} 40}
41EXPORT_SYMBOL_GPL(register_page_fault_notifier);
40 42
41int unregister_page_fault_notifier(struct notifier_block *nb) 43int unregister_page_fault_notifier(struct notifier_block *nb)
42{ 44{
43 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); 45 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
44} 46}
47EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
45 48
46static inline int notify_page_fault(enum die_val val, const char *str, 49static inline int notify_page_fault(enum die_val val, const char *str,
47 struct pt_regs *regs, long err, int trap, int sig) 50 struct pt_regs *regs, long err, int trap, int sig)
@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum die_val val, const char *str,
55 }; 58 };
56 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); 59 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
57} 60}
58#else
59static inline int notify_page_fault(enum die_val val, const char *str,
60 struct pt_regs *regs, long err, int trap, int sig)
61{
62 return NOTIFY_DONE;
63}
64#endif
65
66 61
67/* 62/*
68 * Unlock any spinlocks which will prevent us from getting the 63 * Unlock any spinlocks which will prevent us from getting the
@@ -119,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
119 } 114 }
120 115
121 /* The standard kernel/user address space limit. */ 116 /* The standard kernel/user address space limit. */
122 *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg; 117 *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
123 118
124 /* By far the most common cases. */ 119 /* By far the most common cases. */
125 if (likely(seg == __USER_CS || seg == __KERNEL_CS)) 120 if (likely(SEGMENT_IS_FLAT_CODE(seg)))
126 return eip; 121 return eip;
127 122
128 /* Check the segment exists, is within the current LDT/GDT size, 123 /* Check the segment exists, is within the current LDT/GDT size,
@@ -436,11 +431,7 @@ good_area:
436 write = 0; 431 write = 0;
437 switch (error_code & 3) { 432 switch (error_code & 3) {
438 default: /* 3: write, present */ 433 default: /* 3: write, present */
439#ifdef TEST_VERIFY_AREA 434 /* fall through */
440 if (regs->cs == KERNEL_CS)
441 printk("WP fault at %08lx\n", regs->eip);
442#endif
443 /* fall through */
444 case 2: /* write, not present */ 435 case 2: /* write, not present */
445 if (!(vma->vm_flags & VM_WRITE)) 436 if (!(vma->vm_flags & VM_WRITE))
446 goto bad_area; 437 goto bad_area;
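
    Annotation: the switch on (error_code & 3) above decodes the two
    low bits of the x86 page-fault error code. For reference (the
    mnemonic names here are illustrative; this code uses the raw bits):

        /* x86 page-fault error code, low bits:
         *   bit 0 (PF_PROT):  0 = page not present, 1 = protection fault
         *   bit 1 (PF_WRITE): 0 = read access,      1 = write access
         * so case 3 = write to a present page, case 2 = write to a
         * missing page, cases 1 and 0 = the two read variants. */
        enum pf_error_bits { PF_PROT = 1 << 0, PF_WRITE = 1 << 1 };
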
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index b6eb4dcb8777..ba44000b9069 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -54,7 +54,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
54 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; 54 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
55 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); 55 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
56 56
57 if (vaddr < FIXADDR_START) { // FIXME 57 if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
58 dec_preempt_count(); 58 dec_preempt_count();
59 preempt_check_resched(); 59 preempt_check_resched();
60 return; 60 return;
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index efd0bcdac65d..4a5a914b3432 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
435 * on Enable 435 * on Enable
436 * off Disable 436 * off Disable
437 */ 437 */
438void __init noexec_setup(const char *str) 438static int __init noexec_setup(char *str)
439{ 439{
440 if (!strncmp(str, "on",2) && cpu_has_nx) { 440 if (!str || !strcmp(str, "on")) {
441 __supported_pte_mask |= _PAGE_NX; 441 if (cpu_has_nx) {
442 disable_nx = 0; 442 __supported_pte_mask |= _PAGE_NX;
443 } else if (!strncmp(str,"off",3)) { 443 disable_nx = 0;
444 }
445 } else if (!strcmp(str,"off")) {
444 disable_nx = 1; 446 disable_nx = 1;
445 __supported_pte_mask &= ~_PAGE_NX; 447 __supported_pte_mask &= ~_PAGE_NX;
446 } 448 } else
449 return -EINVAL;
450
451 return 0;
447} 452}
453early_param("noexec", noexec_setup);
448 454
449int nx_enabled = 0; 455int nx_enabled = 0;
450#ifdef CONFIG_X86_PAE 456#ifdef CONFIG_X86_PAE
@@ -552,18 +558,6 @@ static void __init test_wp_bit(void)
552 } 558 }
553} 559}
554 560
555static void __init set_max_mapnr_init(void)
556{
557#ifdef CONFIG_HIGHMEM
558 num_physpages = highend_pfn;
559#else
560 num_physpages = max_low_pfn;
561#endif
562#ifdef CONFIG_FLATMEM
563 max_mapnr = num_physpages;
564#endif
565}
566
567static struct kcore_list kcore_mem, kcore_vmalloc; 561static struct kcore_list kcore_mem, kcore_vmalloc;
568 562
569void __init mem_init(void) 563void __init mem_init(void)
@@ -590,14 +584,6 @@ void __init mem_init(void)
590 } 584 }
591#endif 585#endif
592 586
593 set_max_mapnr_init();
594
595#ifdef CONFIG_HIGHMEM
596 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
597#else
598 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
599#endif
600
601 /* this will put all low memory onto the freelists */ 587 /* this will put all low memory onto the freelists */
602 totalram_pages += free_all_bootmem(); 588 totalram_pages += free_all_bootmem();
603 589
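
    Annotation: one subtle improvement in the new noexec_setup():
    strncmp(str, "on", 2) accepted any argument merely starting with
    "on", while strcmp() demands an exact match (and a bare "noexec"
    with no value now defaults to on). A trivial user-space
    illustration of the difference:

        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
            const char *arg = "onward";                 /* bogus value */
            printf("%d\n", strncmp(arg, "on", 2) == 0); /* 1: false match */
            printf("%d\n", strcmp(arg, "on") == 0);     /* 0: rejected */
            return 0;
        }
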
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index 5f8dc8a21bd7..3700eef78743 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -17,14 +17,15 @@
17#include <asm/nmi.h> 17#include <asm/nmi.h>
18#include <asm/msr.h> 18#include <asm/msr.h>
19#include <asm/apic.h> 19#include <asm/apic.h>
20#include <asm/kdebug.h>
20 21
21#include "op_counter.h" 22#include "op_counter.h"
22#include "op_x86_model.h" 23#include "op_x86_model.h"
23 24
24static struct op_x86_model_spec const * model; 25static struct op_x86_model_spec const * model;
25static struct op_msrs cpu_msrs[NR_CPUS]; 26static struct op_msrs cpu_msrs[NR_CPUS];
26static unsigned long saved_lvtpc[NR_CPUS]; 27static unsigned long saved_lvtpc[NR_CPUS];
27 28
28static int nmi_start(void); 29static int nmi_start(void);
29static void nmi_stop(void); 30static void nmi_stop(void);
30 31
@@ -82,13 +83,24 @@ static void exit_driverfs(void)
82#define exit_driverfs() do { } while (0) 83#define exit_driverfs() do { } while (0)
83#endif /* CONFIG_PM */ 84#endif /* CONFIG_PM */
84 85
85 86static int profile_exceptions_notify(struct notifier_block *self,
86static int nmi_callback(struct pt_regs * regs, int cpu) 87 unsigned long val, void *data)
87{ 88{
88 return model->check_ctrs(regs, &cpu_msrs[cpu]); 89 struct die_args *args = (struct die_args *)data;
90 int ret = NOTIFY_DONE;
91 int cpu = smp_processor_id();
92
93 switch(val) {
94 case DIE_NMI:
95 if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
96 ret = NOTIFY_STOP;
97 break;
98 default:
99 break;
100 }
101 return ret;
89} 102}
90 103
91
92static void nmi_cpu_save_registers(struct op_msrs * msrs) 104static void nmi_cpu_save_registers(struct op_msrs * msrs)
93{ 105{
94 unsigned int const nr_ctrs = model->num_counters; 106 unsigned int const nr_ctrs = model->num_counters;
@@ -98,15 +110,19 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs)
98 unsigned int i; 110 unsigned int i;
99 111
100 for (i = 0; i < nr_ctrs; ++i) { 112 for (i = 0; i < nr_ctrs; ++i) {
101 rdmsr(counters[i].addr, 113 if (counters[i].addr){
102 counters[i].saved.low, 114 rdmsr(counters[i].addr,
103 counters[i].saved.high); 115 counters[i].saved.low,
116 counters[i].saved.high);
117 }
104 } 118 }
105 119
106 for (i = 0; i < nr_ctrls; ++i) { 120 for (i = 0; i < nr_ctrls; ++i) {
107 rdmsr(controls[i].addr, 121 if (controls[i].addr){
108 controls[i].saved.low, 122 rdmsr(controls[i].addr,
109 controls[i].saved.high); 123 controls[i].saved.low,
124 controls[i].saved.high);
125 }
110 } 126 }
111} 127}
112 128
@@ -170,27 +186,29 @@ static void nmi_cpu_setup(void * dummy)
170 apic_write(APIC_LVTPC, APIC_DM_NMI); 186 apic_write(APIC_LVTPC, APIC_DM_NMI);
171} 187}
172 188
189static struct notifier_block profile_exceptions_nb = {
190 .notifier_call = profile_exceptions_notify,
191 .next = NULL,
192 .priority = 0
193};
173 194
174static int nmi_setup(void) 195static int nmi_setup(void)
175{ 196{
197 int err=0;
198
176 if (!allocate_msrs()) 199 if (!allocate_msrs())
177 return -ENOMEM; 200 return -ENOMEM;
178 201
179 /* We walk a thin line between law and rape here. 202 if ((err = register_die_notifier(&profile_exceptions_nb))){
180 * We need to be careful to install our NMI handler
181 * without actually triggering any NMIs as this will
182 * break the core code horrifically.
183 */
184 if (reserve_lapic_nmi() < 0) {
185 free_msrs(); 203 free_msrs();
186 return -EBUSY; 204 return err;
187 } 205 }
206
188 /* We need to serialize save and setup for HT because the subset 207 /* We need to serialize save and setup for HT because the subset
189 * of msrs are distinct for save and setup operations 208 * of msrs are distinct for save and setup operations
190 */ 209 */
191 on_each_cpu(nmi_save_registers, NULL, 0, 1); 210 on_each_cpu(nmi_save_registers, NULL, 0, 1);
192 on_each_cpu(nmi_cpu_setup, NULL, 0, 1); 211 on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
193 set_nmi_callback(nmi_callback);
194 nmi_enabled = 1; 212 nmi_enabled = 1;
195 return 0; 213 return 0;
196} 214}
@@ -205,15 +223,19 @@ static void nmi_restore_registers(struct op_msrs * msrs)
205 unsigned int i; 223 unsigned int i;
206 224
207 for (i = 0; i < nr_ctrls; ++i) { 225 for (i = 0; i < nr_ctrls; ++i) {
208 wrmsr(controls[i].addr, 226 if (controls[i].addr){
209 controls[i].saved.low, 227 wrmsr(controls[i].addr,
210 controls[i].saved.high); 228 controls[i].saved.low,
229 controls[i].saved.high);
230 }
211 } 231 }
212 232
213 for (i = 0; i < nr_ctrs; ++i) { 233 for (i = 0; i < nr_ctrs; ++i) {
214 wrmsr(counters[i].addr, 234 if (counters[i].addr){
215 counters[i].saved.low, 235 wrmsr(counters[i].addr,
216 counters[i].saved.high); 236 counters[i].saved.low,
237 counters[i].saved.high);
238 }
217 } 239 }
218} 240}
219 241
@@ -234,6 +256,7 @@ static void nmi_cpu_shutdown(void * dummy)
234 apic_write(APIC_LVTPC, saved_lvtpc[cpu]); 256 apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
235 apic_write(APIC_LVTERR, v); 257 apic_write(APIC_LVTERR, v);
236 nmi_restore_registers(msrs); 258 nmi_restore_registers(msrs);
259 model->shutdown(msrs);
237} 260}
238 261
239 262
@@ -241,8 +264,7 @@ static void nmi_shutdown(void)
241{ 264{
242 nmi_enabled = 0; 265 nmi_enabled = 0;
243 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); 266 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
244 unset_nmi_callback(); 267 unregister_die_notifier(&profile_exceptions_nb);
245 release_lapic_nmi();
246 free_msrs(); 268 free_msrs();
247} 269}
248 270
@@ -284,6 +306,14 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
284 struct dentry * dir; 306 struct dentry * dir;
285 char buf[4]; 307 char buf[4];
286 308
309 /* quick little hack to _not_ expose a counter if it is not
 310 * available for use. This should protect userspace apps.
311 * NOTE: assumes 1:1 mapping here (that counters are organized
312 * sequentially in their struct assignment).
313 */
314 if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
315 continue;
316
287 snprintf(buf, sizeof(buf), "%d", i); 317 snprintf(buf, sizeof(buf), "%d", i);
288 dir = oprofilefs_mkdir(sb, root, buf); 318 dir = oprofilefs_mkdir(sb, root, buf);
289 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 319 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
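
    Annotation: the conversion from set_nmi_callback() to a die
    notifier follows the kernel's generic notifier-chain pattern: each
    subscriber returns NOTIFY_DONE to let the event propagate or
    NOTIFY_STOP to consume it. A minimal user-space sketch of that
    pattern (the real atomic chains add locking and priority-ordered
    insertion):

        #include <stdio.h>

        #define NOTIFY_DONE 0
        #define NOTIFY_STOP 1

        struct notifier_block {
            int (*notifier_call)(struct notifier_block *, unsigned long, void *);
            struct notifier_block *next;
            int priority;
        };

        /* Walk the chain until someone claims the event. */
        static int call_chain(struct notifier_block *nb, unsigned long val, void *data)
        {
            for (; nb; nb = nb->next)
                if (nb->notifier_call(nb, val, data) == NOTIFY_STOP)
                    return NOTIFY_STOP;
            return NOTIFY_DONE;
        }

        static int handler(struct notifier_block *self, unsigned long val, void *data)
        {
            return val == 1 ? NOTIFY_STOP : NOTIFY_DONE;  /* 1 standing in for DIE_NMI */
        }

        int main(void)
        {
            struct notifier_block nb = { handler, NULL, 0 };
            printf("%d\n", call_chain(&nb, 1, NULL));     /* prints 1: consumed */
            return 0;
        }
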
diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c
index 930a1127bb30..abf0ba52a635 100644
--- a/arch/i386/oprofile/nmi_timer_int.c
+++ b/arch/i386/oprofile/nmi_timer_int.c
@@ -17,34 +17,49 @@
17#include <asm/nmi.h> 17#include <asm/nmi.h>
18#include <asm/apic.h> 18#include <asm/apic.h>
19#include <asm/ptrace.h> 19#include <asm/ptrace.h>
20#include <asm/kdebug.h>
20 21
21static int nmi_timer_callback(struct pt_regs * regs, int cpu) 22static int profile_timer_exceptions_notify(struct notifier_block *self,
23 unsigned long val, void *data)
22{ 24{
23 oprofile_add_sample(regs, 0); 25 struct die_args *args = (struct die_args *)data;
24 return 1; 26 int ret = NOTIFY_DONE;
27
28 switch(val) {
29 case DIE_NMI:
30 oprofile_add_sample(args->regs, 0);
31 ret = NOTIFY_STOP;
32 break;
33 default:
34 break;
35 }
36 return ret;
25} 37}
26 38
39static struct notifier_block profile_timer_exceptions_nb = {
40 .notifier_call = profile_timer_exceptions_notify,
41 .next = NULL,
42 .priority = 0
43};
44
27static int timer_start(void) 45static int timer_start(void)
28{ 46{
29 disable_timer_nmi_watchdog(); 47 if (register_die_notifier(&profile_timer_exceptions_nb))
30 set_nmi_callback(nmi_timer_callback); 48 return 1;
31 return 0; 49 return 0;
32} 50}
33 51
34 52
35static void timer_stop(void) 53static void timer_stop(void)
36{ 54{
37 enable_timer_nmi_watchdog(); 55 unregister_die_notifier(&profile_timer_exceptions_nb);
38 unset_nmi_callback();
39 synchronize_sched(); /* Allow already-started NMIs to complete. */ 56 synchronize_sched(); /* Allow already-started NMIs to complete. */
40} 57}
41 58
42 59
43int __init op_nmi_timer_init(struct oprofile_operations * ops) 60int __init op_nmi_timer_init(struct oprofile_operations * ops)
44{ 61{
45 extern int nmi_active; 62 if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
46
47 if (nmi_active <= 0)
48 return -ENODEV; 63 return -ENODEV;
49 64
50 ops->start = timer_start; 65 ops->start = timer_start;
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
index 693bdea4a52b..3057a19e4641 100644
--- a/arch/i386/oprofile/op_model_athlon.c
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -21,10 +21,12 @@
21#define NUM_COUNTERS 4 21#define NUM_COUNTERS 4
22#define NUM_CONTROLS 4 22#define NUM_CONTROLS 4
23 23
24#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
24#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) 25#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
25#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) 26#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
26#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) 27#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
27 28
29#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
28#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) 30#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
29#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) 31#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
30#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) 32#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COUNTERS];
40 42
41static void athlon_fill_in_addresses(struct op_msrs * const msrs) 43static void athlon_fill_in_addresses(struct op_msrs * const msrs)
42{ 44{
43 msrs->counters[0].addr = MSR_K7_PERFCTR0; 45 int i;
44 msrs->counters[1].addr = MSR_K7_PERFCTR1; 46
45 msrs->counters[2].addr = MSR_K7_PERFCTR2; 47 for (i=0; i < NUM_COUNTERS; i++) {
46 msrs->counters[3].addr = MSR_K7_PERFCTR3; 48 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
47 49 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
48 msrs->controls[0].addr = MSR_K7_EVNTSEL0; 50 else
49 msrs->controls[1].addr = MSR_K7_EVNTSEL1; 51 msrs->counters[i].addr = 0;
50 msrs->controls[2].addr = MSR_K7_EVNTSEL2; 52 }
51 msrs->controls[3].addr = MSR_K7_EVNTSEL3; 53
54 for (i=0; i < NUM_CONTROLS; i++) {
55 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
56 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
57 else
58 msrs->controls[i].addr = 0;
59 }
52} 60}
53 61
54 62
@@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs)
59 67
60 /* clear all counters */ 68 /* clear all counters */
61 for (i = 0 ; i < NUM_CONTROLS; ++i) { 69 for (i = 0 ; i < NUM_CONTROLS; ++i) {
70 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
71 continue;
62 CTRL_READ(low, high, msrs, i); 72 CTRL_READ(low, high, msrs, i);
63 CTRL_CLEAR(low); 73 CTRL_CLEAR(low);
64 CTRL_WRITE(low, high, msrs, i); 74 CTRL_WRITE(low, high, msrs, i);
65 } 75 }
66 76
67 /* avoid a false detection of ctr overflows in NMI handler */ 77 /* avoid a false detection of ctr overflows in NMI handler */
68 for (i = 0; i < NUM_COUNTERS; ++i) { 78 for (i = 0; i < NUM_COUNTERS; ++i) {
79 if (unlikely(!CTR_IS_RESERVED(msrs,i)))
80 continue;
69 CTR_WRITE(1, msrs, i); 81 CTR_WRITE(1, msrs, i);
70 } 82 }
71 83
72 /* enable active counters */ 84 /* enable active counters */
73 for (i = 0; i < NUM_COUNTERS; ++i) { 85 for (i = 0; i < NUM_COUNTERS; ++i) {
74 if (counter_config[i].enabled) { 86 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
75 reset_value[i] = counter_config[i].count; 87 reset_value[i] = counter_config[i].count;
76 88
77 CTR_WRITE(counter_config[i].count, msrs, i); 89 CTR_WRITE(counter_config[i].count, msrs, i);
@@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_regs * const regs,
98 int i; 110 int i;
99 111
100 for (i = 0 ; i < NUM_COUNTERS; ++i) { 112 for (i = 0 ; i < NUM_COUNTERS; ++i) {
113 if (!reset_value[i])
114 continue;
101 CTR_READ(low, high, msrs, i); 115 CTR_READ(low, high, msrs, i);
102 if (CTR_OVERFLOWED(low)) { 116 if (CTR_OVERFLOWED(low)) {
103 oprofile_add_sample(regs, i); 117 oprofile_add_sample(regs, i);
@@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs const * const msrs)
132 /* Subtle: stop on all counters to avoid race with 146 /* Subtle: stop on all counters to avoid race with
133 * setting our pm callback */ 147 * setting our pm callback */
134 for (i = 0 ; i < NUM_COUNTERS ; ++i) { 148 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
149 if (!reset_value[i])
150 continue;
135 CTRL_READ(low, high, msrs, i); 151 CTRL_READ(low, high, msrs, i);
136 CTRL_SET_INACTIVE(low); 152 CTRL_SET_INACTIVE(low);
137 CTRL_WRITE(low, high, msrs, i); 153 CTRL_WRITE(low, high, msrs, i);
138 } 154 }
139} 155}
140 156
157static void athlon_shutdown(struct op_msrs const * const msrs)
158{
159 int i;
160
161 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
162 if (CTR_IS_RESERVED(msrs,i))
163 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
164 }
165 for (i = 0 ; i < NUM_CONTROLS ; ++i) {
166 if (CTRL_IS_RESERVED(msrs,i))
167 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
168 }
169}
141 170
142struct op_x86_model_spec const op_athlon_spec = { 171struct op_x86_model_spec const op_athlon_spec = {
143 .num_counters = NUM_COUNTERS, 172 .num_counters = NUM_COUNTERS,
@@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon_spec = {
146 .setup_ctrs = &athlon_setup_ctrs, 175 .setup_ctrs = &athlon_setup_ctrs,
147 .check_ctrs = &athlon_check_ctrs, 176 .check_ctrs = &athlon_check_ctrs,
148 .start = &athlon_start, 177 .start = &athlon_start,
149 .stop = &athlon_stop 178 .stop = &athlon_stop,
179 .shutdown = &athlon_shutdown
150}; 180};
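
    Annotation: the reserve_perfctr_nmi()/release_evntsel_nmi() calls
    threaded through the model code above arbitrate counter MSRs
    between oprofile and the NMI watchdog. Their real implementation
    lives in the nmi.c rework; the idea, sketched here with assumed
    semantics, is a global claim bitmap:

        #include <stdatomic.h>
        #include <stdbool.h>

        static atomic_uint perfctr_claimed;   /* one bit per counter MSR */

        /* Returns true if the caller now owns the counter, false if
         * another subsystem (e.g. the NMI watchdog) got there first. */
        static bool reserve_perfctr(unsigned int bit)
        {
            unsigned int old = atomic_fetch_or(&perfctr_claimed, 1u << bit);
            return !(old & (1u << bit));
        }

        static void release_perfctr(unsigned int bit)
        {
            atomic_fetch_and(&perfctr_claimed, ~(1u << bit));
        }
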
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index 7c61d357b82b..47925927b12f 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -32,7 +32,7 @@
32#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) 32#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
33 33
34static unsigned int num_counters = NUM_COUNTERS_NON_HT; 34static unsigned int num_counters = NUM_COUNTERS_NON_HT;
35 35static unsigned int num_controls = NUM_CONTROLS_NON_HT;
36 36
37/* this has to be checked dynamically since the 37/* this has to be checked dynamically since the
38 hyper-threadedness of a chip is discovered at 38 hyper-threadedness of a chip is discovered at
@@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT;
40static inline void setup_num_counters(void) 40static inline void setup_num_counters(void)
41{ 41{
42#ifdef CONFIG_SMP 42#ifdef CONFIG_SMP
43 if (smp_num_siblings == 2) 43 if (smp_num_siblings == 2){
44 num_counters = NUM_COUNTERS_HT2; 44 num_counters = NUM_COUNTERS_HT2;
45 num_controls = NUM_CONTROLS_HT2;
46 }
45#endif 47#endif
46} 48}
47 49
@@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
97 99
98#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT 100#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
99 101
100/* All cccr we don't use. */
101static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
102 MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3,
103 MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3,
104 MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
105 MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1,
106 MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3
107};
108
109/* p4 event codes in libop/op_event.h are indices into this table. */ 102/* p4 event codes in libop/op_event.h are indices into this table. */
110 103
111static struct p4_event_binding p4_events[NUM_EVENTS] = { 104static struct p4_event_binding p4_events[NUM_EVENTS] = {
@@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
372#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) 365#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
373#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) 366#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
374 367
368#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
369#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
375#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) 370#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
376#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) 371#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
377#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) 372#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
@@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT];
401static void p4_fill_in_addresses(struct op_msrs * const msrs) 396static void p4_fill_in_addresses(struct op_msrs * const msrs)
402{ 397{
403 unsigned int i; 398 unsigned int i;
404 unsigned int addr, stag; 399 unsigned int addr, cccraddr, stag;
405 400
406 setup_num_counters(); 401 setup_num_counters();
407 stag = get_stagger(); 402 stag = get_stagger();
408 403
409 /* the counter registers we pay attention to */ 404 /* initialize some registers */
410 for (i = 0; i < num_counters; ++i) { 405 for (i = 0; i < num_counters; ++i) {
411 msrs->counters[i].addr = 406 msrs->counters[i].addr = 0;
412 p4_counters[VIRT_CTR(stag, i)].counter_address;
413 } 407 }
414 408 for (i = 0; i < num_controls; ++i) {
415 /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ 409 msrs->controls[i].addr = 0;
416
417 /* 18 CCCR registers */
418 for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
419 addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
420 msrs->controls[i].addr = addr;
421 } 410 }
422 411
412 /* the counter & cccr registers we pay attention to */
413 for (i = 0; i < num_counters; ++i) {
414 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
415 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
416 if (reserve_perfctr_nmi(addr)){
417 msrs->counters[i].addr = addr;
418 msrs->controls[i].addr = cccraddr;
419 }
420 }
421
423 /* 43 ESCR registers in three or four discontiguous group */ 422 /* 43 ESCR registers in three or four discontiguous group */
424 for (addr = MSR_P4_BSU_ESCR0 + stag; 423 for (addr = MSR_P4_BSU_ESCR0 + stag;
425 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { 424 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
426 msrs->controls[i].addr = addr; 425 if (reserve_evntsel_nmi(addr))
426 msrs->controls[i].addr = addr;
427 } 427 }
428 428
 429 /* no IQ_ESCR0/1 on some models, we save BSU_ESCR0/1 a second time 429 /* no IQ_ESCR0/1 on some models, we save BSU_ESCR0/1 a second time
@@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
431 if (boot_cpu_data.x86_model >= 0x3) { 431 if (boot_cpu_data.x86_model >= 0x3) {
432 for (addr = MSR_P4_BSU_ESCR0 + stag; 432 for (addr = MSR_P4_BSU_ESCR0 + stag;
433 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { 433 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
434 msrs->controls[i].addr = addr; 434 if (reserve_evntsel_nmi(addr))
435 msrs->controls[i].addr = addr;
435 } 436 }
436 } else { 437 } else {
437 for (addr = MSR_P4_IQ_ESCR0 + stag; 438 for (addr = MSR_P4_IQ_ESCR0 + stag;
438 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { 439 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
439 msrs->controls[i].addr = addr; 440 if (reserve_evntsel_nmi(addr))
441 msrs->controls[i].addr = addr;
440 } 442 }
441 } 443 }
442 444
443 for (addr = MSR_P4_RAT_ESCR0 + stag; 445 for (addr = MSR_P4_RAT_ESCR0 + stag;
444 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { 446 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
445 msrs->controls[i].addr = addr; 447 if (reserve_evntsel_nmi(addr))
448 msrs->controls[i].addr = addr;
446 } 449 }
447 450
448 for (addr = MSR_P4_MS_ESCR0 + stag; 451 for (addr = MSR_P4_MS_ESCR0 + stag;
449 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 452 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
450 msrs->controls[i].addr = addr; 453 if (reserve_evntsel_nmi(addr))
454 msrs->controls[i].addr = addr;
451 } 455 }
452 456
453 for (addr = MSR_P4_IX_ESCR0 + stag; 457 for (addr = MSR_P4_IX_ESCR0 + stag;
454 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 458 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
455 msrs->controls[i].addr = addr; 459 if (reserve_evntsel_nmi(addr))
460 msrs->controls[i].addr = addr;
456 } 461 }
457 462
458 /* there are 2 remaining non-contiguously located ESCRs */ 463 /* there are 2 remaining non-contiguously located ESCRs */
459 464
460 if (num_counters == NUM_COUNTERS_NON_HT) { 465 if (num_counters == NUM_COUNTERS_NON_HT) {
 461 /* standard non-HT CPUs handle both remaining ESCRs */ 466 /* standard non-HT CPUs handle both remaining ESCRs */
462 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 467 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
463 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; 468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
469 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
470 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
464 471
465 } else if (stag == 0) { 472 } else if (stag == 0) {
466 /* HT CPUs give the first remainder to the even thread, as 473 /* HT CPUs give the first remainder to the even thread, as
467 the 32nd control register */ 474 the 32nd control register */
468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; 475 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
476 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
469 477
470 } else { 478 } else {
471 /* and two copies of the second to the odd thread, 479 /* and two copies of the second to the odd thread,
 472 for the 22nd and 23rd control registers */ 480 for the 22nd and 23rd control registers */
473 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 481 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
474 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; 482 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
483 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
484 }
475 } 485 }
476} 486}
477 487
@@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
544{ 554{
545 unsigned int i; 555 unsigned int i;
546 unsigned int low, high; 556 unsigned int low, high;
547 unsigned int addr;
548 unsigned int stag; 557 unsigned int stag;
549 558
550 stag = get_stagger(); 559 stag = get_stagger();
@@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
557 566
558 /* clear the cccrs we will use */ 567 /* clear the cccrs we will use */
559 for (i = 0 ; i < num_counters ; i++) { 568 for (i = 0 ; i < num_counters ; i++) {
569 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
570 continue;
560 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 571 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
561 CCCR_CLEAR(low); 572 CCCR_CLEAR(low);
562 CCCR_SET_REQUIRED_BITS(low); 573 CCCR_SET_REQUIRED_BITS(low);
563 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 574 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
564 } 575 }
565 576
566 /* clear cccrs outside our concern */
567 for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
568 rdmsr(p4_unused_cccr[i], low, high);
569 CCCR_CLEAR(low);
570 CCCR_SET_REQUIRED_BITS(low);
571 wrmsr(p4_unused_cccr[i], low, high);
572 }
573
574 /* clear all escrs (including those outside our concern) */ 577 /* clear all escrs (including those outside our concern) */
575 for (addr = MSR_P4_BSU_ESCR0 + stag; 578 for (i = num_counters; i < num_controls; i++) {
576 addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { 579 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
577 wrmsr(addr, 0, 0); 580 continue;
578 } 581 wrmsr(msrs->controls[i].addr, 0, 0);
579
580 /* On older models clear also MSR_P4_IQ_ESCR0/1 */
581 if (boot_cpu_data.x86_model < 0x3) {
582 wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
583 wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
584 }
585
586 for (addr = MSR_P4_RAT_ESCR0 + stag;
587 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
588 wrmsr(addr, 0, 0);
589 }
590
591 for (addr = MSR_P4_MS_ESCR0 + stag;
592 addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){
593 wrmsr(addr, 0, 0);
594 }
595
596 for (addr = MSR_P4_IX_ESCR0 + stag;
597 addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){
598 wrmsr(addr, 0, 0);
599 } 582 }
600 583
601 if (num_counters == NUM_COUNTERS_NON_HT) {
602 wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
603 wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
604 } else if (stag == 0) {
605 wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
606 } else {
607 wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
608 }
609
610 /* setup all counters */ 584 /* setup all counters */
611 for (i = 0 ; i < num_counters ; ++i) { 585 for (i = 0 ; i < num_counters ; ++i) {
612 if (counter_config[i].enabled) { 586 if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
613 reset_value[i] = counter_config[i].count; 587 reset_value[i] = counter_config[i].count;
614 pmc_setup_one_p4_counter(i); 588 pmc_setup_one_p4_counter(i);
615 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); 589 CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs)
696 stag = get_stagger(); 670 stag = get_stagger();
697 671
698 for (i = 0; i < num_counters; ++i) { 672 for (i = 0; i < num_counters; ++i) {
673 if (!reset_value[i])
674 continue;
699 CCCR_READ(low, high, VIRT_CTR(stag, i)); 675 CCCR_READ(low, high, VIRT_CTR(stag, i));
700 CCCR_SET_DISABLE(low); 676 CCCR_SET_DISABLE(low);
701 CCCR_WRITE(low, high, VIRT_CTR(stag, i)); 677 CCCR_WRITE(low, high, VIRT_CTR(stag, i));
702 } 678 }
703} 679}
704 680
681static void p4_shutdown(struct op_msrs const * const msrs)
682{
683 int i;
684
685 for (i = 0 ; i < num_counters ; ++i) {
686 if (CTR_IS_RESERVED(msrs,i))
687 release_perfctr_nmi(msrs->counters[i].addr);
688 }
689 /* some of the control registers are specially reserved in
690 * conjunction with the counter registers (hence the starting offset).
691 * This saves a few bits.
692 */
693 for (i = num_counters ; i < num_controls ; ++i) {
694 if (CTRL_IS_RESERVED(msrs,i))
695 release_evntsel_nmi(msrs->controls[i].addr);
696 }
697}
698
705 699
706#ifdef CONFIG_SMP 700#ifdef CONFIG_SMP
707struct op_x86_model_spec const op_p4_ht2_spec = { 701struct op_x86_model_spec const op_p4_ht2_spec = {
@@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = {
711 .setup_ctrs = &p4_setup_ctrs, 705 .setup_ctrs = &p4_setup_ctrs,
712 .check_ctrs = &p4_check_ctrs, 706 .check_ctrs = &p4_check_ctrs,
713 .start = &p4_start, 707 .start = &p4_start,
714 .stop = &p4_stop 708 .stop = &p4_stop,
709 .shutdown = &p4_shutdown
715}; 710};
716#endif 711#endif
717 712
@@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = {
722 .setup_ctrs = &p4_setup_ctrs, 717 .setup_ctrs = &p4_setup_ctrs,
723 .check_ctrs = &p4_check_ctrs, 718 .check_ctrs = &p4_check_ctrs,
724 .start = &p4_start, 719 .start = &p4_start,
725 .stop = &p4_stop 720 .stop = &p4_stop,
721 .shutdown = &p4_shutdown
726}; 722};
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index 5c3ab4b027ad..f88e05ba8eb3 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -22,10 +22,12 @@
22#define NUM_COUNTERS 2 22#define NUM_COUNTERS 2
23#define NUM_CONTROLS 2 23#define NUM_CONTROLS 2
24 24
25#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
25#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) 26#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
26#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) 27#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
27#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) 28#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
28 29
30#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
29#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) 31#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
30#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) 32#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
31#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) 33#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COUNTERS];
41 43
42static void ppro_fill_in_addresses(struct op_msrs * const msrs) 44static void ppro_fill_in_addresses(struct op_msrs * const msrs)
43{ 45{
44 msrs->counters[0].addr = MSR_P6_PERFCTR0; 46 int i;
45 msrs->counters[1].addr = MSR_P6_PERFCTR1; 47
48 for (i=0; i < NUM_COUNTERS; i++) {
49 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
50 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
51 else
52 msrs->counters[i].addr = 0;
53 }
46 54
47 msrs->controls[0].addr = MSR_P6_EVNTSEL0; 55 for (i=0; i < NUM_CONTROLS; i++) {
48 msrs->controls[1].addr = MSR_P6_EVNTSEL1; 56 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
57 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
58 else
59 msrs->controls[i].addr = 0;
60 }
49} 61}
50 62
51 63
@@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
56 68
57 /* clear all counters */ 69 /* clear all counters */
58 for (i = 0 ; i < NUM_CONTROLS; ++i) { 70 for (i = 0 ; i < NUM_CONTROLS; ++i) {
71 if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
72 continue;
59 CTRL_READ(low, high, msrs, i); 73 CTRL_READ(low, high, msrs, i);
60 CTRL_CLEAR(low); 74 CTRL_CLEAR(low);
61 CTRL_WRITE(low, high, msrs, i); 75 CTRL_WRITE(low, high, msrs, i);
@@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
63 77
64 /* avoid a false detection of ctr overflows in NMI handler */ 78 /* avoid a false detection of ctr overflows in NMI handler */
65 for (i = 0; i < NUM_COUNTERS; ++i) { 79 for (i = 0; i < NUM_COUNTERS; ++i) {
80 if (unlikely(!CTR_IS_RESERVED(msrs,i)))
81 continue;
66 CTR_WRITE(1, msrs, i); 82 CTR_WRITE(1, msrs, i);
67 } 83 }
68 84
69 /* enable active counters */ 85 /* enable active counters */
70 for (i = 0; i < NUM_COUNTERS; ++i) { 86 for (i = 0; i < NUM_COUNTERS; ++i) {
71 if (counter_config[i].enabled) { 87 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
72 reset_value[i] = counter_config[i].count; 88 reset_value[i] = counter_config[i].count;
73 89
74 CTR_WRITE(counter_config[i].count, msrs, i); 90 CTR_WRITE(counter_config[i].count, msrs, i);
@@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
81 CTRL_SET_UM(low, counter_config[i].unit_mask); 97 CTRL_SET_UM(low, counter_config[i].unit_mask);
82 CTRL_SET_EVENT(low, counter_config[i].event); 98 CTRL_SET_EVENT(low, counter_config[i].event);
83 CTRL_WRITE(low, high, msrs, i); 99 CTRL_WRITE(low, high, msrs, i);
100 } else {
101 reset_value[i] = 0;
84 } 102 }
85 } 103 }
86} 104}
@@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
93 int i; 111 int i;
94 112
95 for (i = 0 ; i < NUM_COUNTERS; ++i) { 113 for (i = 0 ; i < NUM_COUNTERS; ++i) {
114 if (!reset_value[i])
115 continue;
96 CTR_READ(low, high, msrs, i); 116 CTR_READ(low, high, msrs, i);
97 if (CTR_OVERFLOWED(low)) { 117 if (CTR_OVERFLOWED(low)) {
98 oprofile_add_sample(regs, i); 118 oprofile_add_sample(regs, i);
@@ -118,18 +138,38 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
118static void ppro_start(struct op_msrs const * const msrs) 138static void ppro_start(struct op_msrs const * const msrs)
119{ 139{
120 unsigned int low,high; 140 unsigned int low,high;
121 CTRL_READ(low, high, msrs, 0); 141
122 CTRL_SET_ACTIVE(low); 142 if (reset_value[0]) {
123 CTRL_WRITE(low, high, msrs, 0); 143 CTRL_READ(low, high, msrs, 0);
144 CTRL_SET_ACTIVE(low);
145 CTRL_WRITE(low, high, msrs, 0);
146 }
124} 147}
125 148
126 149
127static void ppro_stop(struct op_msrs const * const msrs) 150static void ppro_stop(struct op_msrs const * const msrs)
128{ 151{
129 unsigned int low,high; 152 unsigned int low,high;
130 CTRL_READ(low, high, msrs, 0); 153
131 CTRL_SET_INACTIVE(low); 154 if (reset_value[0]) {
132 CTRL_WRITE(low, high, msrs, 0); 155 CTRL_READ(low, high, msrs, 0);
156 CTRL_SET_INACTIVE(low);
157 CTRL_WRITE(low, high, msrs, 0);
158 }
159}
160
161static void ppro_shutdown(struct op_msrs const * const msrs)
162{
163 int i;
164
165 for (i = 0 ; i < NUM_COUNTERS ; ++i) {
166 if (CTR_IS_RESERVED(msrs,i))
167 release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
168 }
169 for (i = 0 ; i < NUM_CONTROLS ; ++i) {
170 if (CTRL_IS_RESERVED(msrs,i))
171 release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
172 }
133} 173}
134 174
135 175
@@ -140,5 +180,6 @@ struct op_x86_model_spec const op_ppro_spec = {
140 .setup_ctrs = &ppro_setup_ctrs, 180 .setup_ctrs = &ppro_setup_ctrs,
141 .check_ctrs = &ppro_check_ctrs, 181 .check_ctrs = &ppro_check_ctrs,
142 .start = &ppro_start, 182 .start = &ppro_start,
143 .stop = &ppro_stop 183 .stop = &ppro_stop,
184 .shutdown = &ppro_shutdown
144}; 185};
diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h
index 123b7e90a9ee..abb1aa95b979 100644
--- a/arch/i386/oprofile/op_x86_model.h
+++ b/arch/i386/oprofile/op_x86_model.h
@@ -40,6 +40,7 @@ struct op_x86_model_spec {
40 struct op_msrs const * const msrs); 40 struct op_msrs const * const msrs);
41 void (*start)(struct op_msrs const * const msrs); 41 void (*start)(struct op_msrs const * const msrs);
42 void (*stop)(struct op_msrs const * const msrs); 42 void (*stop)(struct op_msrs const * const msrs);
43 void (*shutdown)(struct op_msrs const * const msrs);
43}; 44};
44 45
45extern struct op_x86_model_spec const op_ppro_spec; 46extern struct op_x86_model_spec const op_ppro_spec;
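
    Annotation: with the shutdown hook in place, every
    op_x86_model_spec now has a symmetric setup/teardown pair:
    fill_in_addresses() reserves MSRs, shutdown() releases them. A
    compilable toy showing the assumed call order (driven by
    nmi_setup()/nmi_shutdown() in nmi_int.c):

        #include <stdio.h>

        struct op_msrs { int dummy; };

        struct op_x86_model_spec {
            void (*fill_in_addresses)(struct op_msrs *);
            void (*start)(const struct op_msrs *);
            void (*stop)(const struct op_msrs *);
            void (*shutdown)(const struct op_msrs *);   /* new hook */
        };

        static void fill(struct op_msrs *m)        { (void)m; puts("reserve MSRs"); }
        static void start(const struct op_msrs *m) { (void)m; puts("start counters"); }
        static void stop(const struct op_msrs *m)  { (void)m; puts("stop counters"); }
        /* trailing underscore avoids clashing with shutdown(2) from libc */
        static void shutdown_(const struct op_msrs *m) { (void)m; puts("release MSRs"); }

        int main(void)
        {
            struct op_x86_model_spec model = { fill, start, stop, shutdown_ };
            struct op_msrs msrs;

            model.fill_in_addresses(&msrs);  /* setup: now also reserves counters */
            model.start(&msrs);
            model.stop(&msrs);
            model.shutdown(&msrs);           /* new: symmetric release point */
            return 0;
        }
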
diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile
index 62ad75c57e6a..1594d2f55c8f 100644
--- a/arch/i386/pci/Makefile
+++ b/arch/i386/pci/Makefile
@@ -11,4 +11,4 @@ pci-y += legacy.o irq.o
11pci-$(CONFIG_X86_VISWS) := visws.o fixup.o 11pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
12pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o 12pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o
13 13
14obj-y += $(pci-y) common.o 14obj-y += $(pci-y) common.o early.o
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 0a362e3aeac5..68bce194e688 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -242,6 +242,10 @@ char * __devinit pcibios_setup(char *str)
242 acpi_noirq_set(); 242 acpi_noirq_set();
243 return NULL; 243 return NULL;
244 } 244 }
245 else if (!strcmp(str, "noearly")) {
246 pci_probe |= PCI_PROBE_NOEARLY;
247 return NULL;
248 }
245#ifndef CONFIG_X86_VISWS 249#ifndef CONFIG_X86_VISWS
246 else if (!strcmp(str, "usepirqmask")) { 250 else if (!strcmp(str, "usepirqmask")) {
247 pci_probe |= PCI_USE_PIRQ_MASK; 251 pci_probe |= PCI_USE_PIRQ_MASK;
diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c
index 5d81fb510375..5acf0b4743cf 100644
--- a/arch/i386/pci/direct.c
+++ b/arch/i386/pci/direct.c
@@ -254,7 +254,16 @@ static int __init pci_check_type2(void)
254 return works; 254 return works;
255} 255}
256 256
257void __init pci_direct_init(void) 257void __init pci_direct_init(int type)
258{
259 printk(KERN_INFO "PCI: Using configuration type %d\n", type);
260 if (type == 1)
261 raw_pci_ops = &pci_direct_conf1;
262 else
263 raw_pci_ops = &pci_direct_conf2;
264}
265
266int __init pci_direct_probe(void)
258{ 267{
259 struct resource *region, *region2; 268 struct resource *region, *region2;
260 269
@@ -264,19 +273,16 @@ void __init pci_direct_init(void)
264 if (!region) 273 if (!region)
265 goto type2; 274 goto type2;
266 275
267 if (pci_check_type1()) { 276 if (pci_check_type1())
268 printk(KERN_INFO "PCI: Using configuration type 1\n"); 277 return 1;
269 raw_pci_ops = &pci_direct_conf1;
270 return;
271 }
272 release_resource(region); 278 release_resource(region);
273 279
274 type2: 280 type2:
275 if ((pci_probe & PCI_PROBE_CONF2) == 0) 281 if ((pci_probe & PCI_PROBE_CONF2) == 0)
276 return; 282 return 0;
277 region = request_region(0xCF8, 4, "PCI conf2"); 283 region = request_region(0xCF8, 4, "PCI conf2");
278 if (!region) 284 if (!region)
279 return; 285 return 0;
280 region2 = request_region(0xC000, 0x1000, "PCI conf2"); 286 region2 = request_region(0xC000, 0x1000, "PCI conf2");
281 if (!region2) 287 if (!region2)
282 goto fail2; 288 goto fail2;
@@ -284,10 +290,11 @@ void __init pci_direct_init(void)
284 if (pci_check_type2()) { 290 if (pci_check_type2()) {
285 printk(KERN_INFO "PCI: Using configuration type 2\n"); 291 printk(KERN_INFO "PCI: Using configuration type 2\n");
286 raw_pci_ops = &pci_direct_conf2; 292 raw_pci_ops = &pci_direct_conf2;
287 return; 293 return 2;
288 } 294 }
289 295
290 release_resource(region2); 296 release_resource(region2);
291 fail2: 297 fail2:
292 release_resource(region); 298 release_resource(region);
299 return 0;
293} 300}
diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c
new file mode 100644
index 000000000000..713d6c866cae
--- /dev/null
+++ b/arch/i386/pci/early.c
@@ -0,0 +1,52 @@
1#include <linux/kernel.h>
2#include <linux/pci.h>
3#include <asm/pci-direct.h>
4#include <asm/io.h>
5#include "pci.h"
6
7/* Direct PCI access. This is used for PCI accesses in early boot before
8 the PCI subsystem works. */
9
10#define PDprintk(x...)
11
12u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
13{
14 u32 v;
15 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
16 v = inl(0xcfc);
17 if (v != 0xffffffff)
18 PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
19 return v;
20}
21
22u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
23{
24 u8 v;
25 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
26 v = inb(0xcfc + (offset&3));
27 PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
28 return v;
29}
30
31u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
32{
33 u16 v;
34 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
35 v = inw(0xcfc + (offset&2));
36 PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
37 return v;
38}
39
40void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
41 u32 val)
42{
43 PDprintk("%x writing to %x: %x\n", slot, offset, val);
44 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
45 outl(val, 0xcfc);
46}
47
48int early_pci_allowed(void)
49{
50 return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
51 PCI_PROBE_CONF1;
52}
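
    Annotation: the magic dword written to port 0xCF8 in early.c above
    is the standard type-1 CONFIG_ADDRESS encoding. Spelled out (a
    restatement of the expression already used in read_pci_config(),
    not new behaviour):

        #include <stdio.h>

        /* CONFIG_ADDRESS, port 0xCF8:
         *   bit  31     enable
         *   bits 23-16  bus
         *   bits 15-11  device (slot)
         *   bits 10-8   function
         *   bits  7-2   dword-aligned register offset
         * The data then appears at port 0xCFC (+ offset & 3 for
         * sub-dword accesses, as in read_pci_config_byte()). */
        static unsigned int conf1_address(unsigned char bus, unsigned char slot,
                                          unsigned char func, unsigned char offset)
        {
            return 0x80000000u | (bus << 16) | (slot << 11)
                               | (func << 8) | offset;
        }

        int main(void)
        {
            /* bus 0, device 3, function 0, register 0 (vendor/device ID) */
            printf("%#010x\n", conf1_address(0, 3, 0, 0));  /* 0x80001800 */
            return 0;
        }
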
diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c
index 51087a9d9172..d028e1b05c36 100644
--- a/arch/i386/pci/init.c
+++ b/arch/i386/pci/init.c
@@ -6,8 +6,13 @@
6 in the right sequence from here. */ 6 in the right sequence from here. */
7static __init int pci_access_init(void) 7static __init int pci_access_init(void)
8{ 8{
9 int type = 0;
10
11#ifdef CONFIG_PCI_DIRECT
12 type = pci_direct_probe();
13#endif
9#ifdef CONFIG_PCI_MMCONFIG 14#ifdef CONFIG_PCI_MMCONFIG
10 pci_mmcfg_init(); 15 pci_mmcfg_init(type);
11#endif 16#endif
12 if (raw_pci_ops) 17 if (raw_pci_ops)
13 return 0; 18 return 0;
@@ -21,7 +26,7 @@ static __init int pci_access_init(void)
21 * fails. 26 * fails.
22 */ 27 */
23#ifdef CONFIG_PCI_DIRECT 28#ifdef CONFIG_PCI_DIRECT
24 pci_direct_init(); 29 pci_direct_init(type);
25#endif 30#endif
26 return 0; 31 return 0;
27} 32}
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index 972180f738d9..05be8db58a8c 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -151,6 +151,38 @@ static struct pci_raw_ops pci_mmcfg = {
151 .write = pci_mmcfg_write, 151 .write = pci_mmcfg_write,
152}; 152};
153 153
154
155static __init void pci_mmcfg_insert_resources(void)
156{
157#define PCI_MMCFG_RESOURCE_NAME_LEN 19
158 int i;
159 struct resource *res;
160 char *names;
161 unsigned num_buses;
162
163 res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
164 pci_mmcfg_config_num, GFP_KERNEL);
165
166 if (!res) {
167 printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
168 return;
169 }
170
171 names = (void *)&res[pci_mmcfg_config_num];
172 for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
173 num_buses = pci_mmcfg_config[i].end_bus_number -
174 pci_mmcfg_config[i].start_bus_number + 1;
175 res->name = names;
176 snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
177 pci_mmcfg_config[i].pci_segment_group_number);
178 res->start = pci_mmcfg_config[i].base_address;
179 res->end = res->start + (num_buses << 20) - 1;
180 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
181 insert_resource(&iomem_resource, res);
182 names += PCI_MMCFG_RESOURCE_NAME_LEN;
183 }
184}
185
154/* K8 systems have some devices (typically in the builtin northbridge) 186/* K8 systems have some devices (typically in the builtin northbridge)
155 that are only accessible using type1 187 that are only accessible using type1
156 Normally this can be expressed in the MCFG by not listing them 188 Normally this can be expressed in the MCFG by not listing them
@@ -187,7 +219,9 @@ static __init void unreachable_devices(void)
187 } 219 }
188} 220}
189 221
190void __init pci_mmcfg_init(void) 222
223
224void __init pci_mmcfg_init(int type)
191{ 225{
192 if ((pci_probe & PCI_PROBE_MMCONF) == 0) 226 if ((pci_probe & PCI_PROBE_MMCONF) == 0)
193 return; 227 return;
@@ -198,7 +232,9 @@ void __init pci_mmcfg_init(void)
198 (pci_mmcfg_config[0].base_address == 0)) 232 (pci_mmcfg_config[0].base_address == 0))
199 return; 233 return;
200 234
201 if (!e820_all_mapped(pci_mmcfg_config[0].base_address, 235 /* Only do this check when type 1 works. If it doesn't work
236 assume we run on a Mac and always use MCFG */
237 if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].base_address,
202 pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, 238 pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
203 E820_RESERVED)) { 239 E820_RESERVED)) {
204 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", 240 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
@@ -212,4 +248,5 @@ void __init pci_mmcfg_init(void)
212 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 248 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
213 249
214 unreachable_devices(); 250 unreachable_devices();
251 pci_mmcfg_insert_resources();
215} 252}
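
    Annotation: the "num_buses << 20" sizing in
    pci_mmcfg_insert_resources() follows from the MMCONFIG layout:
    each bus decodes 32 devices x 8 functions x 4 KiB of extended
    config space, i.e. exactly 1 MiB per bus. A one-line sanity check:

        #include <assert.h>

        int main(void)
        {
            /* 32 devices * 8 functions * 4096 bytes = 1 MiB per bus */
            assert(32 * 8 * 4096 == 1 << 20);
            return 0;
        }
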
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index bf4e79335388..1814f74569c6 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -17,6 +17,7 @@
17#define PCI_PROBE_CONF2 0x0004 17#define PCI_PROBE_CONF2 0x0004
18#define PCI_PROBE_MMCONF 0x0008 18#define PCI_PROBE_MMCONF 0x0008
19#define PCI_PROBE_MASK 0x000f 19#define PCI_PROBE_MASK 0x000f
20#define PCI_PROBE_NOEARLY 0x0010
20 21
21#define PCI_NO_SORT 0x0100 22#define PCI_NO_SORT 0x0100
22#define PCI_BIOS_SORT 0x0200 23#define PCI_BIOS_SORT 0x0200
@@ -81,7 +82,9 @@ extern int pci_conf1_write(unsigned int seg, unsigned int bus,
81extern int pci_conf1_read(unsigned int seg, unsigned int bus, 82extern int pci_conf1_read(unsigned int seg, unsigned int bus,
82 unsigned int devfn, int reg, int len, u32 *value); 83 unsigned int devfn, int reg, int len, u32 *value);
83 84
84extern void pci_direct_init(void); 85extern int pci_direct_probe(void);
86extern void pci_direct_init(int type);
85extern void pci_pcbios_init(void); 87extern void pci_pcbios_init(void);
86extern void pci_mmcfg_init(void); 88extern void pci_mmcfg_init(int type);
87extern void pcibios_sort(void); 89extern void pcibios_sort(void);
90
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index de83f38288d0..d9428a0fc8fb 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -59,9 +59,7 @@ static inline unsigned long save_context_stack(struct stack_trace *trace,
59 } 59 }
60} 60}
61 61
62void save_stack_trace(struct stack_trace *trace, 62void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
63 struct task_struct *task, int all_contexts,
64 unsigned int skip)
65{ 63{
66 register unsigned long sp asm ("15"); 64 register unsigned long sp asm ("15");
67 unsigned long orig_sp; 65 unsigned long orig_sp;
@@ -69,22 +67,23 @@ void save_stack_trace(struct stack_trace *trace,
69 sp &= PSW_ADDR_INSN; 67 sp &= PSW_ADDR_INSN;
70 orig_sp = sp; 68 orig_sp = sp;
71 69
72 sp = save_context_stack(trace, &skip, sp, 70 sp = save_context_stack(trace, &trace->skip, sp,
73 S390_lowcore.panic_stack - PAGE_SIZE, 71 S390_lowcore.panic_stack - PAGE_SIZE,
74 S390_lowcore.panic_stack); 72 S390_lowcore.panic_stack);
75 if ((sp != orig_sp) && !all_contexts) 73 if ((sp != orig_sp) && !trace->all_contexts)
76 return; 74 return;
77 sp = save_context_stack(trace, &skip, sp, 75 sp = save_context_stack(trace, &trace->skip, sp,
78 S390_lowcore.async_stack - ASYNC_SIZE, 76 S390_lowcore.async_stack - ASYNC_SIZE,
79 S390_lowcore.async_stack); 77 S390_lowcore.async_stack);
80 if ((sp != orig_sp) && !all_contexts) 78 if ((sp != orig_sp) && !trace->all_contexts)
81 return; 79 return;
82 if (task) 80 if (task)
83 save_context_stack(trace, &skip, sp, 81 save_context_stack(trace, &trace->skip, sp,
84 (unsigned long) task_stack_page(task), 82 (unsigned long) task_stack_page(task),
85 (unsigned long) task_stack_page(task) + THREAD_SIZE); 83 (unsigned long) task_stack_page(task) + THREAD_SIZE);
86 else 84 else
87 save_context_stack(trace, &skip, sp, S390_lowcore.thread_info, 85 save_context_stack(trace, &trace->skip, sp,
86 S390_lowcore.thread_info,
88 S390_lowcore.thread_info + THREAD_SIZE); 87 S390_lowcore.thread_info + THREAD_SIZE);
89 return; 88 return;
90} 89}
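
    Annotation: the s390 change above tracks an interface change in
    <linux/stacktrace.h>: the skip count and the all_contexts flag
    move into struct stack_trace itself, so save_stack_trace() shrinks
    to two arguments. A sketch of the structure as this hunk uses it
    (field names inferred from the accesses above):

        struct task_struct;

        struct stack_trace {
            unsigned int nr_entries, max_entries;
            unsigned long *entries;
            int skip;           /* frames to drop from the top of the trace */
            int all_contexts;   /* keep unwinding across irq/panic stacks */
        };

        void save_stack_trace(struct stack_trace *trace, struct task_struct *task);
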
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 59cc70275754..0e32adf03be1 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -4,7 +4,7 @@ obj-y = bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
4 4
5obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o 5obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o
6 6
7subarch-obj-y = lib/bitops.o kernel/semaphore.o 7subarch-obj-y = lib/bitops.o lib/semaphore.o
8subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem.o 8subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem.o
9subarch-obj-$(CONFIG_MODULES) += kernel/module.o 9subarch-obj-$(CONFIG_MODULES) += kernel/module.o
10 10
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 581ce9af0ec8..efe249e7d6b3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -109,6 +109,7 @@ config X86_PC
109 109
110config X86_VSMP 110config X86_VSMP
111 bool "Support for ScaleMP vSMP" 111 bool "Support for ScaleMP vSMP"
112 depends on PCI
112 help 113 help
113 Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is 114 Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
114 supposed to run on these EM64T-based machines. Only choose this option 115 supposed to run on these EM64T-based machines. Only choose this option
@@ -295,7 +296,7 @@ config NUMA
295 296
296config K8_NUMA 297config K8_NUMA
297 bool "Old style AMD Opteron NUMA detection" 298 bool "Old style AMD Opteron NUMA detection"
298 depends on NUMA 299 depends on NUMA && PCI
299 default y 300 default y
300 help 301 help
301 Enable K8 NUMA node topology detection. You should say Y here if 302 Enable K8 NUMA node topology detection. You should say Y here if
@@ -425,7 +426,6 @@ config IOMMU
425 426
426config CALGARY_IOMMU 427config CALGARY_IOMMU
427 bool "IBM Calgary IOMMU support" 428 bool "IBM Calgary IOMMU support"
428 default y
429 select SWIOTLB 429 select SWIOTLB
430 depends on PCI && EXPERIMENTAL 430 depends on PCI && EXPERIMENTAL
431 help 431 help
@@ -472,8 +472,7 @@ config X86_MCE_AMD
472 the DRAM Error Threshold. 472 the DRAM Error Threshold.
473 473
474config KEXEC 474config KEXEC
475 bool "kexec system call (EXPERIMENTAL)" 475 bool "kexec system call"
476 depends on EXPERIMENTAL
477 help 476 help
478 kexec is a system call that implements the ability to shutdown your 477 kexec is a system call that implements the ability to shutdown your
479 current kernel, and to start another kernel. It is like a reboot 478 current kernel, and to start another kernel. It is like a reboot
@@ -492,7 +491,14 @@ config CRASH_DUMP
492 bool "kernel crash dumps (EXPERIMENTAL)" 491 bool "kernel crash dumps (EXPERIMENTAL)"
493 depends on EXPERIMENTAL 492 depends on EXPERIMENTAL
494 help 493 help
495 Generate crash dump after being started by kexec. 494 Generate crash dump after being started by kexec.
495 This should normally only be set in special crash dump kernels
496 which are loaded in the main kernel with kexec-tools into
497 a specially reserved region and then later executed after
498 a crash by kdump/kexec. The crash dump kernel must be compiled
499 to a memory address not used by the main kernel or BIOS using
500 PHYSICAL_START.
501 For more details see Documentation/kdump/kdump.txt
496 502
497config PHYSICAL_START 503config PHYSICAL_START
498 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) 504 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
@@ -530,6 +536,30 @@ config SECCOMP
530 536
531 If unsure, say Y. Only embedded should say N here. 537 If unsure, say Y. Only embedded should say N here.
532 538
539config CC_STACKPROTECTOR
540 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
541 depends on EXPERIMENTAL
542 help
543 This option turns on the -fstack-protector GCC feature. This
544 feature puts, at the beginning of critical functions, a canary
545 value on the stack just before the return address, and validates
546 the value just before actually returning. Stack based buffer
547 overflows (that need to overwrite this return address) now also
548 overwrite the canary, which gets detected and the attack is then
549 neutralized via a kernel panic.
550
551 This feature requires gcc version 4.2 or above, or a distribution
552 gcc with the feature backported. Older versions are automatically
553 detected and for those versions, this configuration option is ignored.
554
555config CC_STACKPROTECTOR_ALL
556 bool "Use stack-protector for all functions"
557 depends on CC_STACKPROTECTOR
558 help
559 Normally, GCC only inserts the canary value protection for
560 functions that use large-ish on-stack buffers. By enabling
561 this option, GCC will be asked to do this for ALL functions.
562
533source kernel/Kconfig.hz 563source kernel/Kconfig.hz
534 564
535config REORDER 565config REORDER
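The CC_STACKPROTECTOR help text above describes the canary scheme in prose; the following is roughly what the compiler-inserted checks amount to, written out as plain C for illustration. The guard symbol and failure hook are gcc's runtime names; on x86-64 the kernel actually keeps the canary in per-CPU storage, and the checks are generated code, not source:

#include <string.h>

extern unsigned long __stack_chk_guard;   /* canary, set once at startup */
extern void __stack_chk_fail(void);       /* called on mismatch; kernel panics */

void copy_name(const char *in)
{
        unsigned long canary = __stack_chk_guard;  /* prologue: stash a copy
                                                      below the return address */
        char buf[64];

        strcpy(buf, in);        /* an overflow runs over the canary before
                                   it can reach the return address */

        if (canary != __stack_chk_guard)           /* epilogue: verify */
                __stack_chk_fail();
}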
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 431bb4bc36cd..1c0f18d4f887 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -54,6 +54,16 @@ endif
54cflags-y += $(call cc-option,-funit-at-a-time) 54cflags-y += $(call cc-option,-funit-at-a-time)
55# prevent gcc from generating any FP code by mistake 55# prevent gcc from generating any FP code by mistake
56cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) 56cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
57# does binutils support CFI?
58cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
59AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
60
61# is .cfi_signal_frame supported too?
62cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
63AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
64
65cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector )
66cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector-all )
57 67
58CFLAGS += $(cflags-y) 68CFLAGS += $(cflags-y)
59CFLAGS_KERNEL += $(cflags-kernel-y) 69CFLAGS_KERNEL += $(cflags-kernel-y)
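The as-instr probes above assemble a tiny snippet to see whether the installed binutils accept the directives, and pass -DCONFIG_AS_CFI / -DCONFIG_AS_CFI_SIGNAL_FRAME only when they do. Those defines are presumably consumed by the kernel's dwarf2 macro header so the annotations vanish on old assemblers; in outline (the exact header contents are an assumption here):

#ifdef CONFIG_AS_CFI
#define CFI_STARTPROC           .cfi_startproc
#define CFI_ENDPROC             .cfi_endproc
#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
#define CFI_SIGNAL_FRAME        .cfi_signal_frame
#else
#define CFI_SIGNAL_FRAME        /* directive unknown: omit it */
#endif
#else
/* assembler too old: compile all CFI annotations away */
#define CFI_STARTPROC
#define CFI_ENDPROC
#define CFI_SIGNAL_FRAME
#endif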
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
index f89d96f11a9f..e70fa6e1da08 100644
--- a/arch/x86_64/boot/compressed/Makefile
+++ b/arch/x86_64/boot/compressed/Makefile
@@ -7,7 +7,8 @@
7# 7#
8 8
9targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o 9targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
10EXTRA_AFLAGS := -traditional -m32 10EXTRA_AFLAGS := -traditional
11AFLAGS := $(subst -m64,-m32,$(AFLAGS))
11 12
12# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with 13# cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with
13# -m32 14# -m32
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index a50b631f4d2b..c3bfd223ab49 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -526,12 +526,12 @@ is_disk1:
526 movw %cs, %ax # aka SETUPSEG 526 movw %cs, %ax # aka SETUPSEG
527 subw $DELTA_INITSEG, %ax # aka INITSEG 527 subw $DELTA_INITSEG, %ax # aka INITSEG
528 movw %ax, %ds 528 movw %ax, %ds
529 movw $0, (0x1ff) # default is no pointing device 529 movb $0, (0x1ff) # default is no pointing device
530 int $0x11 # int 0x11: equipment list 530 int $0x11 # int 0x11: equipment list
531 testb $0x04, %al # check if mouse installed 531 testb $0x04, %al # check if mouse installed
532 jz no_psmouse 532 jz no_psmouse
533 533
534 movw $0xAA, (0x1ff) # device present 534 movb $0xAA, (0x1ff) # device present
535no_psmouse: 535no_psmouse:
536 536
537#include "../../i386/boot/edd.S" 537#include "../../i386/boot/edd.S"
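The movw-to-movb change above is a one-byte-versus-two-byte store: offset 0x1ff is the last byte of the 512-byte parameter sector, so a 16-bit write there also clobbers the first byte of whatever follows at 0x200. The same bug in C terms (the buffer name is invented for illustration):

unsigned char sector[0x200];            /* 512-byte area; 0x1ff is its last byte */

void set_mouse_flag_buggy(void)
{
        /* 16-bit store: writes sector[0x1ff] AND one byte past the end */
        *(unsigned short *)&sector[0x1ff] = 0xAA;
}

void set_mouse_flag_fixed(void)
{
        sector[0x1ff] = 0xAA;           /* touches only the flag byte */
}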
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 5fb970715941..647610ecb580 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.18-rc4 3# Linux kernel version: 2.6.18-git5
4# Thu Aug 24 21:05:55 2006 4# Tue Sep 26 09:30:47 2006
5# 5#
6CONFIG_X86_64=y 6CONFIG_X86_64=y
7CONFIG_64BIT=y 7CONFIG_64BIT=y
@@ -19,6 +19,7 @@ CONFIG_GENERIC_ISA_DMA=y
19CONFIG_GENERIC_IOMAP=y 19CONFIG_GENERIC_IOMAP=y
20CONFIG_ARCH_MAY_HAVE_PC_FDC=y 20CONFIG_ARCH_MAY_HAVE_PC_FDC=y
21CONFIG_DMI=y 21CONFIG_DMI=y
22CONFIG_AUDIT_ARCH=y
22CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" 23CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
23 24
24# 25#
@@ -38,16 +39,16 @@ CONFIG_SYSVIPC=y
38CONFIG_POSIX_MQUEUE=y 39CONFIG_POSIX_MQUEUE=y
39# CONFIG_BSD_PROCESS_ACCT is not set 40# CONFIG_BSD_PROCESS_ACCT is not set
40# CONFIG_TASKSTATS is not set 41# CONFIG_TASKSTATS is not set
41CONFIG_SYSCTL=y
42# CONFIG_AUDIT is not set 42# CONFIG_AUDIT is not set
43CONFIG_IKCONFIG=y 43CONFIG_IKCONFIG=y
44CONFIG_IKCONFIG_PROC=y 44CONFIG_IKCONFIG_PROC=y
45# CONFIG_CPUSETS is not set 45# CONFIG_CPUSETS is not set
46# CONFIG_RELAY is not set 46# CONFIG_RELAY is not set
47CONFIG_INITRAMFS_SOURCE="" 47CONFIG_INITRAMFS_SOURCE=""
48CONFIG_UID16=y
49CONFIG_CC_OPTIMIZE_FOR_SIZE=y 48CONFIG_CC_OPTIMIZE_FOR_SIZE=y
50# CONFIG_EMBEDDED is not set 49# CONFIG_EMBEDDED is not set
50CONFIG_UID16=y
51CONFIG_SYSCTL=y
51CONFIG_KALLSYMS=y 52CONFIG_KALLSYMS=y
52CONFIG_KALLSYMS_ALL=y 53CONFIG_KALLSYMS_ALL=y
53# CONFIG_KALLSYMS_EXTRA_PASS is not set 54# CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -56,12 +57,12 @@ CONFIG_PRINTK=y
56CONFIG_BUG=y 57CONFIG_BUG=y
57CONFIG_ELF_CORE=y 58CONFIG_ELF_CORE=y
58CONFIG_BASE_FULL=y 59CONFIG_BASE_FULL=y
59CONFIG_RT_MUTEXES=y
60CONFIG_FUTEX=y 60CONFIG_FUTEX=y
61CONFIG_EPOLL=y 61CONFIG_EPOLL=y
62CONFIG_SHMEM=y 62CONFIG_SHMEM=y
63CONFIG_SLAB=y 63CONFIG_SLAB=y
64CONFIG_VM_EVENT_COUNTERS=y 64CONFIG_VM_EVENT_COUNTERS=y
65CONFIG_RT_MUTEXES=y
65# CONFIG_TINY_SHMEM is not set 66# CONFIG_TINY_SHMEM is not set
66CONFIG_BASE_SMALL=0 67CONFIG_BASE_SMALL=0
67# CONFIG_SLOB is not set 68# CONFIG_SLOB is not set
@@ -160,6 +161,7 @@ CONFIG_X86_MCE_AMD=y
160# CONFIG_CRASH_DUMP is not set 161# CONFIG_CRASH_DUMP is not set
161CONFIG_PHYSICAL_START=0x200000 162CONFIG_PHYSICAL_START=0x200000
162CONFIG_SECCOMP=y 163CONFIG_SECCOMP=y
164# CONFIG_CC_STACKPROTECTOR is not set
163# CONFIG_HZ_100 is not set 165# CONFIG_HZ_100 is not set
164CONFIG_HZ_250=y 166CONFIG_HZ_250=y
165# CONFIG_HZ_1000 is not set 167# CONFIG_HZ_1000 is not set
@@ -307,18 +309,23 @@ CONFIG_IP_PNP_DHCP=y
307CONFIG_INET_DIAG=y 309CONFIG_INET_DIAG=y
308CONFIG_INET_TCP_DIAG=y 310CONFIG_INET_TCP_DIAG=y
309# CONFIG_TCP_CONG_ADVANCED is not set 311# CONFIG_TCP_CONG_ADVANCED is not set
310CONFIG_TCP_CONG_BIC=y 312CONFIG_TCP_CONG_CUBIC=y
313CONFIG_DEFAULT_TCP_CONG="cubic"
311CONFIG_IPV6=y 314CONFIG_IPV6=y
312# CONFIG_IPV6_PRIVACY is not set 315# CONFIG_IPV6_PRIVACY is not set
313# CONFIG_IPV6_ROUTER_PREF is not set 316# CONFIG_IPV6_ROUTER_PREF is not set
314# CONFIG_INET6_AH is not set 317# CONFIG_INET6_AH is not set
315# CONFIG_INET6_ESP is not set 318# CONFIG_INET6_ESP is not set
316# CONFIG_INET6_IPCOMP is not set 319# CONFIG_INET6_IPCOMP is not set
320# CONFIG_IPV6_MIP6 is not set
317# CONFIG_INET6_XFRM_TUNNEL is not set 321# CONFIG_INET6_XFRM_TUNNEL is not set
318# CONFIG_INET6_TUNNEL is not set 322# CONFIG_INET6_TUNNEL is not set
319# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set 323# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
320# CONFIG_INET6_XFRM_MODE_TUNNEL is not set 324# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
325# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
321# CONFIG_IPV6_TUNNEL is not set 326# CONFIG_IPV6_TUNNEL is not set
327# CONFIG_IPV6_SUBTREES is not set
328# CONFIG_IPV6_MULTIPLE_TABLES is not set
322# CONFIG_NETWORK_SECMARK is not set 329# CONFIG_NETWORK_SECMARK is not set
323# CONFIG_NETFILTER is not set 330# CONFIG_NETFILTER is not set
324 331
@@ -345,7 +352,6 @@ CONFIG_IPV6=y
345# CONFIG_ATALK is not set 352# CONFIG_ATALK is not set
346# CONFIG_X25 is not set 353# CONFIG_X25 is not set
347# CONFIG_LAPB is not set 354# CONFIG_LAPB is not set
348# CONFIG_NET_DIVERT is not set
349# CONFIG_ECONET is not set 355# CONFIG_ECONET is not set
350# CONFIG_WAN_ROUTER is not set 356# CONFIG_WAN_ROUTER is not set
351 357
@@ -487,6 +493,7 @@ CONFIG_IDEDMA_AUTO=y
487# 493#
488# CONFIG_RAID_ATTRS is not set 494# CONFIG_RAID_ATTRS is not set
489CONFIG_SCSI=y 495CONFIG_SCSI=y
496CONFIG_SCSI_NETLINK=y
490# CONFIG_SCSI_PROC_FS is not set 497# CONFIG_SCSI_PROC_FS is not set
491 498
492# 499#
@@ -508,12 +515,13 @@ CONFIG_SCSI_CONSTANTS=y
508# CONFIG_SCSI_LOGGING is not set 515# CONFIG_SCSI_LOGGING is not set
509 516
510# 517#
511# SCSI Transport Attributes 518# SCSI Transports
512# 519#
513CONFIG_SCSI_SPI_ATTRS=y 520CONFIG_SCSI_SPI_ATTRS=y
514CONFIG_SCSI_FC_ATTRS=y 521CONFIG_SCSI_FC_ATTRS=y
515# CONFIG_SCSI_ISCSI_ATTRS is not set 522# CONFIG_SCSI_ISCSI_ATTRS is not set
516CONFIG_SCSI_SAS_ATTRS=y 523CONFIG_SCSI_SAS_ATTRS=y
524# CONFIG_SCSI_SAS_LIBSAS is not set
517 525
518# 526#
519# SCSI low-level drivers 527# SCSI low-level drivers
@@ -532,29 +540,14 @@ CONFIG_AIC79XX_RESET_DELAY_MS=4000
532# CONFIG_AIC79XX_DEBUG_ENABLE is not set 540# CONFIG_AIC79XX_DEBUG_ENABLE is not set
533CONFIG_AIC79XX_DEBUG_MASK=0 541CONFIG_AIC79XX_DEBUG_MASK=0
534# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set 542# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
543# CONFIG_SCSI_AIC94XX is not set
544# CONFIG_SCSI_ARCMSR is not set
535CONFIG_MEGARAID_NEWGEN=y 545CONFIG_MEGARAID_NEWGEN=y
536CONFIG_MEGARAID_MM=y 546CONFIG_MEGARAID_MM=y
537CONFIG_MEGARAID_MAILBOX=y 547CONFIG_MEGARAID_MAILBOX=y
538# CONFIG_MEGARAID_LEGACY is not set 548# CONFIG_MEGARAID_LEGACY is not set
539CONFIG_MEGARAID_SAS=y 549CONFIG_MEGARAID_SAS=y
540CONFIG_SCSI_SATA=y
541CONFIG_SCSI_SATA_AHCI=y
542CONFIG_SCSI_SATA_SVW=y
543CONFIG_SCSI_ATA_PIIX=y
544# CONFIG_SCSI_SATA_MV is not set
545CONFIG_SCSI_SATA_NV=y
546# CONFIG_SCSI_PDC_ADMA is not set
547# CONFIG_SCSI_HPTIOP is not set 550# CONFIG_SCSI_HPTIOP is not set
548# CONFIG_SCSI_SATA_QSTOR is not set
549# CONFIG_SCSI_SATA_PROMISE is not set
550# CONFIG_SCSI_SATA_SX4 is not set
551CONFIG_SCSI_SATA_SIL=y
552# CONFIG_SCSI_SATA_SIL24 is not set
553# CONFIG_SCSI_SATA_SIS is not set
554# CONFIG_SCSI_SATA_ULI is not set
555CONFIG_SCSI_SATA_VIA=y
556# CONFIG_SCSI_SATA_VITESSE is not set
557CONFIG_SCSI_SATA_INTEL_COMBINED=y
558# CONFIG_SCSI_BUSLOGIC is not set 551# CONFIG_SCSI_BUSLOGIC is not set
559# CONFIG_SCSI_DMX3191D is not set 552# CONFIG_SCSI_DMX3191D is not set
560# CONFIG_SCSI_EATA is not set 553# CONFIG_SCSI_EATA is not set
@@ -563,6 +556,7 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y
563# CONFIG_SCSI_IPS is not set 556# CONFIG_SCSI_IPS is not set
564# CONFIG_SCSI_INITIO is not set 557# CONFIG_SCSI_INITIO is not set
565# CONFIG_SCSI_INIA100 is not set 558# CONFIG_SCSI_INIA100 is not set
559# CONFIG_SCSI_STEX is not set
566# CONFIG_SCSI_SYM53C8XX_2 is not set 560# CONFIG_SCSI_SYM53C8XX_2 is not set
567# CONFIG_SCSI_IPR is not set 561# CONFIG_SCSI_IPR is not set
568# CONFIG_SCSI_QLOGIC_1280 is not set 562# CONFIG_SCSI_QLOGIC_1280 is not set
@@ -573,6 +567,62 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y
573# CONFIG_SCSI_DEBUG is not set 567# CONFIG_SCSI_DEBUG is not set
574 568
575# 569#
570# Serial ATA (prod) and Parallel ATA (experimental) drivers
571#
572CONFIG_ATA=y
573CONFIG_SATA_AHCI=y
574CONFIG_SATA_SVW=y
575CONFIG_ATA_PIIX=y
576# CONFIG_SATA_MV is not set
577CONFIG_SATA_NV=y
578# CONFIG_PDC_ADMA is not set
579# CONFIG_SATA_QSTOR is not set
580# CONFIG_SATA_PROMISE is not set
581# CONFIG_SATA_SX4 is not set
582CONFIG_SATA_SIL=y
583# CONFIG_SATA_SIL24 is not set
584# CONFIG_SATA_SIS is not set
585# CONFIG_SATA_ULI is not set
586CONFIG_SATA_VIA=y
587# CONFIG_SATA_VITESSE is not set
588CONFIG_SATA_INTEL_COMBINED=y
589# CONFIG_PATA_ALI is not set
590# CONFIG_PATA_AMD is not set
591# CONFIG_PATA_ARTOP is not set
592# CONFIG_PATA_ATIIXP is not set
593# CONFIG_PATA_CMD64X is not set
594# CONFIG_PATA_CS5520 is not set
595# CONFIG_PATA_CS5530 is not set
596# CONFIG_PATA_CYPRESS is not set
597# CONFIG_PATA_EFAR is not set
598# CONFIG_ATA_GENERIC is not set
599# CONFIG_PATA_HPT366 is not set
600# CONFIG_PATA_HPT37X is not set
601# CONFIG_PATA_HPT3X2N is not set
602# CONFIG_PATA_HPT3X3 is not set
603# CONFIG_PATA_IT821X is not set
604# CONFIG_PATA_JMICRON is not set
605# CONFIG_PATA_LEGACY is not set
606# CONFIG_PATA_TRIFLEX is not set
607# CONFIG_PATA_MPIIX is not set
608# CONFIG_PATA_OLDPIIX is not set
609# CONFIG_PATA_NETCELL is not set
610# CONFIG_PATA_NS87410 is not set
611# CONFIG_PATA_OPTI is not set
612# CONFIG_PATA_OPTIDMA is not set
613# CONFIG_PATA_PDC_OLD is not set
614# CONFIG_PATA_QDI is not set
615# CONFIG_PATA_RADISYS is not set
616# CONFIG_PATA_RZ1000 is not set
617# CONFIG_PATA_SC1200 is not set
618# CONFIG_PATA_SERVERWORKS is not set
619# CONFIG_PATA_PDC2027X is not set
620# CONFIG_PATA_SIL680 is not set
621# CONFIG_PATA_SIS is not set
622# CONFIG_PATA_VIA is not set
623# CONFIG_PATA_WINBOND is not set
624
625#
576# Multi-device support (RAID and LVM) 626# Multi-device support (RAID and LVM)
577# 627#
578CONFIG_MD=y 628CONFIG_MD=y
@@ -678,6 +728,7 @@ CONFIG_NET_PCI=y
678# CONFIG_ADAPTEC_STARFIRE is not set 728# CONFIG_ADAPTEC_STARFIRE is not set
679CONFIG_B44=y 729CONFIG_B44=y
680CONFIG_FORCEDETH=y 730CONFIG_FORCEDETH=y
731# CONFIG_FORCEDETH_NAPI is not set
681# CONFIG_DGRS is not set 732# CONFIG_DGRS is not set
682# CONFIG_EEPRO100 is not set 733# CONFIG_EEPRO100 is not set
683CONFIG_E100=y 734CONFIG_E100=y
@@ -714,6 +765,7 @@ CONFIG_E1000=y
714# CONFIG_VIA_VELOCITY is not set 765# CONFIG_VIA_VELOCITY is not set
715CONFIG_TIGON3=y 766CONFIG_TIGON3=y
716CONFIG_BNX2=y 767CONFIG_BNX2=y
768# CONFIG_QLA3XXX is not set
717 769
718# 770#
719# Ethernet (10000 Mbit) 771# Ethernet (10000 Mbit)
@@ -1036,6 +1088,7 @@ CONFIG_SOUND=y
1036# Open Sound System 1088# Open Sound System
1037# 1089#
1038CONFIG_SOUND_PRIME=y 1090CONFIG_SOUND_PRIME=y
1091CONFIG_OSS_OBSOLETE_DRIVER=y
1039# CONFIG_SOUND_BT878 is not set 1092# CONFIG_SOUND_BT878 is not set
1040# CONFIG_SOUND_EMU10K1 is not set 1093# CONFIG_SOUND_EMU10K1 is not set
1041# CONFIG_SOUND_FUSION is not set 1094# CONFIG_SOUND_FUSION is not set
@@ -1046,7 +1099,6 @@ CONFIG_SOUND_ICH=y
1046# CONFIG_SOUND_MSNDPIN is not set 1099# CONFIG_SOUND_MSNDPIN is not set
1047# CONFIG_SOUND_VIA82CXXX is not set 1100# CONFIG_SOUND_VIA82CXXX is not set
1048# CONFIG_SOUND_OSS is not set 1101# CONFIG_SOUND_OSS is not set
1049# CONFIG_SOUND_TVMIXER is not set
1050 1102
1051# 1103#
1052# USB support 1104# USB support
@@ -1203,7 +1255,6 @@ CONFIG_USB_MON=y
1203# InfiniBand support 1255# InfiniBand support
1204# 1256#
1205# CONFIG_INFINIBAND is not set 1257# CONFIG_INFINIBAND is not set
1206# CONFIG_IPATH_CORE is not set
1207 1258
1208# 1259#
1209# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) 1260# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
@@ -1449,10 +1500,6 @@ CONFIG_DEBUG_STACKOVERFLOW=y
1449# CONFIG_CRYPTO is not set 1500# CONFIG_CRYPTO is not set
1450 1501
1451# 1502#
1452# Hardware crypto devices
1453#
1454
1455#
1456# Library routines 1503# Library routines
1457# 1504#
1458# CONFIG_CRC_CCITT is not set 1505# CONFIG_CRC_CCITT is not set
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 3bf58af98936..396d3c100011 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -333,7 +333,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
333 return error; 333 return error;
334 } 334 }
335 335
336 error = bprm->file->f_op->read(bprm->file, (char *)text_addr, 336 error = bprm->file->f_op->read(bprm->file,
337 (char __user *)text_addr,
337 ex.a_text+ex.a_data, &pos); 338 ex.a_text+ex.a_data, &pos);
338 if ((signed long)error < 0) { 339 if ((signed long)error < 0) {
339 send_sig(SIGKILL, current, 0); 340 send_sig(SIGKILL, current, 0);
@@ -366,7 +367,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
366 down_write(&current->mm->mmap_sem); 367 down_write(&current->mm->mmap_sem);
367 do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); 368 do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
368 up_write(&current->mm->mmap_sem); 369 up_write(&current->mm->mmap_sem);
369 bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex), 370 bprm->file->f_op->read(bprm->file,
371 (char __user *)N_TXTADDR(ex),
370 ex.a_text+ex.a_data, &pos); 372 ex.a_text+ex.a_data, &pos);
371 flush_icache_range((unsigned long) N_TXTADDR(ex), 373 flush_icache_range((unsigned long) N_TXTADDR(ex),
372 (unsigned long) N_TXTADDR(ex) + 374 (unsigned long) N_TXTADDR(ex) +
@@ -477,7 +479,7 @@ static int load_aout_library(struct file *file)
477 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); 479 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
478 up_write(&current->mm->mmap_sem); 480 up_write(&current->mm->mmap_sem);
479 481
480 file->f_op->read(file, (char *)start_addr, 482 file->f_op->read(file, (char __user *)start_addr,
481 ex.a_text + ex.a_data, &pos); 483 ex.a_text + ex.a_data, &pos);
482 flush_icache_range((unsigned long) start_addr, 484 flush_icache_range((unsigned long) start_addr,
483 (unsigned long) start_addr + ex.a_text + ex.a_data); 485 (unsigned long) start_addr + ex.a_text + ex.a_data);
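The casts added in this file change no generated code; they exist for sparse's address-space checking (make C=1), which flags a plain integer or kernel pointer passed where a __user pointer is expected. A hedged sketch of the idiom, with an invented wrapper function:

static ssize_t demo_read(struct file *file, unsigned long text_addr,
                         unsigned long len, loff_t *pos)
{
        /*
         * text_addr is a user-space address carried in an integer, so the
         * explicit __user cast tells sparse the crossing is intentional.
         */
        return file->f_op->read(file, (char __user *)text_addr, len, pos);
}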
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index 25e5ca22204c..a6ba9951e86c 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -113,25 +113,19 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
113} 113}
114 114
115asmlinkage long 115asmlinkage long
116sys32_sigsuspend(int history0, int history1, old_sigset_t mask, 116sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
117 struct pt_regs *regs)
118{ 117{
119 sigset_t saveset;
120
121 mask &= _BLOCKABLE; 118 mask &= _BLOCKABLE;
122 spin_lock_irq(&current->sighand->siglock); 119 spin_lock_irq(&current->sighand->siglock);
123 saveset = current->blocked; 120 current->saved_sigmask = current->blocked;
124 siginitset(&current->blocked, mask); 121 siginitset(&current->blocked, mask);
125 recalc_sigpending(); 122 recalc_sigpending();
126 spin_unlock_irq(&current->sighand->siglock); 123 spin_unlock_irq(&current->sighand->siglock);
127 124
128 regs->rax = -EINTR; 125 current->state = TASK_INTERRUPTIBLE;
129 while (1) { 126 schedule();
130 current->state = TASK_INTERRUPTIBLE; 127 set_thread_flag(TIF_RESTORE_SIGMASK);
131 schedule(); 128 return -ERESTARTNOHAND;
132 if (do_signal(regs, &saveset))
133 return -EINTR;
134 }
135} 129}
136 130
137asmlinkage long 131asmlinkage long
@@ -437,15 +431,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
437 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 431 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
438 goto give_sigsegv; 432 goto give_sigsegv;
439 433
440 { 434 err |= __put_user(sig, &frame->sig);
441 struct exec_domain *ed = current_thread_info()->exec_domain;
442 err |= __put_user((ed
443 && ed->signal_invmap
444 && sig < 32
445 ? ed->signal_invmap[sig]
446 : sig),
447 &frame->sig);
448 }
449 if (err) 435 if (err)
450 goto give_sigsegv; 436 goto give_sigsegv;
451 437
@@ -492,6 +478,11 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
492 regs->rsp = (unsigned long) frame; 478 regs->rsp = (unsigned long) frame;
493 regs->rip = (unsigned long) ka->sa.sa_handler; 479 regs->rip = (unsigned long) ka->sa.sa_handler;
494 480
481 /* Make -mregparm=3 work */
482 regs->rax = sig;
483 regs->rdx = 0;
484 regs->rcx = 0;
485
495 asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 486 asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
496 asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 487 asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
497 488
@@ -499,20 +490,20 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
499 regs->ss = __USER32_DS; 490 regs->ss = __USER32_DS;
500 491
501 set_fs(USER_DS); 492 set_fs(USER_DS);
502 regs->eflags &= ~TF_MASK; 493 regs->eflags &= ~TF_MASK;
503 if (test_thread_flag(TIF_SINGLESTEP)) 494 if (test_thread_flag(TIF_SINGLESTEP))
504 ptrace_notify(SIGTRAP); 495 ptrace_notify(SIGTRAP);
505 496
506#if DEBUG_SIG 497#if DEBUG_SIG
507 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 498 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
508 current->comm, current->pid, frame, regs->rip, frame->pretcode); 499 current->comm, current->pid, frame, regs->rip, frame->pretcode);
509#endif 500#endif
510 501
511 return 1; 502 return 0;
512 503
513give_sigsegv: 504give_sigsegv:
514 force_sigsegv(sig, current); 505 force_sigsegv(sig, current);
515 return 0; 506 return -EFAULT;
516} 507}
517 508
518int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 509int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -595,18 +586,18 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
595 regs->ss = __USER32_DS; 586 regs->ss = __USER32_DS;
596 587
597 set_fs(USER_DS); 588 set_fs(USER_DS);
598 regs->eflags &= ~TF_MASK; 589 regs->eflags &= ~TF_MASK;
599 if (test_thread_flag(TIF_SINGLESTEP)) 590 if (test_thread_flag(TIF_SINGLESTEP))
600 ptrace_notify(SIGTRAP); 591 ptrace_notify(SIGTRAP);
601 592
602#if DEBUG_SIG 593#if DEBUG_SIG
603 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", 594 printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
604 current->comm, current->pid, frame, regs->rip, frame->pretcode); 595 current->comm, current->pid, frame, regs->rip, frame->pretcode);
605#endif 596#endif
606 597
607 return 1; 598 return 0;
608 599
609give_sigsegv: 600give_sigsegv:
610 force_sigsegv(sig, current); 601 force_sigsegv(sig, current);
611 return 0; 602 return -EFAULT;
612} 603}
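The sys32_sigsuspend rewrite above replaces the old hand-rolled do_signal() loop with the TIF_RESTORE_SIGMASK convention, and the frame-setup functions now return 0/-EFAULT instead of 1/0. The pattern in outline, condensed from the code in this hunk rather than new material:

asmlinkage long sketch_sigsuspend(old_sigset_t mask)
{
        mask &= _BLOCKABLE;
        spin_lock_irq(&current->sighand->siglock);
        current->saved_sigmask = current->blocked;  /* remember the old mask */
        siginitset(&current->blocked, mask);        /* install the new one */
        recalc_sigpending();
        spin_unlock_irq(&current->sighand->siglock);

        current->state = TASK_INTERRUPTIBLE;
        schedule();                             /* sleep until a signal */
        set_thread_flag(TIF_RESTORE_SIGMASK);   /* signal-exit path restores
                                                   saved_sigmask for us */
        return -ERESTARTNOHAND;
}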
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 5d4a7d125ed0..b4aa875e175b 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -71,6 +71,7 @@
71 */ 71 */
72ENTRY(ia32_sysenter_target) 72ENTRY(ia32_sysenter_target)
73 CFI_STARTPROC32 simple 73 CFI_STARTPROC32 simple
74 CFI_SIGNAL_FRAME
74 CFI_DEF_CFA rsp,0 75 CFI_DEF_CFA rsp,0
75 CFI_REGISTER rsp,rbp 76 CFI_REGISTER rsp,rbp
76 swapgs 77 swapgs
@@ -186,6 +187,7 @@ ENDPROC(ia32_sysenter_target)
186 */ 187 */
187ENTRY(ia32_cstar_target) 188ENTRY(ia32_cstar_target)
188 CFI_STARTPROC32 simple 189 CFI_STARTPROC32 simple
190 CFI_SIGNAL_FRAME
189 CFI_DEF_CFA rsp,PDA_STACKOFFSET 191 CFI_DEF_CFA rsp,PDA_STACKOFFSET
190 CFI_REGISTER rip,rcx 192 CFI_REGISTER rip,rcx
191 /*CFI_REGISTER rflags,r11*/ 193 /*CFI_REGISTER rflags,r11*/
@@ -293,6 +295,7 @@ ia32_badarg:
293 295
294ENTRY(ia32_syscall) 296ENTRY(ia32_syscall)
295 CFI_STARTPROC simple 297 CFI_STARTPROC simple
298 CFI_SIGNAL_FRAME
296 CFI_DEF_CFA rsp,SS+8-RIP 299 CFI_DEF_CFA rsp,SS+8-RIP
297 /*CFI_REL_OFFSET ss,SS-RIP*/ 300 /*CFI_REL_OFFSET ss,SS-RIP*/
298 CFI_REL_OFFSET rsp,RSP-RIP 301 CFI_REL_OFFSET rsp,RSP-RIP
@@ -370,6 +373,7 @@ ENTRY(ia32_ptregs_common)
370 popq %r11 373 popq %r11
371 CFI_ENDPROC 374 CFI_ENDPROC
372 CFI_STARTPROC32 simple 375 CFI_STARTPROC32 simple
376 CFI_SIGNAL_FRAME
373 CFI_DEF_CFA rsp,SS+8-ARGOFFSET 377 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
374 CFI_REL_OFFSET rax,RAX-ARGOFFSET 378 CFI_REL_OFFSET rax,RAX-ARGOFFSET
375 CFI_REL_OFFSET rcx,RCX-ARGOFFSET 379 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
@@ -703,8 +707,8 @@ ia32_sys_call_table:
703 .quad sys_readlinkat /* 305 */ 707 .quad sys_readlinkat /* 305 */
704 .quad sys_fchmodat 708 .quad sys_fchmodat
705 .quad sys_faccessat 709 .quad sys_faccessat
706 .quad quiet_ni_syscall /* pselect6 for now */ 710 .quad compat_sys_pselect6
707 .quad quiet_ni_syscall /* ppoll for now */ 711 .quad compat_sys_ppoll
708 .quad sys_unshare /* 310 */ 712 .quad sys_unshare /* 310 */
709 .quad compat_sys_set_robust_list 713 .quad compat_sys_set_robust_list
710 .quad compat_sys_get_robust_list 714 .quad compat_sys_get_robust_list
@@ -713,4 +717,5 @@ ia32_sys_call_table:
713 .quad sys_tee 717 .quad sys_tee
714 .quad compat_sys_vmsplice 718 .quad compat_sys_vmsplice
715 .quad compat_sys_move_pages 719 .quad compat_sys_move_pages
720 .quad sys_getcpu
716ia32_syscall_end: 721ia32_syscall_end:
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 659c0722f6b8..d18198ed636b 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -117,6 +117,10 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
117 if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1) 117 if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1)
118 return -EIO; 118 return -EIO;
119 child->thread.debugreg7 = val; 119 child->thread.debugreg7 = val;
120 if (val)
121 set_tsk_thread_flag(child, TIF_DEBUG);
122 else
123 clear_tsk_thread_flag(child, TIF_DEBUG);
120 break; 124 break;
121 125
122 default: 126 default:
@@ -371,8 +375,10 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
371 ret = -EIO; 375 ret = -EIO;
372 if (!access_ok(VERIFY_READ, u, sizeof(*u))) 376 if (!access_ok(VERIFY_READ, u, sizeof(*u)))
373 break; 377 break;
374 /* no checking to be bug-to-bug compatible with i386 */ 378 /* no checking to be bug-to-bug compatible with i386. */
375 __copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)); 379 /* but silence warning */
380 if (__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u)))
381 ;
376 set_stopped_child_used_math(child); 382 set_stopped_child_used_math(child);
377 child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; 383 child->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask;
378 ret = 0; 384 ret = 0;
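Tracking TIF_DEBUG when debugreg7 is written presumably lets the context-switch path skip reloading the debug registers for the common case of tasks with no watchpoints. A speculative sketch of such a consumer; the hook name is invented, only the flag itself comes from this patch:

static inline void maybe_load_debugregs(struct task_struct *next)
{
        if (!test_tsk_thread_flag(next, TIF_DEBUG))
                return;                 /* fast path: no watchpoints set */

        set_debugreg(next->thread.debugreg0, 0);
        set_debugreg(next->thread.debugreg1, 1);
        set_debugreg(next->thread.debugreg2, 2);
        set_debugreg(next->thread.debugreg3, 3);
        set_debugreg(next->thread.debugreg6, 6);
        set_debugreg(next->thread.debugreg7, 7);
}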
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index 9c130993380d..b0e82c7947dc 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -60,6 +60,7 @@
60#include <linux/highuid.h> 60#include <linux/highuid.h>
61#include <linux/vmalloc.h> 61#include <linux/vmalloc.h>
62#include <linux/fsnotify.h> 62#include <linux/fsnotify.h>
63#include <linux/sysctl.h>
63#include <asm/mman.h> 64#include <asm/mman.h>
64#include <asm/types.h> 65#include <asm/types.h>
65#include <asm/uaccess.h> 66#include <asm/uaccess.h>
@@ -389,7 +390,9 @@ sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
389 } 390 }
390 } 391 }
391 set_fs (KERNEL_DS); 392 set_fs (KERNEL_DS);
392 ret = sys_rt_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL, 393 ret = sys_rt_sigprocmask(how,
394 set ? (sigset_t __user *)&s : NULL,
395 oset ? (sigset_t __user *)&s : NULL,
393 sigsetsize); 396 sigsetsize);
394 set_fs (old_fs); 397 set_fs (old_fs);
395 if (ret) return ret; 398 if (ret) return ret;
@@ -541,7 +544,7 @@ sys32_sysinfo(struct sysinfo32 __user *info)
541 int bitcount = 0; 544 int bitcount = 0;
542 545
543 set_fs (KERNEL_DS); 546 set_fs (KERNEL_DS);
544 ret = sys_sysinfo(&s); 547 ret = sys_sysinfo((struct sysinfo __user *)&s);
545 set_fs (old_fs); 548 set_fs (old_fs);
546 549
547 /* Check to see if any memory value is too large for 32-bit and scale 550 /* Check to see if any memory value is too large for 32-bit and scale
@@ -589,7 +592,7 @@ sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *int
589 mm_segment_t old_fs = get_fs (); 592 mm_segment_t old_fs = get_fs ();
590 593
591 set_fs (KERNEL_DS); 594 set_fs (KERNEL_DS);
592 ret = sys_sched_rr_get_interval(pid, &t); 595 ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
593 set_fs (old_fs); 596 set_fs (old_fs);
594 if (put_compat_timespec(&t, interval)) 597 if (put_compat_timespec(&t, interval))
595 return -EFAULT; 598 return -EFAULT;
@@ -605,7 +608,7 @@ sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize)
605 mm_segment_t old_fs = get_fs(); 608 mm_segment_t old_fs = get_fs();
606 609
607 set_fs (KERNEL_DS); 610 set_fs (KERNEL_DS);
608 ret = sys_rt_sigpending(&s, sigsetsize); 611 ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
609 set_fs (old_fs); 612 set_fs (old_fs);
610 if (!ret) { 613 if (!ret) {
611 switch (_NSIG_WORDS) { 614 switch (_NSIG_WORDS) {
@@ -630,7 +633,7 @@ sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
630 if (copy_siginfo_from_user32(&info, uinfo)) 633 if (copy_siginfo_from_user32(&info, uinfo))
631 return -EFAULT; 634 return -EFAULT;
632 set_fs (KERNEL_DS); 635 set_fs (KERNEL_DS);
633 ret = sys_rt_sigqueueinfo(pid, sig, &info); 636 ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
634 set_fs (old_fs); 637 set_fs (old_fs);
635 return ret; 638 return ret;
636} 639}
@@ -666,9 +669,6 @@ sys32_sysctl(struct sysctl_ia32 __user *args32)
666 size_t oldlen; 669 size_t oldlen;
667 int __user *namep; 670 int __user *namep;
668 long ret; 671 long ret;
669 extern int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
670 void *newval, size_t newlen);
671
672 672
673 if (copy_from_user(&a32, args32, sizeof (a32))) 673 if (copy_from_user(&a32, args32, sizeof (a32)))
674 return -EFAULT; 674 return -EFAULT;
@@ -692,7 +692,8 @@ sys32_sysctl(struct sysctl_ia32 __user *args32)
692 692
693 set_fs(KERNEL_DS); 693 set_fs(KERNEL_DS);
694 lock_kernel(); 694 lock_kernel();
695 ret = do_sysctl(namep, a32.nlen, oldvalp, &oldlen, newvalp, (size_t) a32.newlen); 695 ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen,
696 newvalp, (size_t) a32.newlen);
696 unlock_kernel(); 697 unlock_kernel();
697 set_fs(old_fs); 698 set_fs(old_fs);
698 699
@@ -743,7 +744,8 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count)
743 return -EFAULT; 744 return -EFAULT;
744 745
745 set_fs(KERNEL_DS); 746 set_fs(KERNEL_DS);
746 ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); 747 ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL,
748 count);
747 set_fs(old_fs); 749 set_fs(old_fs);
748 750
749 if (offset && put_user(of, offset)) 751 if (offset && put_user(of, offset))
@@ -778,7 +780,7 @@ asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
778 780
779asmlinkage long sys32_olduname(struct oldold_utsname __user * name) 781asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
780{ 782{
781 int error; 783 int err;
782 784
783 if (!name) 785 if (!name)
784 return -EFAULT; 786 return -EFAULT;
@@ -787,27 +789,31 @@ asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
787 789
788 down_read(&uts_sem); 790 down_read(&uts_sem);
789 791
790 error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); 792 err = __copy_to_user(&name->sysname,&system_utsname.sysname,
791 __put_user(0,name->sysname+__OLD_UTS_LEN); 793 __OLD_UTS_LEN);
792 __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); 794 err |= __put_user(0,name->sysname+__OLD_UTS_LEN);
793 __put_user(0,name->nodename+__OLD_UTS_LEN); 795 err |= __copy_to_user(&name->nodename,&system_utsname.nodename,
794 __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); 796 __OLD_UTS_LEN);
795 __put_user(0,name->release+__OLD_UTS_LEN); 797 err |= __put_user(0,name->nodename+__OLD_UTS_LEN);
796 __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); 798 err |= __copy_to_user(&name->release,&system_utsname.release,
797 __put_user(0,name->version+__OLD_UTS_LEN); 799 __OLD_UTS_LEN);
800 err |= __put_user(0,name->release+__OLD_UTS_LEN);
801 err |= __copy_to_user(&name->version,&system_utsname.version,
802 __OLD_UTS_LEN);
803 err |= __put_user(0,name->version+__OLD_UTS_LEN);
798 { 804 {
799 char *arch = "x86_64"; 805 char *arch = "x86_64";
800 if (personality(current->personality) == PER_LINUX32) 806 if (personality(current->personality) == PER_LINUX32)
801 arch = "i686"; 807 arch = "i686";
802 808
803 __copy_to_user(&name->machine,arch,strlen(arch)+1); 809 err |= __copy_to_user(&name->machine,arch,strlen(arch)+1);
804 } 810 }
805 811
806 up_read(&uts_sem); 812 up_read(&uts_sem);
807 813
808 error = error ? -EFAULT : 0; 814 err = err ? -EFAULT : 0;
809 815
810 return error; 816 return err;
811} 817}
812 818
813long sys32_uname(struct old_utsname __user * name) 819long sys32_uname(struct old_utsname __user * name)
@@ -831,7 +837,7 @@ long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
831 837
832 seg = get_fs(); 838 seg = get_fs();
833 set_fs(KERNEL_DS); 839 set_fs(KERNEL_DS);
834 ret = sys_ustat(dev,&u); 840 ret = sys_ustat(dev, (struct ustat __user *)&u);
835 set_fs(seg); 841 set_fs(seg);
836 if (ret >= 0) { 842 if (ret >= 0) {
837 if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) || 843 if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) ||
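Most hunks in this file annotate one recurring compat idiom: run the native syscall on a kernel-stack buffer under KERNEL_DS, then translate the result for the 32-bit caller. Condensed into one sketch (sys_example() stands in for any native syscall; the rest follows the code above):

static long compat_example(struct compat_timespec __user *uts32)
{
        struct timespec t;
        mm_segment_t old_fs = get_fs();
        long ret;

        set_fs(KERNEL_DS);      /* user-access checks now accept &t */
        ret = sys_example((struct timespec __user *)&t);  /* cast: sparse only */
        set_fs(old_fs);

        if (!ret && put_compat_timespec(&t, uts32))
                return -EFAULT; /* convert 64-bit layout for the 32-bit task */
        return ret;
}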
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index b5aaeafc1cd3..3c7cbff04d3d 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -11,7 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
11 pci-dma.o pci-nommu.o alternative.o 11 pci-dma.o pci-nommu.o alternative.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o 13obj-$(CONFIG_STACKTRACE) += stacktrace.o
14obj-$(CONFIG_X86_MCE) += mce.o 14obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
15obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 15obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
16obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o 16obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
17obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ 17obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
@@ -20,8 +20,8 @@ obj-$(CONFIG_X86_MSR) += msr.o
20obj-$(CONFIG_MICROCODE) += microcode.o 20obj-$(CONFIG_MICROCODE) += microcode.o
21obj-$(CONFIG_X86_CPUID) += cpuid.o 21obj-$(CONFIG_X86_CPUID) += cpuid.o
22obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o 22obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
23obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o 23obj-y += apic.o nmi.o
24obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \ 24obj-y += io_apic.o mpparse.o \
25 genapic.o genapic_cluster.o genapic_flat.o 25 genapic.o genapic_cluster.o genapic_flat.o
26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o 26obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
27obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 27obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
@@ -39,12 +39,14 @@ obj-$(CONFIG_K8_NB) += k8.o
39obj-$(CONFIG_AUDIT) += audit.o 39obj-$(CONFIG_AUDIT) += audit.o
40 40
41obj-$(CONFIG_MODULES) += module.o 41obj-$(CONFIG_MODULES) += module.o
42obj-$(CONFIG_PCI) += early-quirks.o
42 43
43obj-y += topology.o 44obj-y += topology.o
44obj-y += intel_cacheinfo.o 45obj-y += intel_cacheinfo.o
45 46
46CFLAGS_vsyscall.o := $(PROFILING) -g0 47CFLAGS_vsyscall.o := $(PROFILING) -g0
47 48
49therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o
48bootflag-y += ../../i386/kernel/bootflag.o 50bootflag-y += ../../i386/kernel/bootflag.o
49cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o 51cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
50topology-y += ../../i386/kernel/topology.o 52topology-y += ../../i386/kernel/topology.o
@@ -54,4 +56,3 @@ quirks-y += ../../i386/kernel/quirks.o
54i8237-y += ../../i386/kernel/i8237.o 56i8237-y += ../../i386/kernel/i8237.o
55msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o 57msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
56alternative-y += ../../i386/kernel/alternative.o 58alternative-y += ../../i386/kernel/alternative.o
57
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 58af8e73738b..b487396c4c5b 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -17,6 +17,7 @@
17#include <linux/pci_ids.h> 17#include <linux/pci_ids.h>
18#include <linux/pci.h> 18#include <linux/pci.h>
19#include <linux/bitops.h> 19#include <linux/bitops.h>
20#include <linux/ioport.h>
20#include <asm/e820.h> 21#include <asm/e820.h>
21#include <asm/io.h> 22#include <asm/io.h>
22#include <asm/proto.h> 23#include <asm/proto.h>
@@ -33,6 +34,18 @@ int fallback_aper_force __initdata = 0;
33 34
34int fix_aperture __initdata = 1; 35int fix_aperture __initdata = 1;
35 36
37static struct resource gart_resource = {
38 .name = "GART",
39 .flags = IORESOURCE_MEM,
40};
41
42static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
43{
44 gart_resource.start = aper_base;
45 gart_resource.end = aper_base + aper_size - 1;
46 insert_resource(&iomem_resource, &gart_resource);
47}
48
36/* This code runs before the PCI subsystem is initialized, so just 49/* This code runs before the PCI subsystem is initialized, so just
37 access the northbridge directly. */ 50 access the northbridge directly. */
38 51
@@ -48,7 +61,7 @@ static u32 __init allocate_aperture(void)
48 61
49 /* 62 /*
50 * Aperture has to be naturally aligned. This means an 2GB aperture won't 63 * Aperture has to be naturally aligned. This means an 2GB aperture won't
51 * have much chances to find a place in the lower 4GB of memory. 64 * have much chance of finding a place in the lower 4GB of memory.
52 * Unfortunately we cannot move it up because that would make the 65 * Unfortunately we cannot move it up because that would make the
53 * IOMMU useless. 66 * IOMMU useless.
54 */ 67 */
@@ -62,6 +75,7 @@ static u32 __init allocate_aperture(void)
62 } 75 }
63 printk("Mapping aperture over %d KB of RAM @ %lx\n", 76 printk("Mapping aperture over %d KB of RAM @ %lx\n",
64 aper_size >> 10, __pa(p)); 77 aper_size >> 10, __pa(p));
78 insert_aperture_resource((u32)__pa(p), aper_size);
65 return (u32)__pa(p); 79 return (u32)__pa(p);
66} 80}
67 81
@@ -198,7 +212,7 @@ void __init iommu_hole_init(void)
198 u64 aper_base, last_aper_base = 0; 212 u64 aper_base, last_aper_base = 0;
199 int valid_agp = 0; 213 int valid_agp = 0;
200 214
201 if (iommu_aperture_disabled || !fix_aperture) 215 if (iommu_aperture_disabled || !fix_aperture || !early_pci_allowed())
202 return; 216 return;
203 217
204 printk("Checking aperture...\n"); 218 printk("Checking aperture...\n");
@@ -233,8 +247,13 @@ void __init iommu_hole_init(void)
233 last_aper_base = aper_base; 247 last_aper_base = aper_base;
234 } 248 }
235 249
236 if (!fix && !fallback_aper_force) 250 if (!fix && !fallback_aper_force) {
251 if (last_aper_base) {
252 unsigned long n = (32 * 1024 * 1024) << last_aper_order;
253 insert_aperture_resource((u32)last_aper_base, n);
254 }
237 return; 255 return;
256 }
238 257
239 if (!fallback_aper_force) 258 if (!fallback_aper_force)
240 aper_alloc = search_agp_bridge(&aper_order, &valid_agp); 259 aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
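insert_aperture_resource() above is the standard resource-tree idiom: fill in a static struct resource and insert it into iomem_resource, so the GART range shows up (for instance in /proc/iomem) and cannot be handed out to anything else. The same pattern reduced to its core, with illustrative names:

static struct resource demo_res = {
        .name  = "GART",
        .flags = IORESOURCE_MEM,
};

static void __init claim_gart(u32 base, u32 size)
{
        demo_res.start = base;
        demo_res.end   = base + size - 1;       /* .end is inclusive */
        insert_resource(&iomem_resource, &demo_res);
}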
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 2b8cef037a65..135ff25e6b44 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -25,6 +25,7 @@
25#include <linux/kernel_stat.h> 25#include <linux/kernel_stat.h>
26#include <linux/sysdev.h> 26#include <linux/sysdev.h>
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/ioport.h>
28 29
29#include <asm/atomic.h> 30#include <asm/atomic.h>
30#include <asm/smp.h> 31#include <asm/smp.h>
@@ -36,13 +37,20 @@
36#include <asm/idle.h> 37#include <asm/idle.h>
37#include <asm/proto.h> 38#include <asm/proto.h>
38#include <asm/timex.h> 39#include <asm/timex.h>
40#include <asm/apic.h>
39 41
42int apic_mapped;
40int apic_verbosity; 43int apic_verbosity;
41int apic_runs_main_timer; 44int apic_runs_main_timer;
42int apic_calibrate_pmtmr __initdata; 45int apic_calibrate_pmtmr __initdata;
43 46
44int disable_apic_timer __initdata; 47int disable_apic_timer __initdata;
45 48
49static struct resource lapic_resource = {
50 .name = "Local APIC",
51 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
52};
53
46/* 54/*
47 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as 55 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
48 * IPIs in place of local APIC timers 56 * IPIs in place of local APIC timers
@@ -136,72 +144,40 @@ void clear_local_APIC(void)
136 apic_read(APIC_ESR); 144 apic_read(APIC_ESR);
137} 145}
138 146
139void __init connect_bsp_APIC(void)
140{
141 if (pic_mode) {
142 /*
143 * Do not trust the local APIC being empty at bootup.
144 */
145 clear_local_APIC();
146 /*
147 * PIC mode, enable APIC mode in the IMCR, i.e.
148 * connect BSP's local APIC to INT and NMI lines.
149 */
150 apic_printk(APIC_VERBOSE, "leaving PIC mode, enabling APIC mode.\n");
151 outb(0x70, 0x22);
152 outb(0x01, 0x23);
153 }
154}
155
156void disconnect_bsp_APIC(int virt_wire_setup) 147void disconnect_bsp_APIC(int virt_wire_setup)
157{ 148{
158 if (pic_mode) { 149 /* Go back to Virtual Wire compatibility mode */
159 /* 150 unsigned long value;
160 * Put the board back into PIC mode (has an effect 151
161 * only on certain older boards). Note that APIC 152 /* For the spurious interrupt use vector F, and enable it */
162 * interrupts, including IPIs, won't work beyond 153 value = apic_read(APIC_SPIV);
163 * this point! The only exception are INIT IPIs. 154 value &= ~APIC_VECTOR_MASK;
164 */ 155 value |= APIC_SPIV_APIC_ENABLED;
165 apic_printk(APIC_QUIET, "disabling APIC mode, entering PIC mode.\n"); 156 value |= 0xf;
166 outb(0x70, 0x22); 157 apic_write(APIC_SPIV, value);
167 outb(0x00, 0x23);
168 }
169 else {
170 /* Go back to Virtual Wire compatibility mode */
171 unsigned long value;
172
173 /* For the spurious interrupt use vector F, and enable it */
174 value = apic_read(APIC_SPIV);
175 value &= ~APIC_VECTOR_MASK;
176 value |= APIC_SPIV_APIC_ENABLED;
177 value |= 0xf;
178 apic_write(APIC_SPIV, value);
179
180 if (!virt_wire_setup) {
181 /* For LVT0 make it edge triggered, active high, external and enabled */
182 value = apic_read(APIC_LVT0);
183 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
184 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
185 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
186 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
187 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
188 apic_write(APIC_LVT0, value);
189 }
190 else {
191 /* Disable LVT0 */
192 apic_write(APIC_LVT0, APIC_LVT_MASKED);
193 }
194 158
195 /* For LVT1 make it edge triggered, active high, nmi and enabled */ 159 if (!virt_wire_setup) {
196 value = apic_read(APIC_LVT1); 160 /* For LVT0 make it edge triggered, active high, external and enabled */
197 value &= ~( 161 value = apic_read(APIC_LVT0);
198 APIC_MODE_MASK | APIC_SEND_PENDING | 162 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
199 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 163 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
200 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 164 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
201 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 165 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
202 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); 166 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
203 apic_write(APIC_LVT1, value); 167 apic_write(APIC_LVT0, value);
168 } else {
169 /* Disable LVT0 */
170 apic_write(APIC_LVT0, APIC_LVT_MASKED);
204 } 171 }
172
173 /* For LVT1 make it edge triggered, active high, nmi and enabled */
174 value = apic_read(APIC_LVT1);
175 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
176 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
177 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
178 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
179 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
180 apic_write(APIC_LVT1, value);
205} 181}
206 182
207void disable_local_APIC(void) 183void disable_local_APIC(void)
@@ -297,8 +273,6 @@ void __init sync_Arb_IDs(void)
297 | APIC_DM_INIT); 273 | APIC_DM_INIT);
298} 274}
299 275
300extern void __error_in_apic_c (void);
301
302/* 276/*
303 * An initial setup of the virtual wire mode. 277 * An initial setup of the virtual wire mode.
304 */ 278 */
@@ -345,8 +319,7 @@ void __cpuinit setup_local_APIC (void)
345 319
346 value = apic_read(APIC_LVR); 320 value = apic_read(APIC_LVR);
347 321
348 if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) 322 BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
349 __error_in_apic_c();
350 323
351 /* 324 /*
352 * Double-check whether this APIC is really registered. 325 * Double-check whether this APIC is really registered.
@@ -399,32 +372,8 @@ void __cpuinit setup_local_APIC (void)
399 */ 372 */
400 value |= APIC_SPIV_APIC_ENABLED; 373 value |= APIC_SPIV_APIC_ENABLED;
401 374
402 /* 375 /* We always use processor focus */
403 * Some unknown Intel IO/APIC (or APIC) errata is biting us with 376
404 * certain networking cards. If high frequency interrupts are
405 * happening on a particular IOAPIC pin, plus the IOAPIC routing
406 * entry is masked/unmasked at a high rate as well then sooner or
407 * later IOAPIC line gets 'stuck', no more interrupts are received
408 * from the device. If focus CPU is disabled then the hang goes
409 * away, oh well :-(
410 *
411 * [ This bug can be reproduced easily with a level-triggered
412 * PCI Ne2000 networking cards and PII/PIII processors, dual
413 * BX chipset. ]
414 */
415 /*
416 * Actually disabling the focus CPU check just makes the hang less
417 * frequent as it makes the interrupt distribution model be more
418 * like LRU than MRU (the short-term load is more even across CPUs).
419 * See also the comment in end_level_ioapic_irq(). --macro
420 */
421#if 1
422 /* Enable focus processor (bit==0) */
423 value &= ~APIC_SPIV_FOCUS_DISABLED;
424#else
425 /* Disable focus processor (bit==1) */
426 value |= APIC_SPIV_FOCUS_DISABLED;
427#endif
428 /* 377 /*
429 * Set spurious IRQ vector 378 * Set spurious IRQ vector
430 */ 379 */
@@ -442,7 +391,7 @@ void __cpuinit setup_local_APIC (void)
442 * TODO: set up through-local-APIC from through-I/O-APIC? --macro 391 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
443 */ 392 */
444 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; 393 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
445 if (!smp_processor_id() && (pic_mode || !value)) { 394 if (!smp_processor_id() && !value) {
446 value = APIC_DM_EXTINT; 395 value = APIC_DM_EXTINT;
447 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id()); 396 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", smp_processor_id());
448 } else { 397 } else {
@@ -479,8 +428,7 @@ void __cpuinit setup_local_APIC (void)
479 } 428 }
480 429
481 nmi_watchdog_default(); 430 nmi_watchdog_default();
482 if (nmi_watchdog == NMI_LOCAL_APIC) 431 setup_apic_nmi_watchdog(NULL);
483 setup_apic_nmi_watchdog();
484 apic_pm_activate(); 432 apic_pm_activate();
485} 433}
486 434
@@ -527,8 +475,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
527 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 475 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
528 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 476 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
529 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 477 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
530 local_save_flags(flags); 478 local_irq_save(flags);
531 local_irq_disable();
532 disable_local_APIC(); 479 disable_local_APIC();
533 local_irq_restore(flags); 480 local_irq_restore(flags);
534 return 0; 481 return 0;
@@ -606,18 +553,24 @@ static void apic_pm_activate(void) { }
606 553
607static int __init apic_set_verbosity(char *str) 554static int __init apic_set_verbosity(char *str)
608{ 555{
556 if (str == NULL) {
557 skip_ioapic_setup = 0;
558 ioapic_force = 1;
559 return 0;
560 }
609 if (strcmp("debug", str) == 0) 561 if (strcmp("debug", str) == 0)
610 apic_verbosity = APIC_DEBUG; 562 apic_verbosity = APIC_DEBUG;
611 else if (strcmp("verbose", str) == 0) 563 else if (strcmp("verbose", str) == 0)
612 apic_verbosity = APIC_VERBOSE; 564 apic_verbosity = APIC_VERBOSE;
613 else 565 else {
614 printk(KERN_WARNING "APIC Verbosity level %s not recognised" 566 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
615 " use apic=verbose or apic=debug", str); 567 " use apic=verbose or apic=debug\n", str);
568 return -EINVAL;
569 }
616 570
617 return 1; 571 return 0;
618} 572}
619 573early_param("apic", apic_set_verbosity);
620__setup("apic=", apic_set_verbosity);
621 574
622/* 575/*
623 * Detect and enable local APICs on non-SMP boards. 576 * Detect and enable local APICs on non-SMP boards.
@@ -638,6 +591,40 @@ static int __init detect_init_APIC (void)
638 return 0; 591 return 0;
639} 592}
640 593
594#ifdef CONFIG_X86_IO_APIC
595static struct resource * __init ioapic_setup_resources(void)
596{
597#define IOAPIC_RESOURCE_NAME_SIZE 11
598 unsigned long n;
599 struct resource *res;
600 char *mem;
601 int i;
602
603 if (nr_ioapics <= 0)
604 return NULL;
605
606 n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
607 n *= nr_ioapics;
608
609 res = alloc_bootmem(n);
610
611 if (!res)
612 return NULL;
613
614 memset(res, 0, n);
615 mem = (void *)&res[nr_ioapics];
616
617 for (i = 0; i < nr_ioapics; i++) {
618 res[i].name = mem;
619 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
620 snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
621 mem += IOAPIC_RESOURCE_NAME_SIZE;
622 }
623
624 return res;
625}
626#endif
627
641void __init init_apic_mappings(void) 628void __init init_apic_mappings(void)
642{ 629{
643 unsigned long apic_phys; 630 unsigned long apic_phys;
@@ -654,19 +641,26 @@ void __init init_apic_mappings(void)
654 apic_phys = mp_lapic_addr; 641 apic_phys = mp_lapic_addr;
655 642
656 set_fixmap_nocache(FIX_APIC_BASE, apic_phys); 643 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
644 apic_mapped = 1;
657 apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); 645 apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
658 646
647 /* Put local APIC into the resource map. */
648 lapic_resource.start = apic_phys;
649 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
650 insert_resource(&iomem_resource, &lapic_resource);
651
659 /* 652 /*
660 * Fetch the APIC ID of the BSP in case we have a 653 * Fetch the APIC ID of the BSP in case we have a
661 * default configuration (or the MP table is broken). 654 * default configuration (or the MP table is broken).
662 */ 655 */
663 boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); 656 boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
664 657
665#ifdef CONFIG_X86_IO_APIC
666 { 658 {
667 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 659 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
668 int i; 660 int i;
661 struct resource *ioapic_res;
669 662
663 ioapic_res = ioapic_setup_resources();
670 for (i = 0; i < nr_ioapics; i++) { 664 for (i = 0; i < nr_ioapics; i++) {
671 if (smp_found_config) { 665 if (smp_found_config) {
672 ioapic_phys = mp_ioapics[i].mpc_apicaddr; 666 ioapic_phys = mp_ioapics[i].mpc_apicaddr;
@@ -678,9 +672,15 @@ void __init init_apic_mappings(void)
678 apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", 672 apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
679 __fix_to_virt(idx), ioapic_phys); 673 __fix_to_virt(idx), ioapic_phys);
680 idx++; 674 idx++;
675
676 if (ioapic_res) {
677 ioapic_res->start = ioapic_phys;
678 ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
679 insert_resource(&iomem_resource, ioapic_res);
680 ioapic_res++;
681 }
681 } 682 }
682 } 683 }
683#endif
684} 684}
685 685
686/* 686/*
@@ -951,7 +951,7 @@ void smp_local_timer_interrupt(struct pt_regs *regs)
951 * We take the 'long' return path, and there every subsystem 951 * We take the 'long' return path, and there every subsystem
952 * grabs the appropriate locks (kernel lock/ irq lock). 952 * grabs the appropriate locks (kernel lock/ irq lock).
953 * 953 *
954 * we might want to decouple profiling from the 'long path', 954 * We might want to decouple profiling from the 'long path',
955 * and do the profiling totally in assembly. 955 * and do the profiling totally in assembly.
956 * 956 *
957 * Currently this isn't too much of an issue (performance wise), 957 * Currently this isn't too much of an issue (performance wise),
@@ -1123,19 +1123,15 @@ int __init APIC_init_uniprocessor (void)
1123 1123
1124 verify_local_APIC(); 1124 verify_local_APIC();
1125 1125
1126 connect_bsp_APIC();
1127
1128 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); 1126 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
1129 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); 1127 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
1130 1128
1131 setup_local_APIC(); 1129 setup_local_APIC();
1132 1130
1133#ifdef CONFIG_X86_IO_APIC
1134 if (smp_found_config && !skip_ioapic_setup && nr_ioapics) 1131 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
1135 setup_IO_APIC(); 1132 setup_IO_APIC();
1136 else 1133 else
1137 nr_ioapics = 0; 1134 nr_ioapics = 0;
1138#endif
1139 setup_boot_APIC_clock(); 1135 setup_boot_APIC_clock();
1140 check_nmi_watchdog(); 1136 check_nmi_watchdog();
1141 return 0; 1137 return 0;
@@ -1144,14 +1140,17 @@ int __init APIC_init_uniprocessor (void)
1144static __init int setup_disableapic(char *str) 1140static __init int setup_disableapic(char *str)
1145{ 1141{
1146 disable_apic = 1; 1142 disable_apic = 1;
1147 return 1; 1143 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
1148} 1144 return 0;
1145}
1146early_param("disableapic", setup_disableapic);
1149 1147
1148/* same as disableapic, for compatibility */
1150static __init int setup_nolapic(char *str) 1149static __init int setup_nolapic(char *str)
1151{ 1150{
1152 disable_apic = 1; 1151 return setup_disableapic(str);
1153 return 1;
1154} 1152}
1153early_param("nolapic", setup_nolapic);
1155 1154
1156static __init int setup_noapictimer(char *str) 1155static __init int setup_noapictimer(char *str)
1157{ 1156{
@@ -1184,11 +1183,5 @@ static __init int setup_apicpmtimer(char *s)
1184} 1183}
1185__setup("apicpmtimer", setup_apicpmtimer); 1184__setup("apicpmtimer", setup_apicpmtimer);
1186 1185
1187/* dummy parsing: see setup.c */
1188
1189__setup("disableapic", setup_disableapic);
1190__setup("nolapic", setup_nolapic); /* same as disableapic, for compatibility */
1191
1192__setup("noapictimer", setup_noapictimer); 1186__setup("noapictimer", setup_noapictimer);
1193 1187
1194/* no "lapic" flag - we only use the lapic when the BIOS tells us so. */
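
The "disableapic"/"nolapic" options move here from dummy __setup() stubs that were parsed by hand elsewhere; early_param() handlers are run by parse_early_param() well before the ordinary __setup() pass. A sketch of that contract, with a hypothetical option name:

    #include <linux/errno.h>
    #include <linux/init.h>

    /* receives only the text after '=', which may be NULL;
     * returns 0 on success */
    static int __init setup_demo_opt(char *str)
    {
            if (!str)
                    return -EINVAL;
            /* record the value here */
            return 0;
    }
    early_param("demo_opt", setup_demo_opt);
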
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index d8d5750d6106..3525f884af82 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -23,6 +23,7 @@
23#include <asm/nmi.h> 23#include <asm/nmi.h>
24#include <asm/hw_irq.h> 24#include <asm/hw_irq.h>
25#include <asm/mach_apic.h> 25#include <asm/mach_apic.h>
26#include <asm/kdebug.h>
26 27
27/* This keeps track of which cpu is crashing. */ 28/* This keeps track of which cpu is crashing. */
28static int crashing_cpu; 29static int crashing_cpu;
@@ -68,7 +69,7 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
68 * for the data I pass, and I need tags 69 * for the data I pass, and I need tags
69 * on the data to indicate what information I have 70 * on the data to indicate what information I have
70 * squirrelled away. ELF notes happen to provide 71 * squirrelled away. ELF notes happen to provide
71 * all of that that no need to invent something new. 72 * all of that, no need to invent something new.
72 */ 73 */
73 74
74 buf = (u32*)per_cpu_ptr(crash_notes, cpu); 75 buf = (u32*)per_cpu_ptr(crash_notes, cpu);
@@ -95,15 +96,25 @@ static void crash_save_self(struct pt_regs *regs)
95#ifdef CONFIG_SMP 96#ifdef CONFIG_SMP
96static atomic_t waiting_for_crash_ipi; 97static atomic_t waiting_for_crash_ipi;
97 98
98static int crash_nmi_callback(struct pt_regs *regs, int cpu) 99static int crash_nmi_callback(struct notifier_block *self,
100 unsigned long val, void *data)
99{ 101{
102 struct pt_regs *regs;
103 int cpu;
104
105 if (val != DIE_NMI_IPI)
106 return NOTIFY_OK;
107
108 regs = ((struct die_args *)data)->regs;
109 cpu = raw_smp_processor_id();
110
100 /* 111 /*
101 * Don't do anything if this handler is invoked on the crashing cpu. 112 * Don't do anything if this handler is invoked on the crashing cpu.
102 * Otherwise, the system will completely hang. The crashing cpu can get 113 * Otherwise, the system will completely hang. The crashing cpu can get
103 * an NMI if the system was initially booted with the nmi_watchdog parameter. 114 * an NMI if the system was initially booted with the nmi_watchdog parameter.
104 */ 115 */
105 if (cpu == crashing_cpu) 116 if (cpu == crashing_cpu)
106 return 1; 117 return NOTIFY_STOP;
107 local_irq_disable(); 118 local_irq_disable();
108 119
109 crash_save_this_cpu(regs, cpu); 120 crash_save_this_cpu(regs, cpu);
@@ -127,12 +138,17 @@ static void smp_send_nmi_allbutself(void)
127 * cpu hotplug shouldn't matter. 138 * cpu hotplug shouldn't matter.
128 */ 139 */
129 140
141static struct notifier_block crash_nmi_nb = {
142 .notifier_call = crash_nmi_callback,
143};
144
130static void nmi_shootdown_cpus(void) 145static void nmi_shootdown_cpus(void)
131{ 146{
132 unsigned long msecs; 147 unsigned long msecs;
133 148
134 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); 149 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
135 set_nmi_callback(crash_nmi_callback); 150 if (register_die_notifier(&crash_nmi_nb))
151 return; /* return what? */
136 152
137 /* 153 /*
138 * Ensure the new callback function is set before sending 154 * Ensure the new callback function is set before sending
@@ -178,9 +194,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
178 if(cpu_has_apic) 194 if(cpu_has_apic)
179 disable_local_APIC(); 195 disable_local_APIC();
180 196
181#if defined(CONFIG_X86_IO_APIC)
182 disable_IO_APIC(); 197 disable_IO_APIC();
183#endif
184 198
185 crash_save_self(regs); 199 crash_save_self(regs);
186} 200}
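
The crash path no longer claims the single set_nmi_callback() slot; it joins the die-notifier chain and reacts only to DIE_NMI_IPI events. A minimal sketch of that pattern (the handler body is illustrative):

    #include <linux/notifier.h>
    #include <asm/kdebug.h>

    static int demo_nmi_notify(struct notifier_block *self,
                               unsigned long val, void *data)
    {
            if (val != DIE_NMI_IPI)
                    return NOTIFY_OK;    /* not ours; let the chain continue */
            /* ((struct die_args *)data)->regs holds the interrupted context */
            return NOTIFY_STOP;          /* consume the event */
    }

    static struct notifier_block demo_nmi_nb = {
            .notifier_call = demo_nmi_notify,
    };

    /* somewhere during setup: register_die_notifier(&demo_nmi_nb); */
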
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 708a3cd9a27e..c0af3828df45 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -25,6 +25,8 @@
25#include <asm/bootsetup.h> 25#include <asm/bootsetup.h>
26#include <asm/sections.h> 26#include <asm/sections.h>
27 27
28struct e820map e820 __initdata;
29
28/* 30/*
29 * PFN of last memory page. 31 * PFN of last memory page.
30 */ 32 */
@@ -41,7 +43,7 @@ unsigned long end_pfn_map;
41/* 43/*
42 * Last pfn which the user wants to use. 44 * Last pfn which the user wants to use.
43 */ 45 */
44unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT; 46static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
45 47
46extern struct resource code_resource, data_resource; 48extern struct resource code_resource, data_resource;
47 49
@@ -70,12 +72,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
70 return 1; 72 return 1;
71 } 73 }
72#endif 74#endif
73 /* kernel code + 640k memory hole (later should not be needed, but 75 /* kernel code */
74 be paranoid for now) */
75 if (last >= 640*1024 && addr < 1024*1024) {
76 *addrp = 1024*1024;
77 return 1;
78 }
79 if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) { 76 if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
80 *addrp = __pa_symbol(&_end); 77 *addrp = __pa_symbol(&_end);
81 return 1; 78 return 1;
@@ -565,13 +562,6 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
565 * If we're lucky and live on a modern system, the setup code 562 * If we're lucky and live on a modern system, the setup code
566 * will have given us a memory map that we can use to properly 563 * will have given us a memory map that we can use to properly
567 * set up memory. If we aren't, we'll fake a memory map. 564 * set up memory. If we aren't, we'll fake a memory map.
568 *
569 * We check to see that the memory map contains at least 2 elements
570 * before we'll use it, because the detection code in setup.S may
571 * not be perfect and most every PC known to man has two memory
572 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
573 * thinkpad 560x, for example, does not cooperate with the memory
574 * detection code.)
575 */ 565 */
576static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) 566static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
577{ 567{
@@ -589,34 +579,19 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
589 if (start > end) 579 if (start > end)
590 return -1; 580 return -1;
591 581
592 /*
593 * Some BIOSes claim RAM in the 640k - 1M region.
594 * Not right. Fix it up.
595 *
596 * This should be removed on Hammer which is supposed to not
597 * have non e820 covered ISA mappings there, but I had some strange
598 * problems so it stays for now. -AK
599 */
600 if (type == E820_RAM) {
601 if (start < 0x100000ULL && end > 0xA0000ULL) {
602 if (start < 0xA0000ULL)
603 add_memory_region(start, 0xA0000ULL-start, type);
604 if (end <= 0x100000ULL)
605 continue;
606 start = 0x100000ULL;
607 size = end - start;
608 }
609 }
610
611 add_memory_region(start, size, type); 582 add_memory_region(start, size, type);
612 } while (biosmap++,--nr_map); 583 } while (biosmap++,--nr_map);
613 return 0; 584 return 0;
614} 585}
615 586
616void __init setup_memory_region(void) 587void early_panic(char *msg)
617{ 588{
618 char *who = "BIOS-e820"; 589 early_printk(msg);
590 panic(msg);
591}
619 592
593void __init setup_memory_region(void)
594{
620 /* 595 /*
621 * Try to copy the BIOS-supplied E820-map. 596 * Try to copy the BIOS-supplied E820-map.
622 * 597 *
@@ -624,51 +599,70 @@ void __init setup_memory_region(void)
624 * the next section from 1mb->appropriate_mem_k 599 * the next section from 1mb->appropriate_mem_k
625 */ 600 */
626 sanitize_e820_map(E820_MAP, &E820_MAP_NR); 601 sanitize_e820_map(E820_MAP, &E820_MAP_NR);
627 if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { 602 if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
628 unsigned long mem_size; 603 early_panic("Cannot find a valid memory map");
629
630 /* compare results from other methods and take the greater */
631 if (ALT_MEM_K < EXT_MEM_K) {
632 mem_size = EXT_MEM_K;
633 who = "BIOS-88";
634 } else {
635 mem_size = ALT_MEM_K;
636 who = "BIOS-e801";
637 }
638
639 e820.nr_map = 0;
640 add_memory_region(0, LOWMEMSIZE(), E820_RAM);
641 add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
642 }
643 printk(KERN_INFO "BIOS-provided physical RAM map:\n"); 604 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
644 e820_print_map(who); 605 e820_print_map("BIOS-e820");
645} 606}
646 607
647void __init parse_memopt(char *p, char **from) 608static int __init parse_memopt(char *p)
648{ 609{
649 end_user_pfn = memparse(p, from); 610 if (!p)
611 return -EINVAL;
612 end_user_pfn = memparse(p, &p);
650 end_user_pfn >>= PAGE_SHIFT; 613 end_user_pfn >>= PAGE_SHIFT;
614 return 0;
651} 615}
616early_param("mem", parse_memopt);
652 617
653void __init parse_memmapopt(char *p, char **from) 618static int userdef __initdata;
619
620static int __init parse_memmap_opt(char *p)
654{ 621{
622 char *oldp;
655 unsigned long long start_at, mem_size; 623 unsigned long long start_at, mem_size;
656 624
657 mem_size = memparse(p, from); 625 if (!strcmp(p, "exactmap")) {
658 p = *from; 626#ifdef CONFIG_CRASH_DUMP
627 /* If we are doing a crash dump, we
628 * still need to know the real mem
 629 * size before the original memory map is
630 * reset.
631 */
632 saved_max_pfn = e820_end_of_ram();
633#endif
634 end_pfn_map = 0;
635 e820.nr_map = 0;
636 userdef = 1;
637 return 0;
638 }
639
640 oldp = p;
641 mem_size = memparse(p, &p);
642 if (p == oldp)
643 return -EINVAL;
659 if (*p == '@') { 644 if (*p == '@') {
660 start_at = memparse(p+1, from); 645 start_at = memparse(p+1, &p);
661 add_memory_region(start_at, mem_size, E820_RAM); 646 add_memory_region(start_at, mem_size, E820_RAM);
662 } else if (*p == '#') { 647 } else if (*p == '#') {
663 start_at = memparse(p+1, from); 648 start_at = memparse(p+1, &p);
664 add_memory_region(start_at, mem_size, E820_ACPI); 649 add_memory_region(start_at, mem_size, E820_ACPI);
665 } else if (*p == '$') { 650 } else if (*p == '$') {
666 start_at = memparse(p+1, from); 651 start_at = memparse(p+1, &p);
667 add_memory_region(start_at, mem_size, E820_RESERVED); 652 add_memory_region(start_at, mem_size, E820_RESERVED);
668 } else { 653 } else {
669 end_user_pfn = (mem_size >> PAGE_SHIFT); 654 end_user_pfn = (mem_size >> PAGE_SHIFT);
670 } 655 }
671 p = *from; 656 return *p == '\0' ? 0 : -EINVAL;
657}
658early_param("memmap", parse_memmap_opt);
659
660void finish_e820_parsing(void)
661{
662 if (userdef) {
663 printk(KERN_INFO "user-defined physical RAM map:\n");
664 e820_print_map("user");
665 }
672} 666}
673 667
674unsigned long pci_mem_start = 0xaeedbabe; 668unsigned long pci_mem_start = 0xaeedbabe;
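
Both handlers lean on memparse(), which consumes a number with an optional K/M/G suffix and advances the cursor; that is what lets one argument carry a size and a start address. A worked sketch with illustrative values (a crash kernel would typically combine these as, say, "memmap=exactmap memmap=640K@0 memmap=32M@16M"):

    #include <linux/kernel.h>   /* memparse() */

    static void demo_memmap_parse(void)
    {
            char buf[] = "32M@16M";
            char *p = buf;
            unsigned long long size, start;

            size = memparse(p, &p);            /* 32 << 20; p now points at '@' */
            if (*p == '@') {
                    start = memparse(p + 1, &p);   /* 16 << 20 */
                    /* add_memory_region(start, size, E820_RAM); */
            }
    }
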
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
new file mode 100644
index 000000000000..208e38a372c1
--- /dev/null
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -0,0 +1,122 @@
 1/* Various workarounds for chipset bugs.
 2 This code runs very early and can't use the regular PCI subsystem.
 3 The entries are keyed to PCI bridges which usually identify chipsets
 4 uniquely.
 5 This is only for whole classes of chipsets with specific problems which
 6 need early invasive action (e.g. before the timers are initialized).
 7 Most PCI device specific workarounds can be done later and should be
 8 in standard PCI quirks.
 9 Mainboard specific bugs should be handled by DMI entries.
 10 CPU specific bugs are handled in setup.c. */
11
12#include <linux/pci.h>
13#include <linux/acpi.h>
14#include <linux/pci_ids.h>
15#include <asm/pci-direct.h>
16#include <asm/proto.h>
17#include <asm/dma.h>
18
19static void via_bugs(void)
20{
21#ifdef CONFIG_IOMMU
22 if ((end_pfn > MAX_DMA32_PFN || force_iommu) &&
23 !iommu_aperture_allowed) {
24 printk(KERN_INFO
25 "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n");
26 iommu_aperture_disabled = 1;
27 }
28#endif
29}
30
31#ifdef CONFIG_ACPI
32
33static int nvidia_hpet_detected __initdata;
34
35static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
36{
37 nvidia_hpet_detected = 1;
38 return 0;
39}
40#endif
41
42static void nvidia_bugs(void)
43{
44#ifdef CONFIG_ACPI
45 /*
46 * All timer overrides on Nvidia are
47 * wrong unless HPET is enabled.
48 */
49 nvidia_hpet_detected = 0;
50 acpi_table_parse(ACPI_HPET, nvidia_hpet_check);
51 if (nvidia_hpet_detected == 0) {
52 acpi_skip_timer_override = 1;
53 printk(KERN_INFO "Nvidia board "
54 "detected. Ignoring ACPI "
55 "timer override.\n");
56 }
57#endif
58 /* RED-PEN skip them on mptables too? */
59
60}
61
62static void ati_bugs(void)
63{
64#if 1 /* for testing */
65 printk("ATI board detected\n");
66#endif
67 /* No bugs right now */
68}
69
70struct chipset {
71 u16 vendor;
72 void (*f)(void);
73};
74
75static struct chipset early_qrk[] = {
76 { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
77 { PCI_VENDOR_ID_VIA, via_bugs },
78 { PCI_VENDOR_ID_ATI, ati_bugs },
79 {}
80};
81
82void __init early_quirks(void)
83{
84 int num, slot, func;
85
86 if (!early_pci_allowed())
87 return;
88
89 /* Poor man's PCI discovery */
90 for (num = 0; num < 32; num++) {
91 for (slot = 0; slot < 32; slot++) {
92 for (func = 0; func < 8; func++) {
93 u32 class;
94 u32 vendor;
95 u8 type;
96 int i;
97 class = read_pci_config(num,slot,func,
98 PCI_CLASS_REVISION);
99 if (class == 0xffffffff)
100 break;
101
102 if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
103 continue;
104
105 vendor = read_pci_config(num, slot, func,
106 PCI_VENDOR_ID);
107 vendor &= 0xffff;
108
109 for (i = 0; early_qrk[i].f; i++)
110 if (early_qrk[i].vendor == vendor) {
111 early_qrk[i].f();
112 return;
113 }
114
115 type = read_pci_config_byte(num, slot, func,
116 PCI_HEADER_TYPE);
117 if (!(type & 0x80))
118 break;
119 }
120 }
121 }
122}
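
early_quirks() runs before the PCI subsystem exists, so read_pci_config() from asm/pci-direct.h talks to configuration mechanism #1 directly. Roughly what such a dword read does; a sketch, not the kernel's exact implementation:

    #include <asm/io.h>

    static u32 demo_pci_read(unsigned bus, unsigned slot, unsigned func,
                             unsigned offset)
    {
            /* CONFIG_ADDRESS: enable bit | bus | device | function | dword */
            outl(0x80000000 | (bus << 16) | (slot << 11) | (func << 8) |
                 (offset & ~3), 0xcf8);
            return inl(0xcfc);             /* CONFIG_DATA */
    }
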
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index 140051e07fa6..e22ecd54870d 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -215,20 +215,16 @@ void early_printk(const char *fmt, ...)
215 215
216static int __initdata keep_early; 216static int __initdata keep_early;
217 217
218int __init setup_early_printk(char *opt) 218static int __init setup_early_printk(char *buf)
219{ 219{
220 char *space; 220 if (!buf)
221 char buf[256]; 221 return 0;
222 222
223 if (early_console_initialized) 223 if (early_console_initialized)
224 return 1; 224 return 0;
225 225 early_console_initialized = 1;
226 strlcpy(buf,opt,sizeof(buf));
227 space = strchr(buf, ' ');
228 if (space)
229 *space = 0;
230 226
231 if (strstr(buf,"keep")) 227 if (!strcmp(buf,"keep"))
232 keep_early = 1; 228 keep_early = 1;
233 229
234 if (!strncmp(buf, "serial", 6)) { 230 if (!strncmp(buf, "serial", 6)) {
@@ -248,11 +244,12 @@ int __init setup_early_printk(char *opt)
248 early_console = &simnow_console; 244 early_console = &simnow_console;
249 keep_early = 1; 245 keep_early = 1;
250 } 246 }
251 early_console_initialized = 1;
252 register_console(early_console); 247 register_console(early_console);
253 return 0; 248 return 0;
254} 249}
255 250
251early_param("earlyprintk", setup_early_printk);
252
256void __init disable_early_printk(void) 253void __init disable_early_printk(void)
257{ 254{
258 if (!early_console_initialized || !early_console) 255 if (!early_console_initialized || !early_console)
@@ -266,4 +263,3 @@ void __init disable_early_printk(void)
266 } 263 }
267} 264}
268 265
269__setup("earlyprintk=", setup_early_printk);
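
Whichever early_console the handler selects ends up in register_console(). For orientation, a minimal sketch of what such a boot console object looks like; the name and write body are illustrative, not from this patch:

    #include <linux/console.h>

    static void demo_write(struct console *con, const char *s, unsigned n)
    {
            /* push n bytes of s to the early output device */
    }

    static struct console demo_console = {
            .name  = "demo",
            .write = demo_write,
            .flags = CON_PRINTBUFFER,
            .index = -1,
    };

    /* register_console(&demo_console); */
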
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index aa8d8939abc1..2802524104f3 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -4,8 +4,6 @@
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs 5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 *
8 * $Id$
9 */ 7 */
10 8
11/* 9/*
@@ -22,15 +20,25 @@
22 * at the top of the kernel process stack. 20 * at the top of the kernel process stack.
 23 * - partial stack frame: partially saved registers up to R11. 21 * - partial stack frame: partially saved registers up to R11.
24 * - full stack frame: Like partial stack frame, but all register saved. 22 * - full stack frame: Like partial stack frame, but all register saved.
25 * 23 *
26 * TODO: 24 * Some macro usage:
27 * - schedule it carefully for the final hardware. 25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers
 30 * are not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
 33 * - ENTRY/END - Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
28 */ 38 */
29 39
30#define ASSEMBLY 1
31#include <linux/linkage.h> 40#include <linux/linkage.h>
32#include <asm/segment.h> 41#include <asm/segment.h>
33#include <asm/smp.h>
34#include <asm/cache.h> 42#include <asm/cache.h>
35#include <asm/errno.h> 43#include <asm/errno.h>
36#include <asm/dwarf2.h> 44#include <asm/dwarf2.h>
@@ -115,6 +123,7 @@
115 .macro CFI_DEFAULT_STACK start=1 123 .macro CFI_DEFAULT_STACK start=1
116 .if \start 124 .if \start
117 CFI_STARTPROC simple 125 CFI_STARTPROC simple
126 CFI_SIGNAL_FRAME
118 CFI_DEF_CFA rsp,SS+8 127 CFI_DEF_CFA rsp,SS+8
119 .else 128 .else
120 CFI_DEF_CFA_OFFSET SS+8 129 CFI_DEF_CFA_OFFSET SS+8
@@ -146,6 +155,10 @@
146/* rdi: prev */ 155/* rdi: prev */
147ENTRY(ret_from_fork) 156ENTRY(ret_from_fork)
148 CFI_DEFAULT_STACK 157 CFI_DEFAULT_STACK
158 push kernel_eflags(%rip)
159 CFI_ADJUST_CFA_OFFSET 4
160 popf # reset kernel eflags
161 CFI_ADJUST_CFA_OFFSET -4
149 call schedule_tail 162 call schedule_tail
150 GET_THREAD_INFO(%rcx) 163 GET_THREAD_INFO(%rcx)
151 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) 164 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
@@ -199,6 +212,7 @@ END(ret_from_fork)
199 212
200ENTRY(system_call) 213ENTRY(system_call)
201 CFI_STARTPROC simple 214 CFI_STARTPROC simple
215 CFI_SIGNAL_FRAME
202 CFI_DEF_CFA rsp,PDA_STACKOFFSET 216 CFI_DEF_CFA rsp,PDA_STACKOFFSET
203 CFI_REGISTER rip,rcx 217 CFI_REGISTER rip,rcx
204 /*CFI_REGISTER rflags,r11*/ 218 /*CFI_REGISTER rflags,r11*/
@@ -316,6 +330,7 @@ END(system_call)
316 */ 330 */
317ENTRY(int_ret_from_sys_call) 331ENTRY(int_ret_from_sys_call)
318 CFI_STARTPROC simple 332 CFI_STARTPROC simple
333 CFI_SIGNAL_FRAME
319 CFI_DEF_CFA rsp,SS+8-ARGOFFSET 334 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
320 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ 335 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
321 CFI_REL_OFFSET rsp,RSP-ARGOFFSET 336 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
@@ -476,6 +491,7 @@ END(stub_rt_sigreturn)
476 */ 491 */
477 .macro _frame ref 492 .macro _frame ref
478 CFI_STARTPROC simple 493 CFI_STARTPROC simple
494 CFI_SIGNAL_FRAME
479 CFI_DEF_CFA rsp,SS+8-\ref 495 CFI_DEF_CFA rsp,SS+8-\ref
480 /*CFI_REL_OFFSET ss,SS-\ref*/ 496 /*CFI_REL_OFFSET ss,SS-\ref*/
481 CFI_REL_OFFSET rsp,RSP-\ref 497 CFI_REL_OFFSET rsp,RSP-\ref
@@ -511,7 +527,12 @@ END(stub_rt_sigreturn)
511 testl $3,CS(%rdi) 527 testl $3,CS(%rdi)
512 je 1f 528 je 1f
513 swapgs 529 swapgs
5141: incl %gs:pda_irqcount # RED-PEN should check preempt count 530 /* irqcount is used to check if a CPU is already on an interrupt
 531 stack or not. While this is essentially redundant with preempt_count,
532 it is a little cheaper to use a separate counter in the PDA
533 (short of moving irq_enter into assembly, which would be too
534 much work) */
5351: incl %gs:pda_irqcount
515 cmoveq %gs:pda_irqstackptr,%rsp 536 cmoveq %gs:pda_irqstackptr,%rsp
516 push %rbp # backlink for old unwinder 537 push %rbp # backlink for old unwinder
517 /* 538 /*
@@ -619,8 +640,7 @@ retint_signal:
619#ifdef CONFIG_PREEMPT 640#ifdef CONFIG_PREEMPT
620 /* Returning to kernel space. Check if we need preemption */ 641 /* Returning to kernel space. Check if we need preemption */
621 /* rcx: threadinfo. interrupts off. */ 642 /* rcx: threadinfo. interrupts off. */
622 .p2align 643ENTRY(retint_kernel)
623retint_kernel:
624 cmpl $0,threadinfo_preempt_count(%rcx) 644 cmpl $0,threadinfo_preempt_count(%rcx)
625 jnz retint_restore_args 645 jnz retint_restore_args
626 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) 646 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
@@ -679,7 +699,6 @@ ENTRY(call_function_interrupt)
679END(call_function_interrupt) 699END(call_function_interrupt)
680#endif 700#endif
681 701
682#ifdef CONFIG_X86_LOCAL_APIC
683ENTRY(apic_timer_interrupt) 702ENTRY(apic_timer_interrupt)
684 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 703 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
685END(apic_timer_interrupt) 704END(apic_timer_interrupt)
@@ -691,7 +710,6 @@ END(error_interrupt)
691ENTRY(spurious_interrupt) 710ENTRY(spurious_interrupt)
692 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt 711 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
693END(spurious_interrupt) 712END(spurious_interrupt)
694#endif
695 713
696/* 714/*
697 * Exception entry points. 715 * Exception entry points.
@@ -768,7 +786,9 @@ paranoid_exit\trace:
768 testl $3,CS(%rsp) 786 testl $3,CS(%rsp)
769 jnz paranoid_userspace\trace 787 jnz paranoid_userspace\trace
770paranoid_swapgs\trace: 788paranoid_swapgs\trace:
789 .if \trace
771 TRACE_IRQS_IRETQ 0 790 TRACE_IRQS_IRETQ 0
791 .endif
772 swapgs 792 swapgs
773paranoid_restore\trace: 793paranoid_restore\trace:
774 RESTORE_ALL 8 794 RESTORE_ALL 8
@@ -814,7 +834,7 @@ paranoid_schedule\trace:
814 * Exception entry point. This expects an error code/orig_rax on the stack 834 * Exception entry point. This expects an error code/orig_rax on the stack
815 * and the exception handler in %rax. 835 * and the exception handler in %rax.
816 */ 836 */
817ENTRY(error_entry) 837KPROBE_ENTRY(error_entry)
818 _frame RDI 838 _frame RDI
819 /* rdi slot contains rax, oldrax contains error code */ 839 /* rdi slot contains rax, oldrax contains error code */
820 cld 840 cld
@@ -898,7 +918,7 @@ error_kernelspace:
898 cmpq $gs_change,RIP(%rsp) 918 cmpq $gs_change,RIP(%rsp)
899 je error_swapgs 919 je error_swapgs
900 jmp error_sti 920 jmp error_sti
901END(error_entry) 921KPROBE_END(error_entry)
902 922
903 /* Reload gs selector with exception handling */ 923 /* Reload gs selector with exception handling */
904 /* edi: new selector */ 924 /* edi: new selector */
@@ -1020,8 +1040,7 @@ ENDPROC(execve)
1020 1040
1021KPROBE_ENTRY(page_fault) 1041KPROBE_ENTRY(page_fault)
1022 errorentry do_page_fault 1042 errorentry do_page_fault
1023END(page_fault) 1043KPROBE_END(page_fault)
1024 .previous .text
1025 1044
1026ENTRY(coprocessor_error) 1045ENTRY(coprocessor_error)
1027 zeroentry do_coprocessor_error 1046 zeroentry do_coprocessor_error
@@ -1042,8 +1061,7 @@ KPROBE_ENTRY(debug)
1042 CFI_ADJUST_CFA_OFFSET 8 1061 CFI_ADJUST_CFA_OFFSET 8
1043 paranoidentry do_debug, DEBUG_STACK 1062 paranoidentry do_debug, DEBUG_STACK
1044 paranoidexit 1063 paranoidexit
1045END(debug) 1064KPROBE_END(debug)
1046 .previous .text
1047 1065
1048 /* runs on exception stack */ 1066 /* runs on exception stack */
1049KPROBE_ENTRY(nmi) 1067KPROBE_ENTRY(nmi)
@@ -1057,8 +1075,7 @@ KPROBE_ENTRY(nmi)
1057 jmp paranoid_exit1 1075 jmp paranoid_exit1
1058 CFI_ENDPROC 1076 CFI_ENDPROC
1059#endif 1077#endif
1060END(nmi) 1078KPROBE_END(nmi)
1061 .previous .text
1062 1079
1063KPROBE_ENTRY(int3) 1080KPROBE_ENTRY(int3)
1064 INTR_FRAME 1081 INTR_FRAME
@@ -1067,8 +1084,7 @@ KPROBE_ENTRY(int3)
1067 paranoidentry do_int3, DEBUG_STACK 1084 paranoidentry do_int3, DEBUG_STACK
1068 jmp paranoid_exit1 1085 jmp paranoid_exit1
1069 CFI_ENDPROC 1086 CFI_ENDPROC
1070END(int3) 1087KPROBE_END(int3)
1071 .previous .text
1072 1088
1073ENTRY(overflow) 1089ENTRY(overflow)
1074 zeroentry do_overflow 1090 zeroentry do_overflow
@@ -1116,8 +1132,7 @@ END(stack_segment)
1116 1132
1117KPROBE_ENTRY(general_protection) 1133KPROBE_ENTRY(general_protection)
1118 errorentry do_general_protection 1134 errorentry do_general_protection
1119END(general_protection) 1135KPROBE_END(general_protection)
1120 .previous .text
1121 1136
1122ENTRY(alignment_check) 1137ENTRY(alignment_check)
1123 errorentry do_alignment_check 1138 errorentry do_alignment_check
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
index 3020917546de..cdb90e671b88 100644
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ b/arch/x86_64/kernel/genapic_cluster.c
@@ -118,7 +118,6 @@ struct genapic apic_cluster = {
118 .name = "clustered", 118 .name = "clustered",
119 .int_delivery_mode = dest_Fixed, 119 .int_delivery_mode = dest_Fixed,
120 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 120 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
121 .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED,
122 .target_cpus = cluster_target_cpus, 121 .target_cpus = cluster_target_cpus,
123 .apic_id_registered = cluster_apic_id_registered, 122 .apic_id_registered = cluster_apic_id_registered,
124 .init_apic_ldr = cluster_init_apic_ldr, 123 .init_apic_ldr = cluster_init_apic_ldr,
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index eb86d374813a..50ad153eaac4 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -49,8 +49,7 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
49 unsigned long cfg; 49 unsigned long cfg;
50 unsigned long flags; 50 unsigned long flags;
51 51
52 local_save_flags(flags); 52 local_irq_save(flags);
53 local_irq_disable();
54 53
55 /* 54 /*
56 * Wait for idle. 55 * Wait for idle.
@@ -121,7 +120,6 @@ struct genapic apic_flat = {
121 .name = "flat", 120 .name = "flat",
122 .int_delivery_mode = dest_LowestPrio, 121 .int_delivery_mode = dest_LowestPrio,
123 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 122 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
124 .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
125 .target_cpus = flat_target_cpus, 123 .target_cpus = flat_target_cpus,
126 .apic_id_registered = flat_apic_id_registered, 124 .apic_id_registered = flat_apic_id_registered,
127 .init_apic_ldr = flat_init_apic_ldr, 125 .init_apic_ldr = flat_init_apic_ldr,
@@ -180,7 +178,6 @@ struct genapic apic_physflat = {
180 .name = "physical flat", 178 .name = "physical flat",
181 .int_delivery_mode = dest_Fixed, 179 .int_delivery_mode = dest_Fixed,
182 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
183 .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_FIXED,
184 .target_cpus = physflat_target_cpus, 181 .target_cpus = physflat_target_cpus,
185 .apic_id_registered = flat_apic_id_registered, 182 .apic_id_registered = flat_apic_id_registered,
186 .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ 183 .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
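
local_irq_save() is the canonical replacement for the save-then-disable pair above: it captures the flags and disables interrupts in one macro. A sketch of the shape flat_send_IPI_mask() now takes (the body is elided):

    #include <linux/irqflags.h>   /* assumed location of the macros in this era */

    static void demo_send_ipi(void)
    {
            unsigned long flags;

            local_irq_save(flags);   /* == local_save_flags(); local_irq_disable(); */
            /* ... wait for ICR idle, program the APIC ... */
            local_irq_restore(flags);
    }
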
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index c9739ca81d06..1e6f80870679 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,8 +5,6 @@
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> 5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> 6 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
7 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> 7 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
8 *
9 * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
10 */ 8 */
11 9
12 10
@@ -187,12 +185,15 @@ startup_64:
187 185
188 /* Finally jump to run C code and to be on real kernel address 186 /* Finally jump to run C code and to be on real kernel address
189 * Since we are running on identity-mapped space we have to jump 187 * Since we are running on identity-mapped space we have to jump
190 * to the full 64bit address , this is only possible as indirect 188 * to the full 64bit address, this is only possible as indirect
 191 * jump 189 * jump. In addition, we need to ensure %cs is set so we make this
190 * a far return.
192 */ 191 */
193 movq initial_code(%rip),%rax 192 movq initial_code(%rip),%rax
194 pushq $0 # fake return address 193 pushq $0 # fake return address to stop unwinder
195 jmp *%rax 194 pushq $__KERNEL_CS # set correct cs
195 pushq %rax # target address in negative space
196 lretq
196 197
197 /* SMP bootup changes these two */ 198 /* SMP bootup changes these two */
198 .align 8 199 .align 8
@@ -371,7 +372,7 @@ ENTRY(cpu_gdt_table)
371 .quad 0,0 /* TSS */ 372 .quad 0,0 /* TSS */
372 .quad 0,0 /* LDT */ 373 .quad 0,0 /* LDT */
373 .quad 0,0,0 /* three TLS descriptors */ 374 .quad 0,0,0 /* three TLS descriptors */
374 .quad 0 /* unused */ 375 .quad 0x0000f40000000000 /* node/CPU stored in limit */
375gdt_end: 376gdt_end:
376 /* asm/segment.h:GDT_ENTRIES must match this */ 377 /* asm/segment.h:GDT_ENTRIES must match this */
377 /* This should be a multiple of the cache line size */ 378 /* This should be a multiple of the cache line size */
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 36647ce6aecb..9561eb3c5b5c 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -45,38 +45,16 @@ static void __init copy_bootdata(char *real_mode_data)
45 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); 45 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
46 if (!new_data) { 46 if (!new_data) {
47 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { 47 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
48 printk("so old bootloader that it does not support commandline?!\n");
49 return; 48 return;
50 } 49 }
51 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; 50 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
52 printk("old bootloader convention, maybe loadlin?\n");
53 } 51 }
54 command_line = (char *) ((u64)(new_data)); 52 command_line = (char *) ((u64)(new_data));
55 memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); 53 memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
56 printk("Bootdata ok (command line is %s)\n", saved_command_line);
57}
58
59static void __init setup_boot_cpu_data(void)
60{
61 unsigned int dummy, eax;
62
63 /* get vendor info */
64 cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
65 (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
66 (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
67 (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
68
69 /* get cpu type */
70 cpuid(1, &eax, &dummy, &dummy,
71 (unsigned int *) &boot_cpu_data.x86_capability);
72 boot_cpu_data.x86 = (eax >> 8) & 0xf;
73 boot_cpu_data.x86_model = (eax >> 4) & 0xf;
74 boot_cpu_data.x86_mask = eax & 0xf;
75} 54}
76 55
77void __init x86_64_start_kernel(char * real_mode_data) 56void __init x86_64_start_kernel(char * real_mode_data)
78{ 57{
79 char *s;
80 int i; 58 int i;
81 59
82 for (i = 0; i < 256; i++) 60 for (i = 0; i < 256; i++)
@@ -84,10 +62,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
84 asm volatile("lidt %0" :: "m" (idt_descr)); 62 asm volatile("lidt %0" :: "m" (idt_descr));
85 clear_bss(); 63 clear_bss();
86 64
87 /* 65 early_printk("Kernel alive\n");
88 * This must be called really, really early:
89 */
90 lockdep_init();
91 66
92 /* 67 /*
93 * switch to init_level4_pgt from boot_level4_pgt 68 * switch to init_level4_pgt from boot_level4_pgt
@@ -103,22 +78,5 @@ void __init x86_64_start_kernel(char * real_mode_data)
103#ifdef CONFIG_SMP 78#ifdef CONFIG_SMP
104 cpu_set(0, cpu_online_map); 79 cpu_set(0, cpu_online_map);
105#endif 80#endif
106 s = strstr(saved_command_line, "earlyprintk=");
107 if (s != NULL)
108 setup_early_printk(strchr(s, '=') + 1);
109#ifdef CONFIG_NUMA
110 s = strstr(saved_command_line, "numa=");
111 if (s != NULL)
112 numa_setup(s+5);
113#endif
114#ifdef CONFIG_X86_IO_APIC
115 if (strstr(saved_command_line, "disableapic"))
116 disable_apic = 1;
117#endif
118 /* You need early console to see that */
119 if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
120 panic("Kernel too big for kernel mapping\n");
121
122 setup_boot_cpu_data();
123 start_kernel(); 81 start_kernel();
124} 82}
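
The deleted block is the motivation for the early_param() conversions elsewhere in this series: x86_64_start_kernel() used to grep saved_command_line by hand for earlyprintk=, numa= and disableapic. Each option now registers itself in the .init.setup section instead. Roughly the shape of the table entry (field names follow include/linux/init.h of this era; the handler below is hypothetical):

    #include <linux/init.h>

    /* approximate shape; see include/linux/init.h */
    struct obs_kernel_param {
            const char *str;             /* option name, e.g. "mem" */
            int (*setup_func)(char *);   /* handler to call */
            int early;                   /* 1 => run from parse_early_param() */
    };

    static int __init demo_handler(char *s) { return 0; }

    /* early_param("demo", demo_handler) expands to roughly: */
    static struct obs_kernel_param demo_entry
            __attribute__((used, __section__(".init.setup"))) = {
            .str = "demo", .setup_func = demo_handler, .early = 1,
    };
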
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 0434b1f8e3dd..2dd51f364ea2 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -55,7 +55,6 @@
55 */ 55 */
56BUILD_16_IRQS(0x0) 56BUILD_16_IRQS(0x0)
57 57
58#ifdef CONFIG_X86_LOCAL_APIC
59/* 58/*
60 * The IO-APIC gives us many more interrupt sources. Most of these 59 * The IO-APIC gives us many more interrupt sources. Most of these
61 * are unused but an SMP system is supposed to have enough memory ... 60 * are unused but an SMP system is supposed to have enough memory ...
@@ -75,8 +74,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
75 BUILD_15_IRQS(0xe) 74 BUILD_15_IRQS(0xe)
76#endif 75#endif
77 76
78#endif
79
80#undef BUILD_16_IRQS 77#undef BUILD_16_IRQS
81#undef BUILD_15_IRQS 78#undef BUILD_15_IRQS
82#undef BI 79#undef BI
@@ -100,7 +97,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
100void (*interrupt[NR_IRQS])(void) = { 97void (*interrupt[NR_IRQS])(void) = {
101 IRQLIST_16(0x0), 98 IRQLIST_16(0x0),
102 99
103#ifdef CONFIG_X86_IO_APIC
104 IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3), 100 IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
105 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), 101 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
106 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), 102 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
@@ -110,7 +106,6 @@ void (*interrupt[NR_IRQS])(void) = {
110 , IRQLIST_15(0xe) 106 , IRQLIST_15(0xe)
111#endif 107#endif
112 108
113#endif
114}; 109};
115 110
116#undef IRQ 111#undef IRQ
@@ -128,6 +123,8 @@ void (*interrupt[NR_IRQS])(void) = {
128 123
129DEFINE_SPINLOCK(i8259A_lock); 124DEFINE_SPINLOCK(i8259A_lock);
130 125
126static int i8259A_auto_eoi;
127
131static void end_8259A_irq (unsigned int irq) 128static void end_8259A_irq (unsigned int irq)
132{ 129{
133 if (irq > 256) { 130 if (irq > 256) {
@@ -341,6 +338,8 @@ void init_8259A(int auto_eoi)
341{ 338{
342 unsigned long flags; 339 unsigned long flags;
343 340
341 i8259A_auto_eoi = auto_eoi;
342
344 spin_lock_irqsave(&i8259A_lock, flags); 343 spin_lock_irqsave(&i8259A_lock, flags);
345 344
346 outb(0xff, 0x21); /* mask all of 8259A-1 */ 345 outb(0xff, 0x21); /* mask all of 8259A-1 */
@@ -399,7 +398,7 @@ static void save_ELCR(char *trigger)
399 398
400static int i8259A_resume(struct sys_device *dev) 399static int i8259A_resume(struct sys_device *dev)
401{ 400{
402 init_8259A(0); 401 init_8259A(i8259A_auto_eoi);
403 restore_ELCR(irq_trigger); 402 restore_ELCR(irq_trigger);
404 return 0; 403 return 0;
405} 404}
@@ -453,9 +452,7 @@ void __init init_ISA_irqs (void)
453{ 452{
454 int i; 453 int i;
455 454
456#ifdef CONFIG_X86_LOCAL_APIC
457 init_bsp_APIC(); 455 init_bsp_APIC();
458#endif
459 init_8259A(0); 456 init_8259A(0);
460 457
461 for (i = 0; i < NR_IRQS; i++) { 458 for (i = 0; i < NR_IRQS; i++) {
@@ -581,14 +578,12 @@ void __init init_IRQ(void)
581 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 578 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
582 set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 579 set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
583 580
584#ifdef CONFIG_X86_LOCAL_APIC
585 /* self generated IPI for local APIC timer */ 581 /* self generated IPI for local APIC timer */
586 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); 582 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
587 583
588 /* IPI vectors for APIC spurious and error interrupts */ 584 /* IPI vectors for APIC spurious and error interrupts */
589 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 585 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
590 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 586 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
591#endif
592 587
593 /* 588 /*
594 * Set the clock to HZ Hz, we already have a valid 589 * Set the clock to HZ Hz, we already have a valid
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 924a4a332954..0491019d4c8d 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -48,7 +48,7 @@ int sis_apic_bug; /* not actually supported, dummy for compile */
48 48
49static int no_timer_check; 49static int no_timer_check;
50 50
51int disable_timer_pin_1 __initdata; 51static int disable_timer_pin_1 __initdata;
52 52
53int timer_over_8254 __initdata = 0; 53int timer_over_8254 __initdata = 0;
54 54
@@ -111,6 +111,33 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
111 FINAL; \ 111 FINAL; \
112} 112}
113 113
114union entry_union {
115 struct { u32 w1, w2; };
116 struct IO_APIC_route_entry entry;
117};
118
119static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
120{
121 union entry_union eu;
122 unsigned long flags;
123 spin_lock_irqsave(&ioapic_lock, flags);
124 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
125 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
126 spin_unlock_irqrestore(&ioapic_lock, flags);
127 return eu.entry;
128}
129
130static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
131{
132 unsigned long flags;
133 union entry_union eu;
134 eu.entry = e;
135 spin_lock_irqsave(&ioapic_lock, flags);
136 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
137 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
138 spin_unlock_irqrestore(&ioapic_lock, flags);
139}
140
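
These helpers replace the open-coded *(((int *)&entry) + 0) type-punning that recurs below, and keep both 32-bit halves of a routing entry under one lock acquisition. Typical use, as a sketch with illustrative arguments:

    static void demo_mask_pin(int apic, int pin)
    {
            struct IO_APIC_route_entry entry;

            entry = ioapic_read_entry(apic, pin);
            entry.mask = 1;                      /* disable delivery on this pin */
            ioapic_write_entry(apic, pin, entry);
    }
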
114#ifdef CONFIG_SMP 141#ifdef CONFIG_SMP
115static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 142static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
116{ 143{
@@ -196,13 +223,9 @@ static void unmask_IO_APIC_irq (unsigned int irq)
196static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 223static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
197{ 224{
198 struct IO_APIC_route_entry entry; 225 struct IO_APIC_route_entry entry;
199 unsigned long flags;
200 226
201 /* Check delivery_mode to be sure we're not clearing an SMI pin */ 227 /* Check delivery_mode to be sure we're not clearing an SMI pin */
202 spin_lock_irqsave(&ioapic_lock, flags); 228 entry = ioapic_read_entry(apic, pin);
203 *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
204 *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
205 spin_unlock_irqrestore(&ioapic_lock, flags);
206 if (entry.delivery_mode == dest_SMI) 229 if (entry.delivery_mode == dest_SMI)
207 return; 230 return;
208 /* 231 /*
@@ -210,10 +233,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
210 */ 233 */
211 memset(&entry, 0, sizeof(entry)); 234 memset(&entry, 0, sizeof(entry));
212 entry.mask = 1; 235 entry.mask = 1;
213 spin_lock_irqsave(&ioapic_lock, flags); 236 ioapic_write_entry(apic, pin, entry);
214 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
215 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
216 spin_unlock_irqrestore(&ioapic_lock, flags);
217} 237}
218 238
219static void clear_IO_APIC (void) 239static void clear_IO_APIC (void)
@@ -225,14 +245,6 @@ static void clear_IO_APIC (void)
225 clear_IO_APIC_pin(apic, pin); 245 clear_IO_APIC_pin(apic, pin);
226} 246}
227 247
228/*
229 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
230 * specific CPU-side IRQs.
231 */
232
233#define MAX_PIRQS 8
234static int pirq_entries [MAX_PIRQS];
235static int pirqs_enabled;
236int skip_ioapic_setup; 248int skip_ioapic_setup;
237int ioapic_force; 249int ioapic_force;
238 250
@@ -241,18 +253,17 @@ int ioapic_force;
241static int __init disable_ioapic_setup(char *str) 253static int __init disable_ioapic_setup(char *str)
242{ 254{
243 skip_ioapic_setup = 1; 255 skip_ioapic_setup = 1;
244 return 1; 256 return 0;
245} 257}
258early_param("noapic", disable_ioapic_setup);
246 259
247static int __init enable_ioapic_setup(char *str) 260/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
261static int __init disable_timer_pin_setup(char *arg)
248{ 262{
249 ioapic_force = 1; 263 disable_timer_pin_1 = 1;
250 skip_ioapic_setup = 0;
251 return 1; 264 return 1;
252} 265}
253 266__setup("disable_timer_pin_1", disable_timer_pin_setup);
254__setup("noapic", disable_ioapic_setup);
255__setup("apic", enable_ioapic_setup);
256 267
257static int __init setup_disable_8254_timer(char *s) 268static int __init setup_disable_8254_timer(char *s)
258{ 269{
@@ -268,135 +279,6 @@ static int __init setup_enable_8254_timer(char *s)
268__setup("disable_8254_timer", setup_disable_8254_timer); 279__setup("disable_8254_timer", setup_disable_8254_timer);
269__setup("enable_8254_timer", setup_enable_8254_timer); 280__setup("enable_8254_timer", setup_enable_8254_timer);
270 281
271#include <asm/pci-direct.h>
272#include <linux/pci_ids.h>
273#include <linux/pci.h>
274
275
276#ifdef CONFIG_ACPI
277
278static int nvidia_hpet_detected __initdata;
279
280static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
281{
282 nvidia_hpet_detected = 1;
283 return 0;
284}
285#endif
286
287/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
288 off. Check for an Nvidia or VIA PCI bridge and turn it off.
289 Use pci direct infrastructure because this runs before the PCI subsystem.
290
291 Can be overwritten with "apic"
292
293 And another hack to disable the IOMMU on VIA chipsets.
294
295 ... and others. Really should move this somewhere else.
296
297 Kludge-O-Rama. */
298void __init check_ioapic(void)
299{
300 int num,slot,func;
301 /* Poor man's PCI discovery */
302 for (num = 0; num < 32; num++) {
303 for (slot = 0; slot < 32; slot++) {
304 for (func = 0; func < 8; func++) {
305 u32 class;
306 u32 vendor;
307 u8 type;
308 class = read_pci_config(num,slot,func,
309 PCI_CLASS_REVISION);
310 if (class == 0xffffffff)
311 break;
312
313 if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
314 continue;
315
316 vendor = read_pci_config(num, slot, func,
317 PCI_VENDOR_ID);
318 vendor &= 0xffff;
319 switch (vendor) {
320 case PCI_VENDOR_ID_VIA:
321#ifdef CONFIG_IOMMU
322 if ((end_pfn > MAX_DMA32_PFN ||
323 force_iommu) &&
324 !iommu_aperture_allowed) {
325 printk(KERN_INFO
326 "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
327 iommu_aperture_disabled = 1;
328 }
329#endif
330 return;
331 case PCI_VENDOR_ID_NVIDIA:
332#ifdef CONFIG_ACPI
333 /*
334 * All timer overrides on Nvidia are
335 * wrong unless HPET is enabled.
336 */
337 nvidia_hpet_detected = 0;
338 acpi_table_parse(ACPI_HPET,
339 nvidia_hpet_check);
340 if (nvidia_hpet_detected == 0) {
341 acpi_skip_timer_override = 1;
342 printk(KERN_INFO "Nvidia board "
343 "detected. Ignoring ACPI "
344 "timer override.\n");
345 }
346#endif
347 /* RED-PEN skip them on mptables too? */
348 return;
349
350 /* This should be actually default, but
351 for 2.6.16 let's do it for ATI only where
352 it's really needed. */
353 case PCI_VENDOR_ID_ATI:
354 if (timer_over_8254 == 1) {
355 timer_over_8254 = 0;
356 printk(KERN_INFO
357 "ATI board detected. Disabling timer routing over 8254.\n");
358 }
359 return;
360 }
361
362
363 /* No multi-function device? */
364 type = read_pci_config_byte(num,slot,func,
365 PCI_HEADER_TYPE);
366 if (!(type & 0x80))
367 break;
368 }
369 }
370 }
371}
372
373static int __init ioapic_pirq_setup(char *str)
374{
375 int i, max;
376 int ints[MAX_PIRQS+1];
377
378 get_options(str, ARRAY_SIZE(ints), ints);
379
380 for (i = 0; i < MAX_PIRQS; i++)
381 pirq_entries[i] = -1;
382
383 pirqs_enabled = 1;
384 apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
385 max = MAX_PIRQS;
386 if (ints[0] < MAX_PIRQS)
387 max = ints[0];
388
389 for (i = 0; i < max; i++) {
390 apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
391 /*
392 * PIRQs are mapped upside down, usually.
393 */
394 pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
395 }
396 return 1;
397}
398
399__setup("pirq=", ioapic_pirq_setup);
400 282
401/* 283/*
402 * Find the IRQ entry number of a certain pin. 284 * Find the IRQ entry number of a certain pin.
@@ -425,9 +307,7 @@ static int __init find_isa_irq_pin(int irq, int type)
425 for (i = 0; i < mp_irq_entries; i++) { 307 for (i = 0; i < mp_irq_entries; i++) {
426 int lbus = mp_irqs[i].mpc_srcbus; 308 int lbus = mp_irqs[i].mpc_srcbus;
427 309
428 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || 310 if (test_bit(lbus, mp_bus_not_pci) &&
429 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
430 mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
431 (mp_irqs[i].mpc_irqtype == type) && 311 (mp_irqs[i].mpc_irqtype == type) &&
432 (mp_irqs[i].mpc_srcbusirq == irq)) 312 (mp_irqs[i].mpc_srcbusirq == irq))
433 313
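
The per-bus type array collapses into a single bitmap throughout this file: every caller only needs to know whether a bus is PCI or not. A sketch of the producer side, which is assumed to live with the MP-table parser:

    #include <linux/bitops.h>
    #include <linux/types.h>

    /* one bit per bus id, set when the bus is *not* PCI;
     * sized MAX_MP_BUSSES in the real code */
    static DECLARE_BITMAP(demo_bus_not_pci, 256);

    static void demo_record_bus(int bus_id, int is_pci)
    {
            if (!is_pci)
                    set_bit(bus_id, demo_bus_not_pci);
    }
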
@@ -443,9 +323,7 @@ static int __init find_isa_irq_apic(int irq, int type)
443 for (i = 0; i < mp_irq_entries; i++) { 323 for (i = 0; i < mp_irq_entries; i++) {
444 int lbus = mp_irqs[i].mpc_srcbus; 324 int lbus = mp_irqs[i].mpc_srcbus;
445 325
446 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || 326 if (test_bit(lbus, mp_bus_not_pci) &&
447 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
448 mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
449 (mp_irqs[i].mpc_irqtype == type) && 327 (mp_irqs[i].mpc_irqtype == type) &&
450 (mp_irqs[i].mpc_srcbusirq == irq)) 328 (mp_irqs[i].mpc_srcbusirq == irq))
451 break; 329 break;
@@ -485,7 +363,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
485 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) 363 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
486 break; 364 break;
487 365
488 if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && 366 if (!test_bit(lbus, mp_bus_not_pci) &&
489 !mp_irqs[i].mpc_irqtype && 367 !mp_irqs[i].mpc_irqtype &&
490 (bus == lbus) && 368 (bus == lbus) &&
491 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { 369 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
@@ -508,27 +386,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
508 return best_guess; 386 return best_guess;
509} 387}
510 388
511/*
512 * EISA Edge/Level control register, ELCR
513 */
514static int EISA_ELCR(unsigned int irq)
515{
516 if (irq < 16) {
517 unsigned int port = 0x4d0 + (irq >> 3);
518 return (inb(port) >> (irq & 7)) & 1;
519 }
520 apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
521 return 0;
522}
523
524/* EISA interrupts are always polarity zero and can be edge or level
525 * trigger depending on the ELCR value. If an interrupt is listed as
526 * EISA conforming in the MP table, that means its trigger type must
527 * be read in from the ELCR */
528
529#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
530#define default_EISA_polarity(idx) (0)
531
532/* ISA interrupts are always polarity zero edge triggered, 389/* ISA interrupts are always polarity zero edge triggered,
533 * when listed as conforming in the MP table. */ 390 * when listed as conforming in the MP table. */
534 391
@@ -541,12 +398,6 @@ static int EISA_ELCR(unsigned int irq)
541#define default_PCI_trigger(idx) (1) 398#define default_PCI_trigger(idx) (1)
542#define default_PCI_polarity(idx) (1) 399#define default_PCI_polarity(idx) (1)
543 400
544/* MCA interrupts are always polarity zero level triggered,
545 * when listed as conforming in the MP table. */
546
547#define default_MCA_trigger(idx) (1)
548#define default_MCA_polarity(idx) (0)
549
550static int __init MPBIOS_polarity(int idx) 401static int __init MPBIOS_polarity(int idx)
551{ 402{
552 int bus = mp_irqs[idx].mpc_srcbus; 403 int bus = mp_irqs[idx].mpc_srcbus;
@@ -558,38 +409,11 @@ static int __init MPBIOS_polarity(int idx)
558 switch (mp_irqs[idx].mpc_irqflag & 3) 409 switch (mp_irqs[idx].mpc_irqflag & 3)
559 { 410 {
560 case 0: /* conforms, ie. bus-type dependent polarity */ 411 case 0: /* conforms, ie. bus-type dependent polarity */
561 { 412 if (test_bit(bus, mp_bus_not_pci))
562 switch (mp_bus_id_to_type[bus]) 413 polarity = default_ISA_polarity(idx);
563 { 414 else
564 case MP_BUS_ISA: /* ISA pin */ 415 polarity = default_PCI_polarity(idx);
565 {
566 polarity = default_ISA_polarity(idx);
567 break;
568 }
569 case MP_BUS_EISA: /* EISA pin */
570 {
571 polarity = default_EISA_polarity(idx);
572 break;
573 }
574 case MP_BUS_PCI: /* PCI pin */
575 {
576 polarity = default_PCI_polarity(idx);
577 break;
578 }
579 case MP_BUS_MCA: /* MCA pin */
580 {
581 polarity = default_MCA_polarity(idx);
582 break;
583 }
584 default:
585 {
586 printk(KERN_WARNING "broken BIOS!!\n");
587 polarity = 1;
588 break;
589 }
590 }
591 break; 416 break;
592 }
593 case 1: /* high active */ 417 case 1: /* high active */
594 { 418 {
595 polarity = 0; 419 polarity = 0;
@@ -627,38 +451,11 @@ static int MPBIOS_trigger(int idx)
627 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) 451 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
628 { 452 {
629 case 0: /* conforms, ie. bus-type dependent */ 453 case 0: /* conforms, ie. bus-type dependent */
630 { 454 if (test_bit(bus, mp_bus_not_pci))
631 switch (mp_bus_id_to_type[bus]) 455 trigger = default_ISA_trigger(idx);
632 { 456 else
633 case MP_BUS_ISA: /* ISA pin */ 457 trigger = default_PCI_trigger(idx);
634 {
635 trigger = default_ISA_trigger(idx);
636 break;
637 }
638 case MP_BUS_EISA: /* EISA pin */
639 {
640 trigger = default_EISA_trigger(idx);
641 break;
642 }
643 case MP_BUS_PCI: /* PCI pin */
644 {
645 trigger = default_PCI_trigger(idx);
646 break;
647 }
648 case MP_BUS_MCA: /* MCA pin */
649 {
650 trigger = default_MCA_trigger(idx);
651 break;
652 }
653 default:
654 {
655 printk(KERN_WARNING "broken BIOS!!\n");
656 trigger = 1;
657 break;
658 }
659 }
660 break; 458 break;
661 }
662 case 1: /* edge */ 459 case 1: /* edge */
663 { 460 {
664 trigger = 0; 461 trigger = 0;
@@ -764,49 +561,17 @@ static int pin_2_irq(int idx, int apic, int pin)
764 if (mp_irqs[idx].mpc_dstirq != pin) 561 if (mp_irqs[idx].mpc_dstirq != pin)
765 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 562 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
766 563
767 switch (mp_bus_id_to_type[bus]) 564 if (test_bit(bus, mp_bus_not_pci)) {
768 { 565 irq = mp_irqs[idx].mpc_srcbusirq;
769 case MP_BUS_ISA: /* ISA pin */ 566 } else {
770 case MP_BUS_EISA: 567 /*
771 case MP_BUS_MCA: 568 * PCI IRQs are mapped in order
772 { 569 */
773 irq = mp_irqs[idx].mpc_srcbusirq; 570 i = irq = 0;
774 break; 571 while (i < apic)
775 } 572 irq += nr_ioapic_registers[i++];
776 case MP_BUS_PCI: /* PCI pin */ 573 irq += pin;
777 { 574 irq = gsi_irq_sharing(irq);
778 /*
779 * PCI IRQs are mapped in order
780 */
781 i = irq = 0;
782 while (i < apic)
783 irq += nr_ioapic_registers[i++];
784 irq += pin;
785 irq = gsi_irq_sharing(irq);
786 break;
787 }
788 default:
789 {
790 printk(KERN_ERR "unknown bus type %d.\n",bus);
791 irq = 0;
792 break;
793 }
794 }
795 BUG_ON(irq >= NR_IRQS);
796
797 /*
798 * PCI IRQ command line redirection. Yes, limits are hardcoded.
799 */
800 if ((pin >= 16) && (pin <= 23)) {
801 if (pirq_entries[pin-16] != -1) {
802 if (!pirq_entries[pin-16]) {
803 apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
804 } else {
805 irq = pirq_entries[pin-16];
806 apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
807 pin-16, irq);
808 }
809 }
810 } 575 }
811 BUG_ON(irq >= NR_IRQS); 576 BUG_ON(irq >= NR_IRQS);
812 return irq; 577 return irq;
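
For PCI sources the GSI is just the pin number offset by the sizes of all lower-numbered IO-APICs. A worked sketch with two hypothetical 24-pin IO-APICs:

    /* nr_ioapic_registers[] = { 24, 24 }      (illustrative)
     *
     *   apic 0, pin 9   ->  irq  9
     *   apic 1, pin 4   ->  irq 24 + 4 = 28
     *
     * i.e. irq = sum(nr_ioapic_registers[0 .. apic-1]) + pin,
     * afterwards possibly remapped by gsi_irq_sharing().
     */
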
@@ -943,9 +708,9 @@ static void __init setup_IO_APIC_irqs(void)
943 if (!apic && (irq < 16)) 708 if (!apic && (irq < 16))
944 disable_8259A_irq(irq); 709 disable_8259A_irq(irq);
945 } 710 }
711 ioapic_write_entry(apic, pin, entry);
712
946 spin_lock_irqsave(&ioapic_lock, flags); 713 spin_lock_irqsave(&ioapic_lock, flags);
947 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
948 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
949 set_native_irq_info(irq, TARGET_CPUS); 714 set_native_irq_info(irq, TARGET_CPUS);
950 spin_unlock_irqrestore(&ioapic_lock, flags); 715 spin_unlock_irqrestore(&ioapic_lock, flags);
951 } 716 }
@@ -1083,10 +848,7 @@ void __apicdebuginit print_IO_APIC(void)
1083 for (i = 0; i <= reg_01.bits.entries; i++) { 848 for (i = 0; i <= reg_01.bits.entries; i++) {
1084 struct IO_APIC_route_entry entry; 849 struct IO_APIC_route_entry entry;
1085 850
1086 spin_lock_irqsave(&ioapic_lock, flags); 851 entry = ioapic_read_entry(apic, i);
1087 *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
1088 *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
1089 spin_unlock_irqrestore(&ioapic_lock, flags);
1090 852
1091 printk(KERN_DEBUG " %02x %03X %02X ", 853 printk(KERN_DEBUG " %02x %03X %02X ",
1092 i, 854 i,
@@ -1281,9 +1043,6 @@ static void __init enable_IO_APIC(void)
1281 irq_2_pin[i].pin = -1; 1043 irq_2_pin[i].pin = -1;
1282 irq_2_pin[i].next = 0; 1044 irq_2_pin[i].next = 0;
1283 } 1045 }
1284 if (!pirqs_enabled)
1285 for (i = 0; i < MAX_PIRQS; i++)
1286 pirq_entries[i] = -1;
1287 1046
1288 /* 1047 /*
1289 * The number of IO-APIC IRQ registers (== #pins): 1048 * The number of IO-APIC IRQ registers (== #pins):
@@ -1299,11 +1058,7 @@ static void __init enable_IO_APIC(void)
1299 /* See if any of the pins is in ExtINT mode */ 1058 /* See if any of the pins is in ExtINT mode */
1300 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1059 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1301 struct IO_APIC_route_entry entry; 1060 struct IO_APIC_route_entry entry;
1302 spin_lock_irqsave(&ioapic_lock, flags); 1061 entry = ioapic_read_entry(apic, pin);
1303 *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1304 *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1305 spin_unlock_irqrestore(&ioapic_lock, flags);
1306
1307 1062
1308 /* If the interrupt line is enabled and in ExtInt mode 1063 /* If the interrupt line is enabled and in ExtInt mode
1309 * I have found the pin where the i8259 is connected. 1064 * I have found the pin where the i8259 is connected.
@@ -1355,7 +1110,6 @@ void disable_IO_APIC(void)
1355 */ 1110 */
1356 if (ioapic_i8259.pin != -1) { 1111 if (ioapic_i8259.pin != -1) {
1357 struct IO_APIC_route_entry entry; 1112 struct IO_APIC_route_entry entry;
1358 unsigned long flags;
1359 1113
1360 memset(&entry, 0, sizeof(entry)); 1114 memset(&entry, 0, sizeof(entry));
1361 entry.mask = 0; /* Enabled */ 1115 entry.mask = 0; /* Enabled */
@@ -1372,84 +1126,13 @@ void disable_IO_APIC(void)
1372 /* 1126 /*
1373 * Add it to the IO-APIC irq-routing table: 1127 * Add it to the IO-APIC irq-routing table:
1374 */ 1128 */
1375 spin_lock_irqsave(&ioapic_lock, flags); 1129 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1376 io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
1377 *(((int *)&entry)+1));
1378 io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
1379 *(((int *)&entry)+0));
1380 spin_unlock_irqrestore(&ioapic_lock, flags);
1381 } 1130 }
1382 1131
1383 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 1132 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1384} 1133}
1385 1134
1386/* 1135/*
1387 * function to set the IO-APIC physical IDs based on the
1388 * values stored in the MPC table.
1389 *
1390 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1391 */
1392
1393static void __init setup_ioapic_ids_from_mpc (void)
1394{
1395 union IO_APIC_reg_00 reg_00;
1396 int apic;
1397 int i;
1398 unsigned char old_id;
1399 unsigned long flags;
1400
1401 /*
1402 * Set the IOAPIC ID to the value stored in the MPC table.
1403 */
1404 for (apic = 0; apic < nr_ioapics; apic++) {
1405
1406 /* Read the register 0 value */
1407 spin_lock_irqsave(&ioapic_lock, flags);
1408 reg_00.raw = io_apic_read(apic, 0);
1409 spin_unlock_irqrestore(&ioapic_lock, flags);
1410
1411 old_id = mp_ioapics[apic].mpc_apicid;
1412
1413
1414 printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
1415
1416
1417 /*
1418 * We need to adjust the IRQ routing table
1419 * if the ID changed.
1420 */
1421 if (old_id != mp_ioapics[apic].mpc_apicid)
1422 for (i = 0; i < mp_irq_entries; i++)
1423 if (mp_irqs[i].mpc_dstapic == old_id)
1424 mp_irqs[i].mpc_dstapic
1425 = mp_ioapics[apic].mpc_apicid;
1426
1427 /*
1428 * Read the right value from the MPC table and
1429 * write it into the ID register.
1430 */
1431 apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
1432 mp_ioapics[apic].mpc_apicid);
1433
1434 reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
1435 spin_lock_irqsave(&ioapic_lock, flags);
1436 io_apic_write(apic, 0, reg_00.raw);
1437 spin_unlock_irqrestore(&ioapic_lock, flags);
1438
1439 /*
1440 * Sanity check
1441 */
1442 spin_lock_irqsave(&ioapic_lock, flags);
1443 reg_00.raw = io_apic_read(apic, 0);
1444 spin_unlock_irqrestore(&ioapic_lock, flags);
1445 if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
1446 printk("could not set ID!\n");
1447 else
1448 apic_printk(APIC_VERBOSE," ok.\n");
1449 }
1450}
1451
1452/*
1453 * There is a nasty bug in some older SMP boards, their mptable lies 1136 * There is a nasty bug in some older SMP boards, their mptable lies
1454 * about the timer IRQ. We do the following to work around the situation: 1137 * about the timer IRQ. We do the following to work around the situation:
1455 * 1138 *
@@ -1964,11 +1647,6 @@ void __init setup_IO_APIC(void)
1964 1647
1965 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 1648 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
1966 1649
1967 /*
1968 * Set up the IO-APIC IRQ routing table.
1969 */
1970 if (!acpi_ioapic)
1971 setup_ioapic_ids_from_mpc();
1972 sync_Arb_IDs(); 1650 sync_Arb_IDs();
1973 setup_IO_APIC_irqs(); 1651 setup_IO_APIC_irqs();
1974 init_IO_APIC_traps(); 1652 init_IO_APIC_traps();
@@ -1987,17 +1665,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
1987{ 1665{
1988 struct IO_APIC_route_entry *entry; 1666 struct IO_APIC_route_entry *entry;
1989 struct sysfs_ioapic_data *data; 1667 struct sysfs_ioapic_data *data;
1990 unsigned long flags;
1991 int i; 1668 int i;
1992 1669
1993 data = container_of(dev, struct sysfs_ioapic_data, dev); 1670 data = container_of(dev, struct sysfs_ioapic_data, dev);
1994 entry = data->entry; 1671 entry = data->entry;
1995 spin_lock_irqsave(&ioapic_lock, flags); 1672 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
1996 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { 1673 *entry = ioapic_read_entry(dev->id, i);
1997 *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
1998 *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
1999 }
2000 spin_unlock_irqrestore(&ioapic_lock, flags);
2001 1674
2002 return 0; 1675 return 0;
2003} 1676}
@@ -2019,11 +1692,9 @@ static int ioapic_resume(struct sys_device *dev)
2019 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; 1692 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
2020 io_apic_write(dev->id, 0, reg_00.raw); 1693 io_apic_write(dev->id, 0, reg_00.raw);
2021 } 1694 }
2022 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2023 io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
2024 io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
2025 }
2026 spin_unlock_irqrestore(&ioapic_lock, flags); 1695 spin_unlock_irqrestore(&ioapic_lock, flags);
1696 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
1697 ioapic_write_entry(dev->id, i, entry[i]);
2027 1698
2028 return 0; 1699 return 0;
2029} 1700}
@@ -2077,19 +1748,6 @@ device_initcall(ioapic_init_sysfs);
2077 1748
2078#define IO_APIC_MAX_ID 0xFE 1749#define IO_APIC_MAX_ID 0xFE
2079 1750
2080int __init io_apic_get_version (int ioapic)
2081{
2082 union IO_APIC_reg_01 reg_01;
2083 unsigned long flags;
2084
2085 spin_lock_irqsave(&ioapic_lock, flags);
2086 reg_01.raw = io_apic_read(ioapic, 1);
2087 spin_unlock_irqrestore(&ioapic_lock, flags);
2088
2089 return reg_01.bits.version;
2090}
2091
2092
2093int __init io_apic_get_redir_entries (int ioapic) 1751int __init io_apic_get_redir_entries (int ioapic)
2094{ 1752{
2095 union IO_APIC_reg_01 reg_01; 1753 union IO_APIC_reg_01 reg_01;
@@ -2148,10 +1806,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
2148 if (!ioapic && (irq < 16)) 1806 if (!ioapic && (irq < 16))
2149 disable_8259A_irq(irq); 1807 disable_8259A_irq(irq);
2150 1808
1809 ioapic_write_entry(ioapic, pin, entry);
1810
2151 spin_lock_irqsave(&ioapic_lock, flags); 1811 spin_lock_irqsave(&ioapic_lock, flags);
2152 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); 1812 set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
2153 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2154 set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
2155 spin_unlock_irqrestore(&ioapic_lock, flags); 1813 spin_unlock_irqrestore(&ioapic_lock, flags);
2156 1814
2157 return 0; 1815 return 0;
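
Note on the io_apic.c hunks above: they all make the same substitution. Every open-coded pair of io_apic_read()/io_apic_write() calls on the two 32-bit halves of a routing entry, bracketed by ioapic_lock, becomes a single ioapic_read_entry()/ioapic_write_entry() call. A minimal sketch of what such helpers plausibly look like, reconstructed from the removed code (the union name and layout here are assumptions, not quoted from the patch):

/* Sketch only: reconstructed from the removed open-coded accesses.
 * The union name and exact shape are assumptions. */
union entry_union {
	struct { u32 w1, w2; };
	struct IO_APIC_route_entry entry;
};

static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
	union entry_union eu;
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
	spin_unlock_irqrestore(&ioapic_lock, flags);
	return eu.entry;
}

static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	union entry_union eu;
	unsigned long flags;

	eu.entry = e;
	spin_lock_irqsave(&ioapic_lock, flags);
	io_apic_write(apic, 0x11 + 2 * pin, eu.w2);	/* high word first */
	io_apic_write(apic, 0x10 + 2 * pin, eu.w1);	/* word with mask bit last */
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

Folding the locking into the helpers is what lets the callers above drop their local 'flags' variables.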
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c
index b81614970ecc..fe063d3cfe42 100644
--- a/arch/x86_64/kernel/ioport.c
+++ b/arch/x86_64/kernel/ioport.c
@@ -56,6 +56,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
56 56
57 memset(bitmap, 0xff, IO_BITMAP_BYTES); 57 memset(bitmap, 0xff, IO_BITMAP_BYTES);
58 t->io_bitmap_ptr = bitmap; 58 t->io_bitmap_ptr = bitmap;
59 set_thread_flag(TIF_IO_BITMAP);
59 } 60 }
60 61
61 /* 62 /*
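
The one-line ioport.c change marks a task the first time it allocates an I/O permission bitmap, so the context-switch path can test a cheap thread flag instead of chasing a pointer. An illustrative fragment of how a switch path might key off such a flag (the helper name is hypothetical; this is not the literal __switch_to() code):

	/* Hypothetical fragment, not the actual __switch_to() code. */
	if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP) ||
	    test_tsk_thread_flag(next_p, TIF_IO_BITMAP))
		handle_io_bitmap(&next_p->thread, tss);	/* hypothetical helper */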
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index 5221a53e90c1..b3677e6ccc6e 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -20,11 +20,6 @@
20#include <asm/idle.h> 20#include <asm/idle.h>
21 21
22atomic_t irq_err_count; 22atomic_t irq_err_count;
23#ifdef CONFIG_X86_IO_APIC
24#ifdef APIC_MISMATCH_DEBUG
25atomic_t irq_mis_count;
26#endif
27#endif
28 23
29#ifdef CONFIG_DEBUG_STACKOVERFLOW 24#ifdef CONFIG_DEBUG_STACKOVERFLOW
30/* 25/*
@@ -92,18 +87,11 @@ skip:
92 for_each_online_cpu(j) 87 for_each_online_cpu(j)
93 seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); 88 seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
94 seq_putc(p, '\n'); 89 seq_putc(p, '\n');
95#ifdef CONFIG_X86_LOCAL_APIC
96 seq_printf(p, "LOC: "); 90 seq_printf(p, "LOC: ");
97 for_each_online_cpu(j) 91 for_each_online_cpu(j)
98 seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); 92 seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
99 seq_putc(p, '\n'); 93 seq_putc(p, '\n');
100#endif
101 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); 94 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
102#ifdef CONFIG_X86_IO_APIC
103#ifdef APIC_MISMATCH_DEBUG
104 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
105#endif
106#endif
107 } 95 }
108 return 0; 96 return 0;
109} 97}
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 106076b370fc..0497e3bd5bff 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -15,6 +15,15 @@
15#include <asm/mmu_context.h> 15#include <asm/mmu_context.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
19static u64 kexec_pgd[512] PAGE_ALIGNED;
20static u64 kexec_pud0[512] PAGE_ALIGNED;
21static u64 kexec_pmd0[512] PAGE_ALIGNED;
22static u64 kexec_pte0[512] PAGE_ALIGNED;
23static u64 kexec_pud1[512] PAGE_ALIGNED;
24static u64 kexec_pmd1[512] PAGE_ALIGNED;
25static u64 kexec_pte1[512] PAGE_ALIGNED;
26
18static void init_level2_page(pmd_t *level2p, unsigned long addr) 27static void init_level2_page(pmd_t *level2p, unsigned long addr)
19{ 28{
20 unsigned long end_addr; 29 unsigned long end_addr;
@@ -144,32 +153,19 @@ static void load_segments(void)
144 ); 153 );
145} 154}
146 155
147typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
148 unsigned long control_code_buffer,
149 unsigned long start_address,
150 unsigned long pgtable) ATTRIB_NORET;
151
152extern const unsigned char relocate_new_kernel[];
153extern const unsigned long relocate_new_kernel_size;
154
155int machine_kexec_prepare(struct kimage *image) 156int machine_kexec_prepare(struct kimage *image)
156{ 157{
157 unsigned long start_pgtable, control_code_buffer; 158 unsigned long start_pgtable;
158 int result; 159 int result;
159 160
160 /* Calculate the offsets */ 161 /* Calculate the offsets */
161 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; 162 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
162 control_code_buffer = start_pgtable + PAGE_SIZE;
163 163
164 /* Setup the identity mapped 64bit page table */ 164 /* Setup the identity mapped 64bit page table */
165 result = init_pgtable(image, start_pgtable); 165 result = init_pgtable(image, start_pgtable);
166 if (result) 166 if (result)
167 return result; 167 return result;
168 168
169 /* Place the code in the reboot code buffer */
170 memcpy(__va(control_code_buffer), relocate_new_kernel,
171 relocate_new_kernel_size);
172
173 return 0; 169 return 0;
174} 170}
175 171
@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage *image)
184 */ 180 */
185NORET_TYPE void machine_kexec(struct kimage *image) 181NORET_TYPE void machine_kexec(struct kimage *image)
186{ 182{
187 unsigned long page_list; 183 unsigned long page_list[PAGES_NR];
188 unsigned long control_code_buffer; 184 void *control_page;
189 unsigned long start_pgtable;
190 relocate_new_kernel_t rnk;
191 185
192 /* Interrupts aren't acceptable while we reboot */ 186 /* Interrupts aren't acceptable while we reboot */
193 local_irq_disable(); 187 local_irq_disable();
194 188
195 /* Calculate the offsets */ 189 control_page = page_address(image->control_code_page) + PAGE_SIZE;
196 page_list = image->head; 190 memcpy(control_page, relocate_kernel, PAGE_SIZE);
197 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; 191
198 control_code_buffer = start_pgtable + PAGE_SIZE; 192 page_list[PA_CONTROL_PAGE] = __pa(control_page);
199 193 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
200 /* Set the low half of the page table to my identity mapped 194 page_list[PA_PGD] = __pa(kexec_pgd);
201 * page table for kexec. Leave the high half pointing at the 195 page_list[VA_PGD] = (unsigned long)kexec_pgd;
202 * kernel pages. Don't bother to flush the global pages 196 page_list[PA_PUD_0] = __pa(kexec_pud0);
203 * as that will happen when I fully switch to my identity mapped 197 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
204 * page table anyway. 198 page_list[PA_PMD_0] = __pa(kexec_pmd0);
205 */ 199 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
206 memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); 200 page_list[PA_PTE_0] = __pa(kexec_pte0);
207 __flush_tlb(); 201 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
208 202 page_list[PA_PUD_1] = __pa(kexec_pud1);
203 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
204 page_list[PA_PMD_1] = __pa(kexec_pmd1);
205 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
206 page_list[PA_PTE_1] = __pa(kexec_pte1);
207 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
208
209 page_list[PA_TABLE_PAGE] =
210 (unsigned long)__pa(page_address(image->control_code_page));
209 211
210 /* The segment registers are funny things, they have both a 212 /* The segment registers are funny things, they have both a
211 * visible and an invisible part. Whenever the visible part is 213 * visible and an invisible part. Whenever the visible part is
@@ -222,7 +224,36 @@ NORET_TYPE void machine_kexec(struct kimage *image)
222 */ 224 */
223 set_gdt(phys_to_virt(0),0); 225 set_gdt(phys_to_virt(0),0);
224 set_idt(phys_to_virt(0),0); 226 set_idt(phys_to_virt(0),0);
227
225 /* now call it */ 228 /* now call it */
226 rnk = (relocate_new_kernel_t) control_code_buffer; 229 relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
227 (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); 230 image->start);
228} 231}
232
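
machine_kexec() now hands relocate_kernel() a page_list[] indexed by PA_*/VA_* pairs: each kexec page-table page is passed both as a physical address (usable after the switch to the identity mapping) and as a kernel virtual address (usable while the old mappings are still live). The index constants are presumably declared along these lines in asm/kexec.h; the order below is inferred from the assignments above and should be treated as an assumption:

/* Assumed shape of the index enum; inferred, not quoted from the patch. */
enum {
	PA_CONTROL_PAGE = 0, VA_CONTROL_PAGE,
	PA_PGD, VA_PGD,
	PA_PUD_0, VA_PUD_0,
	PA_PMD_0, VA_PMD_0,
	PA_PTE_0, VA_PTE_0,
	PA_PUD_1, VA_PUD_1,
	PA_PMD_1, VA_PMD_1,
	PA_PTE_1, VA_PTE_1,
	PA_TABLE_PAGE,
	PAGES_NR,
};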
233/* crashkernel=size@addr specifies the location to reserve for
234 * a crash kernel. By reserving this memory we guarantee
 235 * that linux never sets it up as a DMA target.
236 * Useful for holding code to do something appropriate
237 * after a kernel panic.
238 */
239static int __init setup_crashkernel(char *arg)
240{
241 unsigned long size, base;
242 char *p;
243 if (!arg)
244 return -EINVAL;
245 size = memparse(arg, &p);
246 if (arg == p)
247 return -EINVAL;
248 if (*p == '@') {
249 base = memparse(p+1, &p);
250 /* FIXME: Do I want a sanity check to validate the
251 * memory range? Yes you do, but it's too early for
252 * e820 -AK */
253 crashk_res.start = base;
254 crashk_res.end = base + size - 1;
255 }
256 return 0;
257}
258early_param("crashkernel", setup_crashkernel);
259
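
setup_crashkernel() parses crashkernel=size@addr with memparse(), which accepts K/M/G suffixes on both values. A worked illustration of the parse (the command line and the numbers are examples only):

/* Illustrative walk-through of parsing "crashkernel=64M@16M". */
char *arg = "64M@16M", *p;
unsigned long size = memparse(arg, &p);	/* 64M -> 0x4000000, p -> "@16M" */
unsigned long base = 0;

if (*p == '@')
	base = memparse(p + 1, &p);	/* 16M -> 0x1000000 */
/* reserved range: [base, base + size - 1] == [0x1000000, 0x4ffffff] */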
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 4e017fb30fb3..bbea88801d88 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -182,7 +182,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
182 goto out2; 182 goto out2;
183 183
184 memset(&m, 0, sizeof(struct mce)); 184 memset(&m, 0, sizeof(struct mce));
185 m.cpu = safe_smp_processor_id(); 185 m.cpu = smp_processor_id();
186 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); 186 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
187 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 187 if (!(m.mcgstatus & MCG_STATUS_RIPV))
188 kill_it = 1; 188 kill_it = 1;
@@ -274,6 +274,33 @@ void do_machine_check(struct pt_regs * regs, long error_code)
274 atomic_dec(&mce_entry); 274 atomic_dec(&mce_entry);
275} 275}
276 276
277#ifdef CONFIG_X86_MCE_INTEL
278/***
279 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
 280 * @cpu: The CPU on which the event occurred.
281 * @status: Event status information
282 *
283 * This function should be called by the thermal interrupt after the
 284 * event has been processed and the decision has been made to
 285 * log the event.
286 *
287 * The status parameter will be saved to the 'status' field of 'struct mce'
288 * and historically has been the register value of the
 289 * MSR_IA32_THERM_STATUS (Intel) MSR.
290 */
291void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
292{
293 struct mce m;
294
295 memset(&m, 0, sizeof(m));
296 m.cpu = cpu;
297 m.bank = MCE_THERMAL_BANK;
298 m.status = status;
299 rdtscll(m.tsc);
300 mce_log(&m);
301}
302#endif /* CONFIG_X86_MCE_INTEL */
303
277/* 304/*
278 * Periodic polling timer for "silent" machine check errors. 305 * Periodic polling timer for "silent" machine check errors.
279 */ 306 */
diff --git a/arch/x86_64/kernel/mce_intel.c b/arch/x86_64/kernel/mce_intel.c
index 8f533d2c40cb..6551505d8a2c 100644
--- a/arch/x86_64/kernel/mce_intel.c
+++ b/arch/x86_64/kernel/mce_intel.c
@@ -11,36 +11,21 @@
11#include <asm/mce.h> 11#include <asm/mce.h>
12#include <asm/hw_irq.h> 12#include <asm/hw_irq.h>
13#include <asm/idle.h> 13#include <asm/idle.h>
14 14#include <asm/therm_throt.h>
15static DEFINE_PER_CPU(unsigned long, next_check);
16 15
17asmlinkage void smp_thermal_interrupt(void) 16asmlinkage void smp_thermal_interrupt(void)
18{ 17{
19 struct mce m; 18 __u64 msr_val;
20 19
21 ack_APIC_irq(); 20 ack_APIC_irq();
22 21
23 exit_idle(); 22 exit_idle();
24 irq_enter(); 23 irq_enter();
25 if (time_before(jiffies, __get_cpu_var(next_check)))
26 goto done;
27
28 __get_cpu_var(next_check) = jiffies + HZ*300;
29 memset(&m, 0, sizeof(m));
30 m.cpu = smp_processor_id();
31 m.bank = MCE_THERMAL_BANK;
32 rdtscll(m.tsc);
33 rdmsrl(MSR_IA32_THERM_STATUS, m.status);
34 if (m.status & 0x1) {
35 printk(KERN_EMERG
36 "CPU%d: Temperature above threshold, cpu clock throttled\n", m.cpu);
37 add_taint(TAINT_MACHINE_CHECK);
38 } else {
39 printk(KERN_EMERG "CPU%d: Temperature/speed normal\n", m.cpu);
40 }
41 24
42 mce_log(&m); 25 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
43done: 26 if (therm_throt_process(msr_val & 1))
27 mce_log_therm_throt_event(smp_processor_id(), msr_val);
28
44 irq_exit(); 29 irq_exit();
45} 30}
46 31
@@ -92,6 +77,9 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
92 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 77 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
93 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 78 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
94 cpu, tm2 ? "TM2" : "TM1"); 79 cpu, tm2 ? "TM2" : "TM1");
80
81 /* enable thermal throttle processing */
82 atomic_set(&therm_throt_en, 1);
95 return; 83 return;
96} 84}
97 85
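
The rewritten smp_thermal_interrupt() delegates both the rate limiting (formerly an open-coded jiffies check, one event per 300*HZ) and the printk/taint handling to therm_throt_process(), logging an MCE only when that returns nonzero. A sketch of the per-CPU rate-limit idiom the removed code implemented (names here are placeholders; the real therm_throt.c also keeps cumulative event counts):

/* Sketch of the removed rate-limit idiom; names are placeholders. */
#define CHECK_INTERVAL (300 * HZ)	/* at most one event per 5 minutes */
static DEFINE_PER_CPU(unsigned long, next_check);

static int thermal_event_due(void)
{
	if (time_before(jiffies, __get_cpu_var(next_check)))
		return 0;		/* suppress: too soon */
	__get_cpu_var(next_check) = jiffies + CHECK_INTERVAL;
	return 1;			/* log this one */
}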
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index a1ab4197f8a1..20e88f4b564b 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -41,8 +41,7 @@ int acpi_found_madt;
41 * Various Linux-internal data structures created from the 41 * Various Linux-internal data structures created from the
42 * MP-table. 42 * MP-table.
43 */ 43 */
44unsigned char apic_version [MAX_APICS]; 44DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
45unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
46int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; 45int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
47 46
48static int mp_current_pci_id = 0; 47static int mp_current_pci_id = 0;
@@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
56int mp_irq_entries; 55int mp_irq_entries;
57 56
58int nr_ioapics; 57int nr_ioapics;
59int pic_mode;
60unsigned long mp_lapic_addr = 0; 58unsigned long mp_lapic_addr = 0;
61 59
62 60
@@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata;
71/* Bitmask of physically existing CPUs */ 69/* Bitmask of physically existing CPUs */
72physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; 70physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
73 71
74/* ACPI MADT entry parsing functions */
75#ifdef CONFIG_ACPI
76extern struct acpi_boot_flags acpi_boot;
77#ifdef CONFIG_X86_LOCAL_APIC
78extern int acpi_parse_lapic (acpi_table_entry_header *header);
79extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
80extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
81#endif /*CONFIG_X86_LOCAL_APIC*/
82#ifdef CONFIG_X86_IO_APIC
83extern int acpi_parse_ioapic (acpi_table_entry_header *header);
84#endif /*CONFIG_X86_IO_APIC*/
85#endif /*CONFIG_ACPI*/
86
87u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; 72u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
88 73
89 74
@@ -108,24 +93,20 @@ static int __init mpf_checksum(unsigned char *mp, int len)
108static void __cpuinit MP_processor_info (struct mpc_config_processor *m) 93static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
109{ 94{
110 int cpu; 95 int cpu;
111 unsigned char ver;
112 cpumask_t tmp_map; 96 cpumask_t tmp_map;
97 char *bootup_cpu = "";
113 98
114 if (!(m->mpc_cpuflag & CPU_ENABLED)) { 99 if (!(m->mpc_cpuflag & CPU_ENABLED)) {
115 disabled_cpus++; 100 disabled_cpus++;
116 return; 101 return;
117 } 102 }
118
119 printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
120 m->mpc_apicid,
121 (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
122 (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
123 m->mpc_apicver);
124
125 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { 103 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
126 Dprintk(" Bootup CPU\n"); 104 bootup_cpu = " (Bootup-CPU)";
127 boot_cpu_id = m->mpc_apicid; 105 boot_cpu_id = m->mpc_apicid;
128 } 106 }
107
108 printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
109
129 if (num_processors >= NR_CPUS) { 110 if (num_processors >= NR_CPUS) {
130 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 111 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
131 " Processor ignored.\n", NR_CPUS); 112 " Processor ignored.\n", NR_CPUS);
@@ -136,24 +117,7 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
136 cpus_complement(tmp_map, cpu_present_map); 117 cpus_complement(tmp_map, cpu_present_map);
137 cpu = first_cpu(tmp_map); 118 cpu = first_cpu(tmp_map);
138 119
139#if MAX_APICS < 255
140 if ((int)m->mpc_apicid > MAX_APICS) {
141 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
142 m->mpc_apicid, MAX_APICS);
143 return;
144 }
145#endif
146 ver = m->mpc_apicver;
147
148 physid_set(m->mpc_apicid, phys_cpu_present_map); 120 physid_set(m->mpc_apicid, phys_cpu_present_map);
149 /*
150 * Validate version
151 */
152 if (ver == 0x0) {
153 printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
154 ver = 0x10;
155 }
156 apic_version[m->mpc_apicid] = ver;
157 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { 121 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
158 /* 122 /*
159 * bios_cpu_apicid is required to have processors listed 123 * bios_cpu_apicid is required to have processors listed
@@ -178,15 +142,11 @@ static void __init MP_bus_info (struct mpc_config_bus *m)
178 Dprintk("Bus #%d is %s\n", m->mpc_busid, str); 142 Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
179 143
180 if (strncmp(str, "ISA", 3) == 0) { 144 if (strncmp(str, "ISA", 3) == 0) {
181 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; 145 set_bit(m->mpc_busid, mp_bus_not_pci);
182 } else if (strncmp(str, "EISA", 4) == 0) {
183 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
184 } else if (strncmp(str, "PCI", 3) == 0) { 146 } else if (strncmp(str, "PCI", 3) == 0) {
185 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; 147 clear_bit(m->mpc_busid, mp_bus_not_pci);
186 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; 148 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
187 mp_current_pci_id++; 149 mp_current_pci_id++;
188 } else if (strncmp(str, "MCA", 3) == 0) {
189 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
190 } else { 150 } else {
191 printk(KERN_ERR "Unknown bustype %s\n", str); 151 printk(KERN_ERR "Unknown bustype %s\n", str);
192 } 152 }
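
With mp_bus_id_to_type[] gone, x86-64 keeps only a single bit per bus: set in mp_bus_not_pci for ISA, cleared for PCI (EISA and MCA support is dropped outright). A consumer then needs nothing more than a bit test; the helper name below is invented for illustration:

/* Hypothetical helper; real lookups can just test the bit inline. */
static inline int mp_bus_is_isa(int busid)
{
	return test_bit(busid, mp_bus_not_pci);
}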
@@ -197,8 +157,8 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
197 if (!(m->mpc_flags & MPC_APIC_USABLE)) 157 if (!(m->mpc_flags & MPC_APIC_USABLE))
198 return; 158 return;
199 159
200 printk("I/O APIC #%d Version %d at 0x%X.\n", 160 printk("I/O APIC #%d at 0x%X.\n",
201 m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); 161 m->mpc_apicid, m->mpc_apicaddr);
202 if (nr_ioapics >= MAX_IO_APICS) { 162 if (nr_ioapics >= MAX_IO_APICS) {
203 printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n", 163 printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
204 MAX_IO_APICS, nr_ioapics); 164 MAX_IO_APICS, nr_ioapics);
@@ -232,19 +192,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
232 m->mpc_irqtype, m->mpc_irqflag & 3, 192 m->mpc_irqtype, m->mpc_irqflag & 3,
233 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, 193 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
234 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); 194 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
235 /*
236 * Well it seems all SMP boards in existence
237 * use ExtINT/LVT1 == LINT0 and
238 * NMI/LVT2 == LINT1 - the following check
239 * will show us if this assumptions is false.
240 * Until then we do not have to add baggage.
241 */
242 if ((m->mpc_irqtype == mp_ExtINT) &&
243 (m->mpc_destapiclint != 0))
244 BUG();
245 if ((m->mpc_irqtype == mp_NMI) &&
246 (m->mpc_destapiclint != 1))
247 BUG();
248} 195}
249 196
250/* 197/*
@@ -258,7 +205,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
258 unsigned char *mpt=((unsigned char *)mpc)+count; 205 unsigned char *mpt=((unsigned char *)mpc)+count;
259 206
260 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { 207 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
261 printk("SMP mptable: bad signature [%c%c%c%c]!\n", 208 printk("MPTABLE: bad signature [%c%c%c%c]!\n",
262 mpc->mpc_signature[0], 209 mpc->mpc_signature[0],
263 mpc->mpc_signature[1], 210 mpc->mpc_signature[1],
264 mpc->mpc_signature[2], 211 mpc->mpc_signature[2],
@@ -266,31 +213,31 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
266 return 0; 213 return 0;
267 } 214 }
268 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { 215 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
269 printk("SMP mptable: checksum error!\n"); 216 printk("MPTABLE: checksum error!\n");
270 return 0; 217 return 0;
271 } 218 }
272 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { 219 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
273 printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", 220 printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
274 mpc->mpc_spec); 221 mpc->mpc_spec);
275 return 0; 222 return 0;
276 } 223 }
277 if (!mpc->mpc_lapic) { 224 if (!mpc->mpc_lapic) {
278 printk(KERN_ERR "SMP mptable: null local APIC address!\n"); 225 printk(KERN_ERR "MPTABLE: null local APIC address!\n");
279 return 0; 226 return 0;
280 } 227 }
281 memcpy(str,mpc->mpc_oem,8); 228 memcpy(str,mpc->mpc_oem,8);
282 str[8]=0; 229 str[8] = 0;
283 printk(KERN_INFO "OEM ID: %s ",str); 230 printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
284 231
285 memcpy(str,mpc->mpc_productid,12); 232 memcpy(str,mpc->mpc_productid,12);
286 str[12]=0; 233 str[12] = 0;
287 printk("Product ID: %s ",str); 234 printk("MPTABLE: Product ID: %s ",str);
288 235
289 printk("APIC at: 0x%X\n",mpc->mpc_lapic); 236 printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
290 237
291 /* save the local APIC address, it might be non-default */ 238 /* save the local APIC address, it might be non-default */
292 if (!acpi_lapic) 239 if (!acpi_lapic)
293 mp_lapic_addr = mpc->mpc_lapic; 240 mp_lapic_addr = mpc->mpc_lapic;
294 241
295 /* 242 /*
296 * Now process the configuration blocks. 243 * Now process the configuration blocks.
@@ -302,7 +249,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
302 struct mpc_config_processor *m= 249 struct mpc_config_processor *m=
303 (struct mpc_config_processor *)mpt; 250 (struct mpc_config_processor *)mpt;
304 if (!acpi_lapic) 251 if (!acpi_lapic)
305 MP_processor_info(m); 252 MP_processor_info(m);
306 mpt += sizeof(*m); 253 mpt += sizeof(*m);
307 count += sizeof(*m); 254 count += sizeof(*m);
308 break; 255 break;
@@ -321,8 +268,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
321 struct mpc_config_ioapic *m= 268 struct mpc_config_ioapic *m=
322 (struct mpc_config_ioapic *)mpt; 269 (struct mpc_config_ioapic *)mpt;
323 MP_ioapic_info(m); 270 MP_ioapic_info(m);
324 mpt+=sizeof(*m); 271 mpt += sizeof(*m);
325 count+=sizeof(*m); 272 count += sizeof(*m);
326 break; 273 break;
327 } 274 }
328 case MP_INTSRC: 275 case MP_INTSRC:
@@ -331,8 +278,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
331 (struct mpc_config_intsrc *)mpt; 278 (struct mpc_config_intsrc *)mpt;
332 279
333 MP_intsrc_info(m); 280 MP_intsrc_info(m);
334 mpt+=sizeof(*m); 281 mpt += sizeof(*m);
335 count+=sizeof(*m); 282 count += sizeof(*m);
336 break; 283 break;
337 } 284 }
338 case MP_LINTSRC: 285 case MP_LINTSRC:
@@ -340,15 +287,15 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
340 struct mpc_config_lintsrc *m= 287 struct mpc_config_lintsrc *m=
341 (struct mpc_config_lintsrc *)mpt; 288 (struct mpc_config_lintsrc *)mpt;
342 MP_lintsrc_info(m); 289 MP_lintsrc_info(m);
343 mpt+=sizeof(*m); 290 mpt += sizeof(*m);
344 count+=sizeof(*m); 291 count += sizeof(*m);
345 break; 292 break;
346 } 293 }
347 } 294 }
348 } 295 }
349 clustered_apic_check(); 296 clustered_apic_check();
350 if (!num_processors) 297 if (!num_processors)
351 printk(KERN_ERR "SMP mptable: no processors registered!\n"); 298 printk(KERN_ERR "MPTABLE: no processors registered!\n");
352 return num_processors; 299 return num_processors;
353} 300}
354 301
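
smp_read_mpc() still validates the table before walking it: the "PCMP" signature, a byte-sum checksum over mpc_length that must come to zero, a spec revision of 0x01 or 0x04, and a non-null local APIC address. A minimal standalone sketch of the checksum rule, equivalent to mpf_checksum() as used above:

/* Sketch: an MP config table is valid only if its bytes sum to 0 mod 256. */
static int mp_table_checksum_ok(const unsigned char *mp, int len)
{
	unsigned char sum = 0;

	while (len--)
		sum += *mp++;
	return sum == 0;
}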
@@ -444,13 +391,10 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
444 * 2 CPUs, numbered 0 & 1. 391 * 2 CPUs, numbered 0 & 1.
445 */ 392 */
446 processor.mpc_type = MP_PROCESSOR; 393 processor.mpc_type = MP_PROCESSOR;
447 /* Either an integrated APIC or a discrete 82489DX. */ 394 processor.mpc_apicver = 0;
448 processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
449 processor.mpc_cpuflag = CPU_ENABLED; 395 processor.mpc_cpuflag = CPU_ENABLED;
450 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 396 processor.mpc_cpufeature = 0;
451 (boot_cpu_data.x86_model << 4) | 397 processor.mpc_featureflag = 0;
452 boot_cpu_data.x86_mask;
453 processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
454 processor.mpc_reserved[0] = 0; 398 processor.mpc_reserved[0] = 0;
455 processor.mpc_reserved[1] = 0; 399 processor.mpc_reserved[1] = 0;
456 for (i = 0; i < 2; i++) { 400 for (i = 0; i < 2; i++) {
@@ -469,14 +413,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
469 case 5: 413 case 5:
470 memcpy(bus.mpc_bustype, "ISA ", 6); 414 memcpy(bus.mpc_bustype, "ISA ", 6);
471 break; 415 break;
472 case 2:
473 case 6:
474 case 3:
475 memcpy(bus.mpc_bustype, "EISA ", 6);
476 break;
477 case 4:
478 case 7:
479 memcpy(bus.mpc_bustype, "MCA ", 6);
480 } 416 }
481 MP_bus_info(&bus); 417 MP_bus_info(&bus);
482 if (mpc_default_type > 4) { 418 if (mpc_default_type > 4) {
@@ -487,7 +423,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
487 423
488 ioapic.mpc_type = MP_IOAPIC; 424 ioapic.mpc_type = MP_IOAPIC;
489 ioapic.mpc_apicid = 2; 425 ioapic.mpc_apicid = 2;
490 ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; 426 ioapic.mpc_apicver = 0;
491 ioapic.mpc_flags = MPC_APIC_USABLE; 427 ioapic.mpc_flags = MPC_APIC_USABLE;
492 ioapic.mpc_apicaddr = 0xFEC00000; 428 ioapic.mpc_apicaddr = 0xFEC00000;
493 MP_ioapic_info(&ioapic); 429 MP_ioapic_info(&ioapic);
@@ -530,13 +466,6 @@ void __init get_smp_config (void)
530 printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); 466 printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
531 467
532 printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); 468 printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
533 if (mpf->mpf_feature2 & (1<<7)) {
534 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
535 pic_mode = 1;
536 } else {
537 printk(KERN_INFO " Virtual Wire compatibility mode.\n");
538 pic_mode = 0;
539 }
540 469
541 /* 470 /*
542 * Now see if we need to read further. 471 * Now see if we need to read further.
@@ -616,7 +545,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
616 return 0; 545 return 0;
617} 546}
618 547
619void __init find_intel_smp (void) 548void __init find_smp_config(void)
620{ 549{
621 unsigned int address; 550 unsigned int address;
622 551
@@ -633,9 +562,7 @@ void __init find_intel_smp (void)
633 smp_scan_config(0xF0000,0x10000)) 562 smp_scan_config(0xF0000,0x10000))
634 return; 563 return;
635 /* 564 /*
636 * If it is an SMP machine we should know now, unless the 565 * If it is an SMP machine we should know now.
637 * configuration is in an EISA/MCA bus machine with an
638 * extended bios data area.
639 * 566 *
640 * there is a real-mode segmented pointer pointing to the 567 * there is a real-mode segmented pointer pointing to the
641 * 4K EBDA area at 0x40E, calculate and scan it here. 568 * 4K EBDA area at 0x40E, calculate and scan it here.
@@ -656,69 +583,41 @@ void __init find_intel_smp (void)
656 printk(KERN_INFO "No mptable found.\n"); 583 printk(KERN_INFO "No mptable found.\n");
657} 584}
658 585
659/*
660 * - Intel MP Configuration Table
661 */
662void __init find_smp_config (void)
663{
664#ifdef CONFIG_X86_LOCAL_APIC
665 find_intel_smp();
666#endif
667}
668
669
670/* -------------------------------------------------------------------------- 586/* --------------------------------------------------------------------------
671 ACPI-based MP Configuration 587 ACPI-based MP Configuration
672 -------------------------------------------------------------------------- */ 588 -------------------------------------------------------------------------- */
673 589
674#ifdef CONFIG_ACPI 590#ifdef CONFIG_ACPI
675 591
676void __init mp_register_lapic_address ( 592void __init mp_register_lapic_address(u64 address)
677 u64 address)
678{ 593{
679 mp_lapic_addr = (unsigned long) address; 594 mp_lapic_addr = (unsigned long) address;
680
681 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); 595 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
682
683 if (boot_cpu_id == -1U) 596 if (boot_cpu_id == -1U)
684 boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); 597 boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
685
686 Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
687} 598}
688 599
689 600void __cpuinit mp_register_lapic (u8 id, u8 enabled)
690void __cpuinit mp_register_lapic (
691 u8 id,
692 u8 enabled)
693{ 601{
694 struct mpc_config_processor processor; 602 struct mpc_config_processor processor;
695 int boot_cpu = 0; 603 int boot_cpu = 0;
696 604
697 if (id >= MAX_APICS) { 605 if (id == boot_cpu_id)
698 printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
699 id, MAX_APICS);
700 return;
701 }
702
703 if (id == boot_cpu_physical_apicid)
704 boot_cpu = 1; 606 boot_cpu = 1;
705 607
706 processor.mpc_type = MP_PROCESSOR; 608 processor.mpc_type = MP_PROCESSOR;
707 processor.mpc_apicid = id; 609 processor.mpc_apicid = id;
708 processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); 610 processor.mpc_apicver = 0;
709 processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); 611 processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
710 processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); 612 processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
711 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 613 processor.mpc_cpufeature = 0;
712 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; 614 processor.mpc_featureflag = 0;
713 processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
714 processor.mpc_reserved[0] = 0; 615 processor.mpc_reserved[0] = 0;
715 processor.mpc_reserved[1] = 0; 616 processor.mpc_reserved[1] = 0;
716 617
717 MP_processor_info(&processor); 618 MP_processor_info(&processor);
718} 619}
719 620
720#ifdef CONFIG_X86_IO_APIC
721
722#define MP_ISA_BUS 0 621#define MP_ISA_BUS 0
723#define MP_MAX_IOAPIC_PIN 127 622#define MP_MAX_IOAPIC_PIN 127
724 623
@@ -729,11 +628,9 @@ static struct mp_ioapic_routing {
729 u32 pin_programmed[4]; 628 u32 pin_programmed[4];
730} mp_ioapic_routing[MAX_IO_APICS]; 629} mp_ioapic_routing[MAX_IO_APICS];
731 630
732 631static int mp_find_ioapic(int gsi)
733static int mp_find_ioapic (
734 int gsi)
735{ 632{
736 int i = 0; 633 int i = 0;
737 634
738 /* Find the IOAPIC that manages this GSI. */ 635 /* Find the IOAPIC that manages this GSI. */
739 for (i = 0; i < nr_ioapics; i++) { 636 for (i = 0; i < nr_ioapics; i++) {
@@ -743,17 +640,12 @@ static int mp_find_ioapic (
743 } 640 }
744 641
745 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); 642 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
746
747 return -1; 643 return -1;
748} 644}
749
750 645
751void __init mp_register_ioapic ( 646void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
752 u8 id,
753 u32 address,
754 u32 gsi_base)
755{ 647{
756 int idx = 0; 648 int idx = 0;
757 649
758 if (nr_ioapics >= MAX_IO_APICS) { 650 if (nr_ioapics >= MAX_IO_APICS) {
759 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " 651 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
@@ -774,7 +666,7 @@ void __init mp_register_ioapic (
774 666
775 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 667 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
776 mp_ioapics[idx].mpc_apicid = id; 668 mp_ioapics[idx].mpc_apicid = id;
777 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); 669 mp_ioapics[idx].mpc_apicver = 0;
778 670
779 /* 671 /*
780 * Build basic IRQ lookup table to facilitate gsi->io_apic lookups 672 * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
@@ -785,21 +677,15 @@ void __init mp_register_ioapic (
785 mp_ioapic_routing[idx].gsi_end = gsi_base + 677 mp_ioapic_routing[idx].gsi_end = gsi_base +
786 io_apic_get_redir_entries(idx); 678 io_apic_get_redir_entries(idx);
787 679
788 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " 680 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
789 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 681 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
790 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, 682 mp_ioapics[idx].mpc_apicaddr,
791 mp_ioapic_routing[idx].gsi_start, 683 mp_ioapic_routing[idx].gsi_start,
792 mp_ioapic_routing[idx].gsi_end); 684 mp_ioapic_routing[idx].gsi_end);
793
794 return;
795} 685}
796 686
797 687void __init
798void __init mp_override_legacy_irq ( 688mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
799 u8 bus_irq,
800 u8 polarity,
801 u8 trigger,
802 u32 gsi)
803{ 689{
804 struct mpc_config_intsrc intsrc; 690 struct mpc_config_intsrc intsrc;
805 int ioapic = -1; 691 int ioapic = -1;
@@ -837,22 +723,18 @@ void __init mp_override_legacy_irq (
837 mp_irqs[mp_irq_entries] = intsrc; 723 mp_irqs[mp_irq_entries] = intsrc;
838 if (++mp_irq_entries == MAX_IRQ_SOURCES) 724 if (++mp_irq_entries == MAX_IRQ_SOURCES)
839 panic("Max # of irq sources exceeded!\n"); 725 panic("Max # of irq sources exceeded!\n");
840
841 return;
842} 726}
843 727
844 728void __init mp_config_acpi_legacy_irqs(void)
845void __init mp_config_acpi_legacy_irqs (void)
846{ 729{
847 struct mpc_config_intsrc intsrc; 730 struct mpc_config_intsrc intsrc;
848 int i = 0; 731 int i = 0;
849 int ioapic = -1; 732 int ioapic = -1;
850 733
851 /* 734 /*
852 * Fabricate the legacy ISA bus (bus #31). 735 * Fabricate the legacy ISA bus (bus #31).
853 */ 736 */
854 mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; 737 set_bit(MP_ISA_BUS, mp_bus_not_pci);
855 Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
856 738
857 /* 739 /*
858 * Locate the IOAPIC that manages the ISA IRQs (0-15). 740 * Locate the IOAPIC that manages the ISA IRQs (0-15).
@@ -905,24 +787,22 @@ void __init mp_config_acpi_legacy_irqs (void)
905 if (++mp_irq_entries == MAX_IRQ_SOURCES) 787 if (++mp_irq_entries == MAX_IRQ_SOURCES)
906 panic("Max # of irq sources exceeded!\n"); 788 panic("Max # of irq sources exceeded!\n");
907 } 789 }
908
909 return;
910} 790}
911 791
912#define MAX_GSI_NUM 4096 792#define MAX_GSI_NUM 4096
913 793
914int mp_register_gsi(u32 gsi, int triggering, int polarity) 794int mp_register_gsi(u32 gsi, int triggering, int polarity)
915{ 795{
916 int ioapic = -1; 796 int ioapic = -1;
917 int ioapic_pin = 0; 797 int ioapic_pin = 0;
918 int idx, bit = 0; 798 int idx, bit = 0;
919 static int pci_irq = 16; 799 static int pci_irq = 16;
920 /* 800 /*
921 * Mapping between Global System Interrupts, which 801 * Mapping between Global System Interrupts, which
922 * represent all possible interrupts, to the IRQs 802 * represent all possible interrupts, to the IRQs
923 * assigned to actual devices. 803 * assigned to actual devices.
924 */ 804 */
925 static int gsi_to_irq[MAX_GSI_NUM]; 805 static int gsi_to_irq[MAX_GSI_NUM];
926 806
927 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) 807 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
928 return gsi; 808 return gsi;
@@ -996,6 +876,4 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
996 polarity == ACPI_ACTIVE_HIGH ? 0 : 1); 876 polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
997 return gsi; 877 return gsi;
998} 878}
999
1000#endif /*CONFIG_X86_IO_APIC*/
1001#endif /*CONFIG_ACPI*/ 879#endif /*CONFIG_ACPI*/
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 5baa0c726e97..4d6fb047952e 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -28,71 +28,138 @@
28#include <asm/mce.h> 28#include <asm/mce.h>
29#include <asm/intel_arch_perfmon.h> 29#include <asm/intel_arch_perfmon.h>
30 30
31/* 31/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
32 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 32 * evntsel_nmi_owner tracks the ownership of the event selection
33 * - it may be reserved by some other driver, or not 33 * - different performance counters/event selections may be reserved
34 * - when not reserved by some other driver, it may be used for 34 * for different subsystems; this reservation system just tries to
35 * the NMI watchdog, or not 35 * coordinate things a little
36 *
37 * This is maintained separately from nmi_active because the NMI
38 * watchdog may also be driven from the I/O APIC timer.
39 */ 36 */
40static DEFINE_SPINLOCK(lapic_nmi_owner_lock); 37static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
41static unsigned int lapic_nmi_owner; 38static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
42#define LAPIC_NMI_WATCHDOG (1<<0) 39
43#define LAPIC_NMI_RESERVED (1<<1) 40/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 41 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now).
42 */
43#define NMI_MAX_COUNTER_BITS 66
44 44
45/* nmi_active: 45/* nmi_active:
46 * +1: the lapic NMI watchdog is active, but can be disabled 46 * >0: the lapic NMI watchdog is active, but can be disabled
47 * 0: the lapic NMI watchdog has not been set up, and cannot 47 * <0: the lapic NMI watchdog has not been set up, and cannot
48 * be enabled 48 * be enabled
49 * -1: the lapic NMI watchdog is disabled, but can be enabled 49 * 0: the lapic NMI watchdog is disabled, but can be enabled
50 */ 50 */
51int nmi_active; /* oprofile uses this */ 51atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
52int panic_on_timeout; 52int panic_on_timeout;
53 53
54unsigned int nmi_watchdog = NMI_DEFAULT; 54unsigned int nmi_watchdog = NMI_DEFAULT;
55static unsigned int nmi_hz = HZ; 55static unsigned int nmi_hz = HZ;
56static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
57static unsigned int nmi_p4_cccr_val;
58 56
59/* Note that these events don't tick when the CPU idles. This means 57struct nmi_watchdog_ctlblk {
60 the frequency varies with CPU load. */ 58 int enabled;
59 u64 check_bit;
60 unsigned int cccr_msr;
61 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
62 unsigned int evntsel_msr; /* the MSR to select the events to handle */
63};
64static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
61 65
62#define K7_EVNTSEL_ENABLE (1 << 22) 66/* local prototypes */
63#define K7_EVNTSEL_INT (1 << 20) 67static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
64#define K7_EVNTSEL_OS (1 << 17)
65#define K7_EVNTSEL_USR (1 << 16)
66#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
67#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
68 68
69#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 69/* converts an msr to an appropriate reservation bit */
70#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK 70static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
71{
72 /* returns the bit offset of the performance counter register */
73 switch (boot_cpu_data.x86_vendor) {
74 case X86_VENDOR_AMD:
75 return (msr - MSR_K7_PERFCTR0);
76 case X86_VENDOR_INTEL:
77 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
78 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
79 else
80 return (msr - MSR_P4_BPU_PERFCTR0);
81 }
82 return 0;
83}
71 84
72#define MSR_P4_MISC_ENABLE 0x1A0 85/* converts an msr to an appropriate reservation bit */
73#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) 86static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
74#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) 87{
75#define MSR_P4_PERFCTR0 0x300 88 /* returns the bit offset of the event selection register */
76#define MSR_P4_CCCR0 0x360 89 switch (boot_cpu_data.x86_vendor) {
77#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) 90 case X86_VENDOR_AMD:
78#define P4_ESCR_OS (1<<3) 91 return (msr - MSR_K7_EVNTSEL0);
79#define P4_ESCR_USR (1<<2) 92 case X86_VENDOR_INTEL:
80#define P4_CCCR_OVF_PMI0 (1<<26) 93 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
81#define P4_CCCR_OVF_PMI1 (1<<27) 94 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
82#define P4_CCCR_THRESHOLD(N) ((N)<<20) 95 else
83#define P4_CCCR_COMPLEMENT (1<<19) 96 return (msr - MSR_P4_BSU_ESCR0);
84#define P4_CCCR_COMPARE (1<<18) 97 }
85#define P4_CCCR_REQUIRED (3<<16) 98 return 0;
86#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) 99}
87#define P4_CCCR_ENABLE (1<<12) 100
88/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter 101/* checks whether a counter bit is available (hack for oprofile) */
89 CRU_ESCR0 (with any non-null event selector) through a complemented 102int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
90 max threshold. [IA32-Vol3, Section 14.9.9] */ 103{
91#define MSR_P4_IQ_COUNTER0 0x30C 104 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
92#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) 105
93#define P4_NMI_IQ_CCCR0 \ 106 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
94 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ 107}
95 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) 108
 109/* checks an msr for availability */
110int avail_to_resrv_perfctr_nmi(unsigned int msr)
111{
112 unsigned int counter;
113
114 counter = nmi_perfctr_msr_to_bit(msr);
115 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
116
117 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
118}
119
120int reserve_perfctr_nmi(unsigned int msr)
121{
122 unsigned int counter;
123
124 counter = nmi_perfctr_msr_to_bit(msr);
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126
127 if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
128 return 1;
129 return 0;
130}
131
132void release_perfctr_nmi(unsigned int msr)
133{
134 unsigned int counter;
135
136 counter = nmi_perfctr_msr_to_bit(msr);
137 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
138
139 clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
140}
141
142int reserve_evntsel_nmi(unsigned int msr)
143{
144 unsigned int counter;
145
146 counter = nmi_evntsel_msr_to_bit(msr);
147 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
148
149 if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)))
150 return 1;
151 return 0;
152}
153
154void release_evntsel_nmi(unsigned int msr)
155{
156 unsigned int counter;
157
158 counter = nmi_evntsel_msr_to_bit(msr);
159 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
160
161 clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner));
162}
96 163
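
These reservation routines define the ownership protocol the watchdog setup paths below follow: claim the counter MSR, then the event-select MSR, and release in reverse order on failure. Condensed calling convention (label names are illustrative; compare setup_k7_watchdog() below):

	/* Sketch of the reserve/release pairing; labels are illustrative. */
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;
	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail_perfctr;
	/* ... program the MSRs ... */
	return 1;
fail_perfctr:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;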
97static __cpuinit inline int nmi_known_cpu(void) 164static __cpuinit inline int nmi_known_cpu(void)
98{ 165{
@@ -109,7 +176,7 @@ static __cpuinit inline int nmi_known_cpu(void)
109} 176}
110 177
111/* Run after command line and cpu_init init, but before all other checks */ 178/* Run after command line and cpu_init init, but before all other checks */
112void __cpuinit nmi_watchdog_default(void) 179void nmi_watchdog_default(void)
113{ 180{
114 if (nmi_watchdog != NMI_DEFAULT) 181 if (nmi_watchdog != NMI_DEFAULT)
115 return; 182 return;
@@ -145,6 +212,12 @@ int __init check_nmi_watchdog (void)
145 int *counts; 212 int *counts;
146 int cpu; 213 int cpu;
147 214
215 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
216 return 0;
217
218 if (!atomic_read(&nmi_active))
219 return 0;
220
148 counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); 221 counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
149 if (!counts) 222 if (!counts)
150 return -1; 223 return -1;
@@ -162,26 +235,43 @@ int __init check_nmi_watchdog (void)
162 mdelay((10*1000)/nmi_hz); // wait 10 ticks 235 mdelay((10*1000)/nmi_hz); // wait 10 ticks
163 236
164 for_each_online_cpu(cpu) { 237 for_each_online_cpu(cpu) {
238 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
239 continue;
165 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { 240 if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
166 endflag = 1;
167 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", 241 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
168 cpu, 242 cpu,
169 counts[cpu], 243 counts[cpu],
170 cpu_pda(cpu)->__nmi_count); 244 cpu_pda(cpu)->__nmi_count);
171 nmi_active = 0; 245 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
172 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; 246 atomic_dec(&nmi_active);
173 nmi_perfctr_msr = 0;
174 kfree(counts);
175 return -1;
176 } 247 }
177 } 248 }
249 if (!atomic_read(&nmi_active)) {
250 kfree(counts);
251 atomic_set(&nmi_active, -1);
252 return -1;
253 }
178 endflag = 1; 254 endflag = 1;
179 printk("OK.\n"); 255 printk("OK.\n");
180 256
181 /* now that we know it works we can reduce NMI frequency to 257 /* now that we know it works we can reduce NMI frequency to
182 something more reasonable; makes a difference in some configs */ 258 something more reasonable; makes a difference in some configs */
183 if (nmi_watchdog == NMI_LOCAL_APIC) 259 if (nmi_watchdog == NMI_LOCAL_APIC) {
260 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
261
184 nmi_hz = 1; 262 nmi_hz = 1;
263 /*
264 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
265 * are writable, with higher bits sign extending from bit 31.
 266 * So, we can only program the counter with 31-bit values, and
 267 * the 32nd bit must be 1 so the upper bits sign-extend to 1.
268 * Find the appropriate nmi_hz
269 */
270 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
271 ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
272 nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
273 }
274 }
185 275
186 kfree(counts); 276 kfree(counts);
187 return 0; 277 return 0;
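
The ARCH_PERFMON clamp above exists because only 31 bits of the counter can hold a positive reload value. Worked example with illustrative numbers:

/* Illustrative: a 3 GHz CPU (cpu_khz = 3000000). */
u64 cycles_per_sec = (u64)3000000 * 1000;	/* 3e9 > 0x7fffffff */
unsigned int hz = cycles_per_sec / 0x7fffffffUL + 1;	/* == 2 */
/* reload value = 3e9 / 2 = 1.5e9, which fits in 31 bits */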
@@ -201,91 +291,65 @@ int __init setup_nmi_watchdog(char *str)
201 291
202 get_option(&str, &nmi); 292 get_option(&str, &nmi);
203 293
204 if (nmi >= NMI_INVALID) 294 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
205 return 0; 295 return 0;
296
297 if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
298 return 0; /* no lapic support */
206 nmi_watchdog = nmi; 299 nmi_watchdog = nmi;
207 return 1; 300 return 1;
208} 301}
209 302
210__setup("nmi_watchdog=", setup_nmi_watchdog); 303__setup("nmi_watchdog=", setup_nmi_watchdog);
211 304
212static void disable_intel_arch_watchdog(void);
213
214static void disable_lapic_nmi_watchdog(void) 305static void disable_lapic_nmi_watchdog(void)
215{ 306{
216 if (nmi_active <= 0) 307 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
308
309 if (atomic_read(&nmi_active) <= 0)
217 return; 310 return;
218 switch (boot_cpu_data.x86_vendor) {
219 case X86_VENDOR_AMD:
220 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
221 break;
222 case X86_VENDOR_INTEL:
223 if (boot_cpu_data.x86 == 15) {
224 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
225 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
226 } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
227 disable_intel_arch_watchdog();
228 }
229 break;
230 }
231 nmi_active = -1;
232 /* tell do_nmi() and others that we're not active any more */
233 nmi_watchdog = 0;
234}
235 311
236static void enable_lapic_nmi_watchdog(void) 312 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
237{ 313
238 if (nmi_active < 0) { 314 BUG_ON(atomic_read(&nmi_active) != 0);
239 nmi_watchdog = NMI_LOCAL_APIC;
240 touch_nmi_watchdog();
241 setup_apic_nmi_watchdog();
242 }
243} 315}
244 316
245int reserve_lapic_nmi(void) 317static void enable_lapic_nmi_watchdog(void)
246{ 318{
247 unsigned int old_owner; 319 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
248 320
249 spin_lock(&lapic_nmi_owner_lock); 321 /* are we already enabled */
250 old_owner = lapic_nmi_owner; 322 if (atomic_read(&nmi_active) != 0)
251 lapic_nmi_owner |= LAPIC_NMI_RESERVED; 323 return;
252 spin_unlock(&lapic_nmi_owner_lock);
253 if (old_owner & LAPIC_NMI_RESERVED)
254 return -EBUSY;
255 if (old_owner & LAPIC_NMI_WATCHDOG)
256 disable_lapic_nmi_watchdog();
257 return 0;
258}
259 324
260void release_lapic_nmi(void) 325 /* are we lapic aware */
261{ 326 if (nmi_known_cpu() <= 0)
262 unsigned int new_owner; 327 return;
263 328
264 spin_lock(&lapic_nmi_owner_lock); 329 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
265 new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; 330 touch_nmi_watchdog();
266 lapic_nmi_owner = new_owner;
267 spin_unlock(&lapic_nmi_owner_lock);
268 if (new_owner & LAPIC_NMI_WATCHDOG)
269 enable_lapic_nmi_watchdog();
270} 331}
271 332
272void disable_timer_nmi_watchdog(void) 333void disable_timer_nmi_watchdog(void)
273{ 334{
274 if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) 335 BUG_ON(nmi_watchdog != NMI_IO_APIC);
336
337 if (atomic_read(&nmi_active) <= 0)
275 return; 338 return;
276 339
277 disable_irq(0); 340 disable_irq(0);
278 unset_nmi_callback(); 341 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
279 nmi_active = -1; 342
280 nmi_watchdog = NMI_NONE; 343 BUG_ON(atomic_read(&nmi_active) != 0);
281} 344}
282 345
283void enable_timer_nmi_watchdog(void) 346void enable_timer_nmi_watchdog(void)
284{ 347{
285 if (nmi_active < 0) { 348 BUG_ON(nmi_watchdog != NMI_IO_APIC);
286 nmi_watchdog = NMI_IO_APIC; 349
350 if (atomic_read(&nmi_active) == 0) {
287 touch_nmi_watchdog(); 351 touch_nmi_watchdog();
288 nmi_active = 1; 352 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
289 enable_irq(0); 353 enable_irq(0);
290 } 354 }
291} 355}
@@ -296,15 +360,20 @@ static int nmi_pm_active; /* nmi_active before suspend */
296 360
297static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) 361static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
298{ 362{
299 nmi_pm_active = nmi_active; 363 /* only CPU0 goes here, other CPUs should be offline */
300 disable_lapic_nmi_watchdog(); 364 nmi_pm_active = atomic_read(&nmi_active);
365 stop_apic_nmi_watchdog(NULL);
366 BUG_ON(atomic_read(&nmi_active) != 0);
301 return 0; 367 return 0;
302} 368}
303 369
304static int lapic_nmi_resume(struct sys_device *dev) 370static int lapic_nmi_resume(struct sys_device *dev)
305{ 371{
306 if (nmi_pm_active > 0) 372 /* only CPU0 goes here, other CPUs should be offline */
307 enable_lapic_nmi_watchdog(); 373 if (nmi_pm_active > 0) {
374 setup_apic_nmi_watchdog(NULL);
375 touch_nmi_watchdog();
376 }
308 return 0; 377 return 0;
309} 378}
310 379
@@ -323,7 +392,13 @@ static int __init init_lapic_nmi_sysfs(void)
323{ 392{
324 int error; 393 int error;
325 394
326 if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) 395 /* should really be a BUG_ON but b/c this is an
396 * init call, it just doesn't work. -dcz
397 */
398 if (nmi_watchdog != NMI_LOCAL_APIC)
399 return 0;
400
 401 if (atomic_read(&nmi_active) < 0)
327 return 0; 402 return 0;
328 403
329 error = sysdev_class_register(&nmi_sysclass); 404 error = sysdev_class_register(&nmi_sysclass);
@@ -341,74 +416,209 @@ late_initcall(init_lapic_nmi_sysfs);
341 * Original code written by Keith Owens. 416 * Original code written by Keith Owens.
342 */ 417 */
343 418
344static void clear_msr_range(unsigned int base, unsigned int n) 419/* Note that these events don't tick when the CPU idles. This means
345{ 420 the frequency varies with CPU load. */
346 unsigned int i;
347 421
348 for(i = 0; i < n; ++i) 422#define K7_EVNTSEL_ENABLE (1 << 22)
349 wrmsr(base+i, 0, 0); 423#define K7_EVNTSEL_INT (1 << 20)
350} 424#define K7_EVNTSEL_OS (1 << 17)
425#define K7_EVNTSEL_USR (1 << 16)
426#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
427#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
351 428
352static void setup_k7_watchdog(void) 429static int setup_k7_watchdog(void)
353{ 430{
354 int i; 431 unsigned int perfctr_msr, evntsel_msr;
355 unsigned int evntsel; 432 unsigned int evntsel;
433 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
356 434
357 nmi_perfctr_msr = MSR_K7_PERFCTR0; 435 perfctr_msr = MSR_K7_PERFCTR0;
436 evntsel_msr = MSR_K7_EVNTSEL0;
437 if (!reserve_perfctr_nmi(perfctr_msr))
438 goto fail;
358 439
359 for(i = 0; i < 4; ++i) { 440 if (!reserve_evntsel_nmi(evntsel_msr))
360 /* Simulator may not support it */ 441 goto fail1;
361 if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) { 442
362 nmi_perfctr_msr = 0; 443 /* Simulator may not support it */
363 return; 444 if (checking_wrmsrl(evntsel_msr, 0UL))
364 } 445 goto fail2;
365 wrmsrl(MSR_K7_PERFCTR0+i, 0UL); 446 wrmsrl(perfctr_msr, 0UL);
366 }
367 447
368 evntsel = K7_EVNTSEL_INT 448 evntsel = K7_EVNTSEL_INT
369 | K7_EVNTSEL_OS 449 | K7_EVNTSEL_OS
370 | K7_EVNTSEL_USR 450 | K7_EVNTSEL_USR
371 | K7_NMI_EVENT; 451 | K7_NMI_EVENT;
372 452
373 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 453 /* setup the timer */
374 wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); 454 wrmsr(evntsel_msr, evntsel, 0);
455 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
375 apic_write(APIC_LVTPC, APIC_DM_NMI); 456 apic_write(APIC_LVTPC, APIC_DM_NMI);
376 evntsel |= K7_EVNTSEL_ENABLE; 457 evntsel |= K7_EVNTSEL_ENABLE;
377 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 458 wrmsr(evntsel_msr, evntsel, 0);
459
460 wd->perfctr_msr = perfctr_msr;
461 wd->evntsel_msr = evntsel_msr;
462 wd->cccr_msr = 0; //unused
463 wd->check_bit = 1ULL<<63;
464 return 1;
465fail2:
466 release_evntsel_nmi(evntsel_msr);
467fail1:
468 release_perfctr_nmi(perfctr_msr);
469fail:
470 return 0;
378} 471}
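
setup_k7_watchdog() arms the counter with -((u64)cpu_khz * 1000 / nmi_hz): counting up from that negative value, it overflows after roughly 1/nmi_hz seconds of unhalted execution, and the overflow is delivered as an NMI via APIC_LVTPC. Worked example (the numbers are illustrative):

/* Illustrative: cpu_khz = 2000000 (2 GHz), nmi_hz = 1. */
wrmsrl(perfctr_msr, -((u64)2000000 * 1000 / 1));
/* counter starts at -2e9 and overflows after ~2e9 unhalted
 * cycles, i.e. about one second of busy CPU, firing the PMI/NMI */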
379 472
380static void disable_intel_arch_watchdog(void) 473static void stop_k7_watchdog(void)
381{ 474{
382 unsigned ebx; 475 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
383 476
384 /* 477 wrmsr(wd->evntsel_msr, 0, 0);
385 * Check whether the Architectural PerfMon supports 478
386 * Unhalted Core Cycles Event or not. 479 release_evntsel_nmi(wd->evntsel_msr);
387 * NOTE: Corresponding bit = 0 in ebp indicates event present. 480 release_perfctr_nmi(wd->perfctr_msr);
481}
482
483/* Note that these events don't tick when the CPU idles. This means
484 the frequency varies with CPU load. */
485
486#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
487#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
488#define P4_ESCR_OS (1<<3)
489#define P4_ESCR_USR (1<<2)
490#define P4_CCCR_OVF_PMI0 (1<<26)
491#define P4_CCCR_OVF_PMI1 (1<<27)
492#define P4_CCCR_THRESHOLD(N) ((N)<<20)
493#define P4_CCCR_COMPLEMENT (1<<19)
494#define P4_CCCR_COMPARE (1<<18)
495#define P4_CCCR_REQUIRED (3<<16)
496#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
497#define P4_CCCR_ENABLE (1<<12)
498#define P4_CCCR_OVF (1<<31)
499/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
500 CRU_ESCR0 (with any non-null event selector) through a complemented
501 max threshold. [IA32-Vol3, Section 14.9.9] */
502
503static int setup_p4_watchdog(void)
504{
505 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
506 unsigned int evntsel, cccr_val;
507 unsigned int misc_enable, dummy;
508 unsigned int ht_num;
509 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
510
511 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
512 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
513 return 0;
514
515#ifdef CONFIG_SMP
516 /* detect which hyperthread we are on */
517 if (smp_num_siblings == 2) {
518 unsigned int ebx, apicid;
519
520 ebx = cpuid_ebx(1);
521 apicid = (ebx >> 24) & 0xff;
522 ht_num = apicid & 1;
523 } else
524#endif
525 ht_num = 0;
526
527 /* performance counters are shared resources
528 * assign each hyperthread its own set
529 * (re-use the ESCR0 register, seems safe
530 * and keeps the cccr_val the same)
388 */ 531 */
389 ebx = cpuid_ebx(10); 532 if (!ht_num) {
390 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) 533 /* logical cpu 0 */
391 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); 534 perfctr_msr = MSR_P4_IQ_PERFCTR0;
535 evntsel_msr = MSR_P4_CRU_ESCR0;
536 cccr_msr = MSR_P4_IQ_CCCR0;
537 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
538 } else {
539 /* logical cpu 1 */
540 perfctr_msr = MSR_P4_IQ_PERFCTR1;
541 evntsel_msr = MSR_P4_CRU_ESCR0;
542 cccr_msr = MSR_P4_IQ_CCCR1;
543 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
544 }
545
546 if (!reserve_perfctr_nmi(perfctr_msr))
547 goto fail;
548
549 if (!reserve_evntsel_nmi(evntsel_msr))
550 goto fail1;
551
552 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
553 | P4_ESCR_OS
554 | P4_ESCR_USR;
555
556 cccr_val |= P4_CCCR_THRESHOLD(15)
557 | P4_CCCR_COMPLEMENT
558 | P4_CCCR_COMPARE
559 | P4_CCCR_REQUIRED;
560
561 wrmsr(evntsel_msr, evntsel, 0);
562 wrmsr(cccr_msr, cccr_val, 0);
563 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
564 apic_write(APIC_LVTPC, APIC_DM_NMI);
565 cccr_val |= P4_CCCR_ENABLE;
566 wrmsr(cccr_msr, cccr_val, 0);
567
568 wd->perfctr_msr = perfctr_msr;
569 wd->evntsel_msr = evntsel_msr;
570 wd->cccr_msr = cccr_msr;
571 wd->check_bit = 1ULL<<39;
572 return 1;
573fail1:
574 release_perfctr_nmi(perfctr_msr);
575fail:
576 return 0;
577}
578
579static void stop_p4_watchdog(void)
580{
581 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
582
583 wrmsr(wd->cccr_msr, 0, 0);
584 wrmsr(wd->evntsel_msr, 0, 0);
585
586 release_evntsel_nmi(wd->evntsel_msr);
587 release_perfctr_nmi(wd->perfctr_msr);
392} 588}
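
The "complemented max threshold" trick noted above is what turns IQ_COUNTER0 into a clock: with COMPARE and COMPLEMENT set and a threshold of 15, the condition "event count <= 15" is always true, so the counter ticks every cycle. A sketch that assembles the same CCCR value with plain bit arithmetic (nothing here touches MSRs):

#include <stdio.h>

#define P4_CCCR_OVF_PMI0	(1 << 26)
#define P4_CCCR_THRESHOLD(N)	((N) << 20)
#define P4_CCCR_COMPLEMENT	(1 << 19)
#define P4_CCCR_COMPARE		(1 << 18)
#define P4_CCCR_REQUIRED	(3 << 16)
#define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
#define P4_CCCR_ENABLE		(1 << 12)

int main(void)
{
	unsigned int cccr = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4)
			  | P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT
			  | P4_CCCR_COMPARE | P4_CCCR_REQUIRED
			  | P4_CCCR_ENABLE;

	/* COMPLEMENT + threshold 15 means "count when event count <= 15",
	 * which is always true, so the counter advances every cycle */
	printf("IQ_CCCR0 value: 0x%08x\n", cccr);
	return 0;
}
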
393 589
590#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
591#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
592
394static int setup_intel_arch_watchdog(void) 593static int setup_intel_arch_watchdog(void)
395{ 594{
595 unsigned int ebx;
596 union cpuid10_eax eax;
597 unsigned int unused;
598 unsigned int perfctr_msr, evntsel_msr;
396 unsigned int evntsel; 599 unsigned int evntsel;
397 unsigned ebx; 600 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
398 601
399 /* 602 /*
400 * Check whether the Architectural PerfMon supports 603 * Check whether the Architectural PerfMon supports
401 * Unhalted Core Cycles Event or not. 604 * Unhalted Core Cycles Event or not.
402 * NOTE: Corresponding bit = 0 in ebp indicates event present. 605 * NOTE: Corresponding bit = 0 in ebx indicates event present.
403 */ 606 */
404 ebx = cpuid_ebx(10); 607 cpuid(10, &(eax.full), &ebx, &unused, &unused);
405 if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) 608 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
406 return 0; 609 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
610 goto fail;
611
612 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
613 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
407 614
408 nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; 615 if (!reserve_perfctr_nmi(perfctr_msr))
616 goto fail;
409 617
410 clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); 618 if (!reserve_evntsel_nmi(evntsel_msr))
411 clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); 619 goto fail1;
620
621 wrmsrl(perfctr_msr, 0UL);
412 622
413 evntsel = ARCH_PERFMON_EVENTSEL_INT 623 evntsel = ARCH_PERFMON_EVENTSEL_INT
414 | ARCH_PERFMON_EVENTSEL_OS 624 | ARCH_PERFMON_EVENTSEL_OS
@@ -416,84 +626,122 @@ static int setup_intel_arch_watchdog(void)
416 | ARCH_PERFMON_NMI_EVENT_SEL 626 | ARCH_PERFMON_NMI_EVENT_SEL
417 | ARCH_PERFMON_NMI_EVENT_UMASK; 627 | ARCH_PERFMON_NMI_EVENT_UMASK;
418 628
419 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); 629 /* setup the timer */
420 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); 630 wrmsr(evntsel_msr, evntsel, 0);
631 wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
632
421 apic_write(APIC_LVTPC, APIC_DM_NMI); 633 apic_write(APIC_LVTPC, APIC_DM_NMI);
422 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; 634 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
423 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); 635 wrmsr(evntsel_msr, evntsel, 0);
636
637 wd->perfctr_msr = perfctr_msr;
638 wd->evntsel_msr = evntsel_msr;
639 wd->cccr_msr = 0; /* unused */
640 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
424 return 1; 641 return 1;
642fail1:
643 release_perfctr_nmi(perfctr_msr);
644fail:
645 return 0;
425} 646}
426 647
427 648static void stop_intel_arch_watchdog(void)
428static int setup_p4_watchdog(void)
429{ 649{
430 unsigned int misc_enable, dummy; 650 unsigned int ebx;
651 union cpuid10_eax eax;
652 unsigned int unused;
653 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
431 654
432 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); 655 /*
433 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) 656 * Check whether the Architectural PerfMon supports
434 return 0; 657 * Unhalted Core Cycles Event or not.
658 * NOTE: Corresponding bit = 0 in ebx indicates event present.
659 */
660 cpuid(10, &(eax.full), &ebx, &unused, &unused);
661 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
662 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
663 return;
435 664
436 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; 665 wrmsr(wd->evntsel_msr, 0, 0);
437 nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
438#ifdef CONFIG_SMP
439 if (smp_num_siblings == 2)
440 nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
441#endif
442 666
443 if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) 667 release_evntsel_nmi(wd->evntsel_msr);
444 clear_msr_range(0x3F1, 2); 668 release_perfctr_nmi(wd->perfctr_msr);
445 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
446 docs doesn't fully define it, so leave it alone for now. */
447 if (boot_cpu_data.x86_model >= 0x3) {
448 /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
449 clear_msr_range(0x3A0, 26);
450 clear_msr_range(0x3BC, 3);
451 } else {
452 clear_msr_range(0x3A0, 31);
453 }
454 clear_msr_range(0x3C0, 6);
455 clear_msr_range(0x3C8, 6);
456 clear_msr_range(0x3E0, 2);
457 clear_msr_range(MSR_P4_CCCR0, 18);
458 clear_msr_range(MSR_P4_PERFCTR0, 18);
459
460 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
461 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
462 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz * 1000UL / nmi_hz));
463 wrmsrl(MSR_P4_IQ_COUNTER0, -((u64)cpu_khz * 1000 / nmi_hz));
464 apic_write(APIC_LVTPC, APIC_DM_NMI);
465 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
466 return 1;
467} 669}
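
The cpuid leaf 10 decode used by both the setup and stop paths relies on the architectural-perfmon layout of EAX (version, counter count, counter bit width, EBX mask length) and on a clear EBX bit meaning "event present". A hedged stand-alone decode with made-up sample register values (real values come from the cpuid instruction):

#include <stdio.h>

#define UNHALTED_CORE_CYCLES_INDEX	0	/* bit 0 of ebx */

int main(void)
{
	unsigned int eax = 0x07280202;	/* assumed sample: version 2,
					 * 2 counters, 40-bit width,
					 * mask_length 7 */
	unsigned int ebx = 0x00000000;	/* bit 0 clear => event present */

	unsigned int version      = eax & 0xff;
	unsigned int num_counters = (eax >> 8) & 0xff;
	unsigned int bit_width    = (eax >> 16) & 0xff;
	unsigned int mask_length  = (eax >> 24) & 0xff;

	int cycles_event_present =
		(mask_length >= UNHALTED_CORE_CYCLES_INDEX + 1) &&
		!(ebx & (1u << UNHALTED_CORE_CYCLES_INDEX));

	printf("perfmon v%u, %u counters, %u-bit\n",
	       version, num_counters, bit_width);
	printf("unhalted core cycles event %s\n",
	       cycles_event_present ? "present" : "absent");
	printf("overflow check bit: 1ULL << %u\n", bit_width - 1);
	return 0;
}
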
468 670
469void setup_apic_nmi_watchdog(void) 671void setup_apic_nmi_watchdog(void *unused)
470{ 672{
471 switch (boot_cpu_data.x86_vendor) { 673 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
472 case X86_VENDOR_AMD: 674
473 if (boot_cpu_data.x86 != 15) 675 /* only support LOCAL and IO APICs for now */
474 return; 676 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
475 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) 677 (nmi_watchdog != NMI_IO_APIC))
476 return; 678 return;
477 setup_k7_watchdog(); 679
478 break; 680 if (wd->enabled == 1)
479 case X86_VENDOR_INTEL: 681 return;
480 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 682
481 if (!setup_intel_arch_watchdog()) 683 /* cheap hack to support suspend/resume */
684 /* if cpu0 is not active neither should the other cpus */
685 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
686 return;
687
688 if (nmi_watchdog == NMI_LOCAL_APIC) {
689 switch (boot_cpu_data.x86_vendor) {
690 case X86_VENDOR_AMD:
691 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
482 return; 692 return;
483 } else if (boot_cpu_data.x86 == 15) { 693 if (!setup_k7_watchdog())
694 return;
695 break;
696 case X86_VENDOR_INTEL:
697 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
698 if (!setup_intel_arch_watchdog())
699 return;
700 break;
701 }
484 if (!setup_p4_watchdog()) 702 if (!setup_p4_watchdog())
485 return; 703 return;
486 } else { 704 break;
705 default:
487 return; 706 return;
488 } 707 }
708 }
709 wd->enabled = 1;
710 atomic_inc(&nmi_active);
711}
712
713void stop_apic_nmi_watchdog(void *unused)
714{
715 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
489 716
490 break; 717 /* only support LOCAL and IO APICs for now */
718 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
719 (nmi_watchdog != NMI_IO_APIC))
720 return;
491 721
492 default: 722 if (wd->enabled == 0)
493 return; 723 return;
724
725 if (nmi_watchdog == NMI_LOCAL_APIC) {
726 switch (boot_cpu_data.x86_vendor) {
727 case X86_VENDOR_AMD:
728 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
729 return;
730 stop_k7_watchdog();
731 break;
732 case X86_VENDOR_INTEL:
733 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
734 stop_intel_arch_watchdog();
735 break;
736 }
737 stop_p4_watchdog();
738 break;
739 default:
740 return;
741 }
494 } 742 }
495 lapic_nmi_owner = LAPIC_NMI_WATCHDOG; 743 wd->enabled = 0;
496 nmi_active = 1; 744 atomic_dec(&nmi_active);
497} 745}
498 746
499/* 747/*
@@ -526,93 +774,109 @@ void touch_nmi_watchdog (void)
526 touch_softlockup_watchdog(); 774 touch_softlockup_watchdog();
527} 775}
528 776
529void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) 777int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
530{ 778{
531 int sum; 779 int sum;
532 int touched = 0; 780 int touched = 0;
781 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
782 u64 dummy;
783 int rc=0;
784
785 /* check for other users first */
786 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
787 == NOTIFY_STOP) {
788 rc = 1;
789 touched = 1;
790 }
533 791
534 sum = read_pda(apic_timer_irqs); 792 sum = read_pda(apic_timer_irqs);
535 if (__get_cpu_var(nmi_touch)) { 793 if (__get_cpu_var(nmi_touch)) {
536 __get_cpu_var(nmi_touch) = 0; 794 __get_cpu_var(nmi_touch) = 0;
537 touched = 1; 795 touched = 1;
538 } 796 }
797
539#ifdef CONFIG_X86_MCE 798#ifdef CONFIG_X86_MCE
540 /* Could check oops_in_progress here too, but it's safer 799 /* Could check oops_in_progress here too, but it's safer
541 not to */ 800 not to */
542 if (atomic_read(&mce_entry) > 0) 801 if (atomic_read(&mce_entry) > 0)
543 touched = 1; 802 touched = 1;
544#endif 803#endif
804 /* if the apic timer isn't firing, this cpu isn't doing much */
545 if (!touched && __get_cpu_var(last_irq_sum) == sum) { 805 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
546 /* 806 /*
547 * Ayiee, looks like this CPU is stuck ... 807 * Ayiee, looks like this CPU is stuck ...
548 * wait a few IRQs (5 seconds) before doing the oops ... 808 * wait a few IRQs (5 seconds) before doing the oops ...
549 */ 809 */
550 local_inc(&__get_cpu_var(alert_counter)); 810 local_inc(&__get_cpu_var(alert_counter));
551 if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { 811 if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
552 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 812 die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
553 == NOTIFY_STOP) { 813 panic_on_timeout);
554 local_set(&__get_cpu_var(alert_counter), 0);
555 return;
556 }
557 die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs);
558 }
559 } else { 814 } else {
560 __get_cpu_var(last_irq_sum) = sum; 815 __get_cpu_var(last_irq_sum) = sum;
561 local_set(&__get_cpu_var(alert_counter), 0); 816 local_set(&__get_cpu_var(alert_counter), 0);
562 } 817 }
563 if (nmi_perfctr_msr) { 818
564 if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { 819 /* see if the nmi watchdog went off */
565 /* 820 if (wd->enabled) {
566 * P4 quirks: 821 if (nmi_watchdog == NMI_LOCAL_APIC) {
567 * - An overflown perfctr will assert its interrupt 822 rdmsrl(wd->perfctr_msr, dummy);
568 * until the OVF flag in its CCCR is cleared. 823 if (dummy & wd->check_bit){
569 * - LVTPC is masked on interrupt and must be 824 /* this wasn't a watchdog timer interrupt */
570 * unmasked by the LVTPC handler. 825 goto done;
571 */ 826 }
572 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 827
573 apic_write(APIC_LVTPC, APIC_DM_NMI); 828 /* only Intel uses the cccr msr */
574 } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { 829 if (wd->cccr_msr != 0) {
575 /* 830 /*
576 * For Intel based architectural perfmon 831 * P4 quirks:
577 * - LVTPC is masked on interrupt and must be 832 * - An overflown perfctr will assert its interrupt
578 * unmasked by the LVTPC handler. 833 * until the OVF flag in its CCCR is cleared.
834 * - LVTPC is masked on interrupt and must be
835 * unmasked by the LVTPC handler.
836 */
837 rdmsrl(wd->cccr_msr, dummy);
838 dummy &= ~P4_CCCR_OVF;
839 wrmsrl(wd->cccr_msr, dummy);
840 apic_write(APIC_LVTPC, APIC_DM_NMI);
841 } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
842 /*
843 * ArchPerfom/Core Duo needs to re-unmask
844 * the apic vector
845 */
846 apic_write(APIC_LVTPC, APIC_DM_NMI);
847 }
848 /* start the cycle over again */
849 wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
850 rc = 1;
851 } else if (nmi_watchdog == NMI_IO_APIC) {
852 /* We don't know how to accurately check for this,
 853 * so just assume it was a watchdog timer interrupt;
 854 * this matches the old behaviour.
579 */ 855 */
580 apic_write(APIC_LVTPC, APIC_DM_NMI); 856 rc = 1;
581 } 857 } else
582 wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); 858 printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
583 } 859 }
860done:
861 return rc;
584} 862}
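
The check_bit test works because the counter is reloaded with a large negative value: its top bit stays set until the counter wraps past zero, so a still-set bit means the NMI was not the watchdog's. A simulated version (values invented; rdmsrl() supplies them in the kernel), assuming a P4-style 40-bit counter with check_bit 1ULL<<39:

#include <stdio.h>
#include <stdint.h>

static int counter_overflowed(uint64_t ctr, uint64_t check_bit)
{
	/* bit still set => the counter is still counting up toward zero,
	 * i.e. this NMI was not ours */
	return !(ctr & check_bit);
}

int main(void)
{
	uint64_t check_bit = 1ULL << 39;		/* 40-bit counter */
	uint64_t before = (1ULL << 40) - 2400000000ULL;	/* freshly reloaded */
	uint64_t after  = 5;				/* just wrapped */

	printf("before overflow: %d\n", counter_overflowed(before, check_bit));
	printf("after overflow:  %d\n", counter_overflowed(after, check_bit));
	return 0;
}
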
585 863
586static __kprobes int dummy_nmi_callback(struct pt_regs * regs, int cpu)
587{
588 return 0;
589}
590
591static nmi_callback_t nmi_callback = dummy_nmi_callback;
592
593asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) 864asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code)
594{ 865{
595 int cpu = safe_smp_processor_id();
596
597 nmi_enter(); 866 nmi_enter();
598 add_pda(__nmi_count,1); 867 add_pda(__nmi_count,1);
599 if (!rcu_dereference(nmi_callback)(regs, cpu)) 868 default_do_nmi(regs);
600 default_do_nmi(regs);
601 nmi_exit(); 869 nmi_exit();
602} 870}
603 871
604void set_nmi_callback(nmi_callback_t callback) 872int do_nmi_callback(struct pt_regs * regs, int cpu)
605{ 873{
606 vmalloc_sync_all(); 874#ifdef CONFIG_SYSCTL
607 rcu_assign_pointer(nmi_callback, callback); 875 if (unknown_nmi_panic)
608} 876 return unknown_nmi_panic_callback(regs, cpu);
609EXPORT_SYMBOL_GPL(set_nmi_callback); 877#endif
610 878 return 0;
611void unset_nmi_callback(void)
612{
613 nmi_callback = dummy_nmi_callback;
614} 879}
615EXPORT_SYMBOL_GPL(unset_nmi_callback);
616 880
617#ifdef CONFIG_SYSCTL 881#ifdef CONFIG_SYSCTL
618 882
@@ -621,36 +885,42 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
621 unsigned char reason = get_nmi_reason(); 885 unsigned char reason = get_nmi_reason();
622 char buf[64]; 886 char buf[64];
623 887
624 if (!(reason & 0xc0)) { 888 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
625 sprintf(buf, "NMI received for unknown reason %02x\n", reason); 889 die_nmi(buf, regs, 1); /* Always panic here */
626 die_nmi(buf,regs);
627 }
628 return 0; 890 return 0;
629} 891}
630 892
631/* 893/*
632 * proc handler for /proc/sys/kernel/unknown_nmi_panic 894 * proc handler for /proc/sys/kernel/nmi
633 */ 895 */
634int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file, 896int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
635 void __user *buffer, size_t *length, loff_t *ppos) 897 void __user *buffer, size_t *length, loff_t *ppos)
636{ 898{
637 int old_state; 899 int old_state;
638 900
639 old_state = unknown_nmi_panic; 901 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
902 old_state = nmi_watchdog_enabled;
640 proc_dointvec(table, write, file, buffer, length, ppos); 903 proc_dointvec(table, write, file, buffer, length, ppos);
641 if (!!old_state == !!unknown_nmi_panic) 904 if (!!old_state == !!nmi_watchdog_enabled)
642 return 0; 905 return 0;
643 906
644 if (unknown_nmi_panic) { 907 if (atomic_read(&nmi_active) < 0) {
645 if (reserve_lapic_nmi() < 0) { 908 printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
646 unknown_nmi_panic = 0; 909 return -EIO;
647 return -EBUSY; 910 }
648 } else { 911
649 set_nmi_callback(unknown_nmi_panic_callback); 912 /* if nmi_watchdog is not set yet, then set it */
650 } 913 nmi_watchdog_default();
914
915 if (nmi_watchdog == NMI_LOCAL_APIC) {
916 if (nmi_watchdog_enabled)
917 enable_lapic_nmi_watchdog();
918 else
919 disable_lapic_nmi_watchdog();
651 } else { 920 } else {
652 release_lapic_nmi(); 921 printk(KERN_WARNING
653 unset_nmi_callback(); 922 "NMI watchdog doesn't know what hardware to touch\n");
923 return -EIO;
654 } 924 }
655 return 0; 925 return 0;
656} 926}
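
proc_nmi_enabled() follows the usual sysctl pattern: snapshot the old state, let proc_dointvec() rewrite the variable, then act only when the value actually flipped. A plain-C model of just that transition logic (proc_dointvec() reduced to an assignment):

#include <stdio.h>

static int nmi_watchdog_enabled;

static int write_nmi_enabled(int new_value)
{
	int old_state = nmi_watchdog_enabled;

	nmi_watchdog_enabled = new_value;	/* what proc_dointvec does */
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;			/* no transition: nothing to do */

	printf("%s watchdog\n", nmi_watchdog_enabled ? "enable" : "disable");
	return 0;
}

int main(void)
{
	write_nmi_enabled(1);	/* 0 -> 1: enables */
	write_nmi_enabled(1);	/* 1 -> 1: no-op */
	write_nmi_enabled(0);	/* 1 -> 0: disables */
	return 0;
}
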
@@ -659,8 +929,12 @@ int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file
659 929
660EXPORT_SYMBOL(nmi_active); 930EXPORT_SYMBOL(nmi_active);
661EXPORT_SYMBOL(nmi_watchdog); 931EXPORT_SYMBOL(nmi_watchdog);
662EXPORT_SYMBOL(reserve_lapic_nmi); 932EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
663EXPORT_SYMBOL(release_lapic_nmi); 933EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
934EXPORT_SYMBOL(reserve_perfctr_nmi);
935EXPORT_SYMBOL(release_perfctr_nmi);
936EXPORT_SYMBOL(reserve_evntsel_nmi);
937EXPORT_SYMBOL(release_evntsel_nmi);
664EXPORT_SYMBOL(disable_timer_nmi_watchdog); 938EXPORT_SYMBOL(disable_timer_nmi_watchdog);
665EXPORT_SYMBOL(enable_timer_nmi_watchdog); 939EXPORT_SYMBOL(enable_timer_nmi_watchdog);
666EXPORT_SYMBOL(touch_nmi_watchdog); 940EXPORT_SYMBOL(touch_nmi_watchdog);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 146924ba5df5..cfb09b07ae99 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -86,7 +86,8 @@
86 86
87#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ 87#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
88#define MAX_NUM_CHASSIS 8 /* max number of chassis */ 88#define MAX_NUM_CHASSIS 8 /* max number of chassis */
89#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2) /* max dev->bus->number */ 89/* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */
90#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2)
90#define PHBS_PER_CALGARY 4 91#define PHBS_PER_CALGARY 4
91 92
92/* register offsets in Calgary's internal register space */ 93/* register offsets in Calgary's internal register space */
@@ -111,31 +112,49 @@ static const unsigned long phb_offsets[] = {
111 0xB000 /* PHB3 */ 112 0xB000 /* PHB3 */
112}; 113};
113 114
114static char bus_to_phb[MAX_PHB_BUS_NUM];
115void* tce_table_kva[MAX_PHB_BUS_NUM];
116unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; 115unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
117static int translate_empty_slots __read_mostly = 0; 116static int translate_empty_slots __read_mostly = 0;
118static int calgary_detected __read_mostly = 0; 117static int calgary_detected __read_mostly = 0;
119 118
120/* 119struct calgary_bus_info {
121 * the bitmap of PHBs the user requested that we disable 120 void *tce_space;
122 * translation on. 121 unsigned char translation_disabled;
123 */ 122 signed char phbid;
124static DECLARE_BITMAP(translation_disabled, MAX_PHB_BUS_NUM); 123};
124
125static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
125 126
126static void tce_cache_blast(struct iommu_table *tbl); 127static void tce_cache_blast(struct iommu_table *tbl);
127 128
128/* enable this to stress test the chip's TCE cache */ 129/* enable this to stress test the chip's TCE cache */
129#ifdef CONFIG_IOMMU_DEBUG 130#ifdef CONFIG_IOMMU_DEBUG
130static inline void tce_cache_blast_stress(struct iommu_table *tbl) 131int debugging __read_mostly = 1;
132
133static inline unsigned long verify_bit_range(unsigned long* bitmap,
134 int expected, unsigned long start, unsigned long end)
131{ 135{
132 tce_cache_blast(tbl); 136 unsigned long idx = start;
137
138 BUG_ON(start >= end);
139
140 while (idx < end) {
141 if (!!test_bit(idx, bitmap) != expected)
142 return idx;
143 ++idx;
144 }
145
146 /* all bits have the expected value */
147 return ~0UL;
133} 148}
134#else 149#else /* debugging is disabled */
135static inline void tce_cache_blast_stress(struct iommu_table *tbl) 150int debugging __read_mostly = 0;
151
152static inline unsigned long verify_bit_range(unsigned long* bitmap,
153 int expected, unsigned long start, unsigned long end)
136{ 154{
155 return ~0UL;
137} 156}
138#endif /* BLAST_TCE_CACHE_ON_UNMAP */ 157#endif /* CONFIG_IOMMU_DEBUG */
139 158
140static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) 159static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
141{ 160{
@@ -149,7 +168,7 @@ static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
149 168
150static inline int translate_phb(struct pci_dev* dev) 169static inline int translate_phb(struct pci_dev* dev)
151{ 170{
152 int disabled = test_bit(dev->bus->number, translation_disabled); 171 int disabled = bus_info[dev->bus->number].translation_disabled;
153 return !disabled; 172 return !disabled;
154} 173}
155 174
@@ -158,6 +177,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
158{ 177{
159 unsigned long index; 178 unsigned long index;
160 unsigned long end; 179 unsigned long end;
180 unsigned long badbit;
161 181
162 index = start_addr >> PAGE_SHIFT; 182 index = start_addr >> PAGE_SHIFT;
163 183
@@ -169,14 +189,15 @@ static void iommu_range_reserve(struct iommu_table *tbl,
169 if (end > tbl->it_size) /* don't go off the table */ 189 if (end > tbl->it_size) /* don't go off the table */
170 end = tbl->it_size; 190 end = tbl->it_size;
171 191
172 while (index < end) { 192 badbit = verify_bit_range(tbl->it_map, 0, index, end);
173 if (test_bit(index, tbl->it_map)) 193 if (badbit != ~0UL) {
194 if (printk_ratelimit())
174 printk(KERN_ERR "Calgary: entry already allocated at " 195 printk(KERN_ERR "Calgary: entry already allocated at "
175 "0x%lx tbl %p dma 0x%lx npages %u\n", 196 "0x%lx tbl %p dma 0x%lx npages %u\n",
176 index, tbl, start_addr, npages); 197 badbit, tbl, start_addr, npages);
177 ++index;
178 } 198 }
179 set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages); 199
200 set_bit_string(tbl->it_map, index, npages);
180} 201}
181 202
182static unsigned long iommu_range_alloc(struct iommu_table *tbl, 203static unsigned long iommu_range_alloc(struct iommu_table *tbl,
@@ -243,7 +264,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
243 unsigned int npages) 264 unsigned int npages)
244{ 265{
245 unsigned long entry; 266 unsigned long entry;
246 unsigned long i; 267 unsigned long badbit;
247 268
248 entry = dma_addr >> PAGE_SHIFT; 269 entry = dma_addr >> PAGE_SHIFT;
249 270
@@ -251,16 +272,15 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
251 272
252 tce_free(tbl, entry, npages); 273 tce_free(tbl, entry, npages);
253 274
254 for (i = 0; i < npages; ++i) { 275 badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
255 if (!test_bit(entry + i, tbl->it_map)) 276 if (badbit != ~0UL) {
277 if (printk_ratelimit())
256 printk(KERN_ERR "Calgary: bit is off at 0x%lx " 278 printk(KERN_ERR "Calgary: bit is off at 0x%lx "
257 "tbl %p dma 0x%Lx entry 0x%lx npages %u\n", 279 "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
258 entry + i, tbl, dma_addr, entry, npages); 280 badbit, tbl, dma_addr, entry, npages);
259 } 281 }
260 282
261 __clear_bit_string(tbl->it_map, entry, npages); 283 __clear_bit_string(tbl->it_map, entry, npages);
262
263 tce_cache_blast_stress(tbl);
264} 284}
265 285
266static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, 286static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -454,7 +474,7 @@ static struct dma_mapping_ops calgary_dma_ops = {
454 474
455static inline int busno_to_phbid(unsigned char num) 475static inline int busno_to_phbid(unsigned char num)
456{ 476{
457 return bus_to_phb[num]; 477 return bus_info[num].phbid;
458} 478}
459 479
460static inline unsigned long split_queue_offset(unsigned char num) 480static inline unsigned long split_queue_offset(unsigned char num)
@@ -631,6 +651,10 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
631 if (ret) 651 if (ret)
632 return ret; 652 return ret;
633 653
654 tbl = dev->sysdata;
655 tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
656 tce_free(tbl, 0, tbl->it_size);
657
634 calgary_reserve_regions(dev); 658 calgary_reserve_regions(dev);
635 659
636 /* set TARs for each PHB */ 660 /* set TARs for each PHB */
@@ -654,11 +678,12 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
654 return 0; 678 return 0;
655} 679}
656 680
657static void __init calgary_free_tar(struct pci_dev *dev) 681static void __init calgary_free_bus(struct pci_dev *dev)
658{ 682{
659 u64 val64; 683 u64 val64;
660 struct iommu_table *tbl = dev->sysdata; 684 struct iommu_table *tbl = dev->sysdata;
661 void __iomem *target; 685 void __iomem *target;
686 unsigned int bitmapsz;
662 687
663 target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number)); 688 target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
664 val64 = be64_to_cpu(readq(target)); 689 val64 = be64_to_cpu(readq(target));
@@ -666,8 +691,15 @@ static void __init calgary_free_tar(struct pci_dev *dev)
666 writeq(cpu_to_be64(val64), target); 691 writeq(cpu_to_be64(val64), target);
667 readq(target); /* flush */ 692 readq(target); /* flush */
668 693
694 bitmapsz = tbl->it_size / BITS_PER_BYTE;
695 free_pages((unsigned long)tbl->it_map, get_order(bitmapsz));
696 tbl->it_map = NULL;
697
669 kfree(tbl); 698 kfree(tbl);
670 dev->sysdata = NULL; 699 dev->sysdata = NULL;
700
701 /* Can't free bootmem allocated memory after system is up :-( */
702 bus_info[dev->bus->number].tce_space = NULL;
671} 703}
672 704
673static void calgary_watchdog(unsigned long data) 705static void calgary_watchdog(unsigned long data)
@@ -772,12 +804,11 @@ static inline unsigned int __init locate_register_space(struct pci_dev *dev)
772 return address; 804 return address;
773} 805}
774 806
775static int __init calgary_init_one_nontraslated(struct pci_dev *dev) 807static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
776{ 808{
809 pci_dev_get(dev);
777 dev->sysdata = NULL; 810 dev->sysdata = NULL;
778 dev->bus->self = dev; 811 dev->bus->self = dev;
779
780 return 0;
781} 812}
782 813
783static int __init calgary_init_one(struct pci_dev *dev) 814static int __init calgary_init_one(struct pci_dev *dev)
@@ -798,6 +829,7 @@ static int __init calgary_init_one(struct pci_dev *dev)
798 if (ret) 829 if (ret)
799 goto iounmap; 830 goto iounmap;
800 831
832 pci_dev_get(dev);
801 dev->bus->self = dev; 833 dev->bus->self = dev;
802 calgary_enable_translation(dev); 834 calgary_enable_translation(dev);
803 835
@@ -824,10 +856,9 @@ static int __init calgary_init(void)
824 calgary_init_one_nontraslated(dev); 856 calgary_init_one_nontraslated(dev);
825 continue; 857 continue;
826 } 858 }
827 if (!tce_table_kva[dev->bus->number] && !translate_empty_slots) { 859 if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
828 pci_dev_put(dev);
829 continue; 860 continue;
830 } 861
831 ret = calgary_init_one(dev); 862 ret = calgary_init_one(dev);
832 if (ret) 863 if (ret)
833 goto error; 864 goto error;
@@ -840,15 +871,18 @@ error:
840 dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, 871 dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
841 PCI_DEVICE_ID_IBM_CALGARY, 872 PCI_DEVICE_ID_IBM_CALGARY,
842 dev); 873 dev);
874 if (!dev)
875 break;
843 if (!translate_phb(dev)) { 876 if (!translate_phb(dev)) {
844 pci_dev_put(dev); 877 pci_dev_put(dev);
845 continue; 878 continue;
846 } 879 }
847 if (!tce_table_kva[dev->bus->number] && !translate_empty_slots) 880 if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
848 continue; 881 continue;
882
849 calgary_disable_translation(dev); 883 calgary_disable_translation(dev);
850 calgary_free_tar(dev); 884 calgary_free_bus(dev);
851 pci_dev_put(dev); 885 pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
852 } 886 }
853 887
854 return ret; 888 return ret;
@@ -890,13 +924,15 @@ void __init detect_calgary(void)
890 if (swiotlb || no_iommu || iommu_detected) 924 if (swiotlb || no_iommu || iommu_detected)
891 return; 925 return;
892 926
927 if (!early_pci_allowed())
928 return;
929
893 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); 930 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
894 931
895 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { 932 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
896 int dev; 933 int dev;
897 934 struct calgary_bus_info *info = &bus_info[bus];
898 tce_table_kva[bus] = NULL; 935 info->phbid = -1;
899 bus_to_phb[bus] = -1;
900 936
901 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) 937 if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
902 continue; 938 continue;
@@ -907,12 +943,9 @@ void __init detect_calgary(void)
907 */ 943 */
908 phb = (phb + 1) % PHBS_PER_CALGARY; 944 phb = (phb + 1) % PHBS_PER_CALGARY;
909 945
910 if (test_bit(bus, translation_disabled)) { 946 if (info->translation_disabled)
911 printk(KERN_INFO "Calgary: translation is disabled for "
912 "PHB 0x%x\n", bus);
913 /* skip this phb, don't allocate a tbl for it */
914 continue; 947 continue;
915 } 948
916 /* 949 /*
917 * Scan the slots of the PCI bus to see if there is a device present. 950 * Scan the slots of the PCI bus to see if there is a device present.
918 * The parent bus will be the zeroth device, so start at 1. 951
@@ -923,8 +956,8 @@ void __init detect_calgary(void)
923 tbl = alloc_tce_table(); 956 tbl = alloc_tce_table();
924 if (!tbl) 957 if (!tbl)
925 goto cleanup; 958 goto cleanup;
926 tce_table_kva[bus] = tbl; 959 info->tce_space = tbl;
927 bus_to_phb[bus] = phb; 960 info->phbid = phb;
928 calgary_found = 1; 961 calgary_found = 1;
929 break; 962 break;
930 } 963 }
@@ -934,15 +967,20 @@ void __init detect_calgary(void)
934 if (calgary_found) { 967 if (calgary_found) {
935 iommu_detected = 1; 968 iommu_detected = 1;
936 calgary_detected = 1; 969 calgary_detected = 1;
937 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. " 970 printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
938 "TCE table spec is %d.\n", specified_table_size); 971 printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
972 "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
973 debugging ? "enabled" : "disabled");
939 } 974 }
940 return; 975 return;
941 976
942cleanup: 977cleanup:
943 for (--bus; bus >= 0; --bus) 978 for (--bus; bus >= 0; --bus) {
944 if (tce_table_kva[bus]) 979 struct calgary_bus_info *info = &bus_info[bus];
945 free_tce_table(tce_table_kva[bus]); 980
981 if (info->tce_space)
982 free_tce_table(info->tce_space);
983 }
946} 984}
947 985
948int __init calgary_iommu_init(void) 986int __init calgary_iommu_init(void)
@@ -1016,7 +1054,7 @@ static int __init calgary_parse_options(char *p)
1016 if (bridge < MAX_PHB_BUS_NUM) { 1054 if (bridge < MAX_PHB_BUS_NUM) {
1017 printk(KERN_INFO "Calgary: disabling " 1055 printk(KERN_INFO "Calgary: disabling "
1018 "translation for PHB 0x%x\n", bridge); 1056 "translation for PHB 0x%x\n", bridge);
1019 set_bit(bridge, translation_disabled); 1057 bus_info[bridge].translation_disabled = 1;
1020 } 1058 }
1021 } 1059 }
1022 1060
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index 9c44f4f2433d..4dcb671bd19f 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -236,6 +236,9 @@ __init int iommu_setup(char *p)
236{ 236{
237 iommu_merge = 1; 237 iommu_merge = 1;
238 238
239 if (!p)
240 return -EINVAL;
241
239 while (*p) { 242 while (*p) {
240 if (!strncmp(p,"off",3)) 243 if (!strncmp(p,"off",3))
241 no_iommu = 1; 244 no_iommu = 1;
@@ -278,9 +281,9 @@ __init int iommu_setup(char *p)
278 if (*p == ',') 281 if (*p == ',')
279 ++p; 282 ++p;
280 } 283 }
281 return 1; 284 return 0;
282} 285}
283__setup("iommu=", iommu_setup); 286early_param("iommu", iommu_setup);
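
Note the early_param() conversion: the handler now runs from parse_early_param(), must tolerate a NULL argument (hence the new check), and returns 0 on success. A sketch of the comma-separated option walk in the spirit of iommu_setup(), with only the "off" option shown and matching done by plain strncmp:

#include <stdio.h>
#include <string.h>

static int no_iommu;

static int iommu_setup_demo(const char *p)
{
	if (!p)
		return -1;	/* early_param handlers must check NULL */

	while (*p) {
		if (!strncmp(p, "off", 3))
			no_iommu = 1;
		/* ... other options elided ... */
		p = strchr(p, ',');
		if (!p)
			break;
		++p;		/* skip the comma */
	}
	return 0;
}

int main(void)
{
	iommu_setup_demo("off,merge");
	printf("no_iommu = %d\n", no_iommu);
	return 0;
}
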
284 287
285void __init pci_iommu_alloc(void) 288void __init pci_iommu_alloc(void)
286{ 289{
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 6d3e61baf7a0..16261a8a3303 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -239,8 +239,6 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
239{ 239{
240 unsigned long phys_mem, bus; 240 unsigned long phys_mem, bus;
241 241
242 BUG_ON(dir == DMA_NONE);
243
244 if (!dev) 242 if (!dev)
245 dev = &fallback_dev; 243 dev = &fallback_dev;
246 244
@@ -383,7 +381,6 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
383 unsigned long pages = 0; 381 unsigned long pages = 0;
384 int need = 0, nextneed; 382 int need = 0, nextneed;
385 383
386 BUG_ON(dir == DMA_NONE);
387 if (nents == 0) 384 if (nents == 0)
388 return 0; 385 return 0;
389 386
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index aad7609d8e92..df09ab05a1bd 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -59,7 +59,6 @@ int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
59{ 59{
60 int i; 60 int i;
61 61
62 BUG_ON(direction == DMA_NONE);
63 for (i = 0; i < nents; i++ ) { 62 for (i = 0; i < nents; i++ ) {
64 struct scatterlist *s = &sg[i]; 63 struct scatterlist *s = &sg[i];
65 BUG_ON(!s->page); 64 BUG_ON(!s->page);
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index bb6745d13b8f..458006ae19f3 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -80,25 +80,25 @@ void idle_notifier_unregister(struct notifier_block *n)
80} 80}
81EXPORT_SYMBOL(idle_notifier_unregister); 81EXPORT_SYMBOL(idle_notifier_unregister);
82 82
83enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
84static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
85
86void enter_idle(void) 83void enter_idle(void)
87{ 84{
88 __get_cpu_var(idle_state) = CPU_IDLE; 85 write_pda(isidle, 1);
89 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 86 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
90} 87}
91 88
92static void __exit_idle(void) 89static void __exit_idle(void)
93{ 90{
94 __get_cpu_var(idle_state) = CPU_NOT_IDLE; 91 if (read_pda(isidle) == 0)
92 return;
93 write_pda(isidle, 0);
95 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 94 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
96} 95}
97 96
98/* Called from interrupts to signify idle end */ 97/* Called from interrupts to signify idle end */
99void exit_idle(void) 98void exit_idle(void)
100{ 99{
101 if (current->pid | read_pda(irqcount)) 100 /* idle loop has pid 0 */
101 if (current->pid)
102 return; 102 return;
103 __exit_idle(); 103 __exit_idle();
104} 104}
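
With the idle state moved into the PDA, __exit_idle() is now idempotent: the isidle check makes IDLE_END fire at most once per idle period, whether the wakeup arrived via an interrupt or not. A toy model of that flag discipline (notifier chain replaced by printf):

#include <stdio.h>

static int isidle;

static void enter_idle(void)
{
	isidle = 1;
	printf("IDLE_START\n");
}

static void __exit_idle(void)
{
	if (!isidle)
		return;		/* already left idle via an interrupt */
	isidle = 0;
	printf("IDLE_END\n");
}

int main(void)
{
	enter_idle();
	__exit_idle();	/* e.g. from the interrupt that woke us */
	__exit_idle();	/* idle loop's own call: now a no-op */
	return 0;
}
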
@@ -220,6 +220,9 @@ void cpu_idle (void)
220 play_dead(); 220 play_dead();
221 enter_idle(); 221 enter_idle();
222 idle(); 222 idle();
223 /* In many cases the interrupt that ended idle
224 has already called exit_idle. But some idle
225 loops can be woken up without an interrupt. */
223 __exit_idle(); 226 __exit_idle();
224 } 227 }
225 228
@@ -350,6 +353,7 @@ void exit_thread(void)
350 353
351 kfree(t->io_bitmap_ptr); 354 kfree(t->io_bitmap_ptr);
352 t->io_bitmap_ptr = NULL; 355 t->io_bitmap_ptr = NULL;
356 clear_thread_flag(TIF_IO_BITMAP);
353 /* 357 /*
354 * Careful, clear this in the TSS too: 358 * Careful, clear this in the TSS too:
355 */ 359 */
@@ -369,6 +373,7 @@ void flush_thread(void)
369 if (t->flags & _TIF_IA32) 373 if (t->flags & _TIF_IA32)
370 current_thread_info()->status |= TS_COMPAT; 374 current_thread_info()->status |= TS_COMPAT;
371 } 375 }
376 t->flags &= ~_TIF_DEBUG;
372 377
373 tsk->thread.debugreg0 = 0; 378 tsk->thread.debugreg0 = 0;
374 tsk->thread.debugreg1 = 0; 379 tsk->thread.debugreg1 = 0;
@@ -461,7 +466,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
461 asm("mov %%es,%0" : "=m" (p->thread.es)); 466 asm("mov %%es,%0" : "=m" (p->thread.es));
462 asm("mov %%ds,%0" : "=m" (p->thread.ds)); 467 asm("mov %%ds,%0" : "=m" (p->thread.ds));
463 468
464 if (unlikely(me->thread.io_bitmap_ptr != NULL)) { 469 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
465 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 470 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
466 if (!p->thread.io_bitmap_ptr) { 471 if (!p->thread.io_bitmap_ptr) {
467 p->thread.io_bitmap_max = 0; 472 p->thread.io_bitmap_max = 0;
@@ -469,6 +474,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
469 } 474 }
470 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, 475 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
471 IO_BITMAP_BYTES); 476 IO_BITMAP_BYTES);
477 set_tsk_thread_flag(p, TIF_IO_BITMAP);
472 } 478 }
473 479
474 /* 480 /*
@@ -498,6 +504,40 @@ out:
498 */ 504 */
499#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) 505#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
500 506
507static inline void __switch_to_xtra(struct task_struct *prev_p,
508 struct task_struct *next_p,
509 struct tss_struct *tss)
510{
511 struct thread_struct *prev, *next;
512
513 prev = &prev_p->thread;
514 next = &next_p->thread;
515
516 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
517 loaddebug(next, 0);
518 loaddebug(next, 1);
519 loaddebug(next, 2);
520 loaddebug(next, 3);
521 /* no 4 and 5 */
522 loaddebug(next, 6);
523 loaddebug(next, 7);
524 }
525
526 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
527 /*
528 * Copy the relevant range of the IO bitmap.
529 * Normally this is 128 bytes or less:
530 */
531 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
532 max(prev->io_bitmap_max, next->io_bitmap_max));
533 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
534 /*
535 * Clear any possible leftover bits:
536 */
537 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
538 }
539}
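
__switch_to_xtra() exists so the common context-switch path stays cheap: the debug-register reload and I/O-bitmap copy only run when a TIF flag on either task asks for them. A sketch of that gating (the flag values are illustrative, not the kernel's):

#include <stdio.h>

#define TIF_DEBUG	(1 << 0)
#define TIF_IO_BITMAP	(1 << 1)
#define _TIF_WORK_CTXSW	(TIF_DEBUG | TIF_IO_BITMAP)

struct task { const char *name; unsigned int flags; };

static void switch_to(struct task *prev, struct task *next)
{
	if ((next->flags & _TIF_WORK_CTXSW) || (prev->flags & TIF_IO_BITMAP))
		printf("%s -> %s: slow path (debugregs/io bitmap)\n",
		       prev->name, next->name);
	else
		printf("%s -> %s: fast path\n", prev->name, next->name);
}

int main(void)
{
	struct task a = { "a", 0 }, b = { "b", TIF_IO_BITMAP };

	switch_to(&a, &b);	/* slow: next uses an io bitmap */
	switch_to(&b, &a);	/* slow: prev leaves leftover bits */
	switch_to(&a, &a);	/* fast */
	return 0;
}
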
540
501/* 541/*
502 * switch_to(x,y) should switch tasks from x to y. 542 * switch_to(x,y) should switch tasks from x to y.
503 * 543 *
@@ -515,6 +555,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
515 int cpu = smp_processor_id(); 555 int cpu = smp_processor_id();
516 struct tss_struct *tss = &per_cpu(init_tss, cpu); 556 struct tss_struct *tss = &per_cpu(init_tss, cpu);
517 557
558 /* we're going to use this soon, after a few expensive things */
559 if (next_p->fpu_counter > 5)
560 prefetch(&next->i387.fxsave);
561
518 /* 562 /*
519 * Reload esp0, LDT and the page table pointer: 563 * Reload esp0, LDT and the page table pointer:
520 */ 564 */
@@ -583,41 +627,29 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
583 And the AMD workaround requires it to be after DS reload. */ 627 And the AMD workaround requires it to be after DS reload. */
584 unlazy_fpu(prev_p); 628 unlazy_fpu(prev_p);
585 write_pda(kernelstack, 629 write_pda(kernelstack,
586 task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); 630 (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
587 631#ifdef CONFIG_CC_STACKPROTECTOR
632 write_pda(stack_canary, next_p->stack_canary);
588 /* 633 /*
589 * Now maybe reload the debug registers 634 * Build time only check to make sure the stack_canary is at
635 * offset 40 in the pda; this is a gcc ABI requirement
590 */ 636 */
591 if (unlikely(next->debugreg7)) { 637 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
592 loaddebug(next, 0); 638#endif
593 loaddebug(next, 1);
594 loaddebug(next, 2);
595 loaddebug(next, 3);
596 /* no 4 and 5 */
597 loaddebug(next, 6);
598 loaddebug(next, 7);
599 }
600
601 639
602 /* 640 /*
603 * Handle the IO bitmap 641 * Now maybe reload the debug registers and handle I/O bitmaps
604 */ 642 */
605 if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { 643 if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
606 if (next->io_bitmap_ptr) 644 || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
607 /* 645 __switch_to_xtra(prev_p, next_p, tss);
608 * Copy the relevant range of the IO bitmap.
609 * Normally this is 128 bytes or less:
610 */
611 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
612 max(prev->io_bitmap_max, next->io_bitmap_max));
613 else {
614 /*
615 * Clear any possible leftover bits:
616 */
617 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
618 }
619 }
620 646
647 /* If the task has used the FPU in the last 5 timeslices, just do a
 648 * full restore of the math state immediately to avoid the trap; the
 649 * chances of needing the FPU soon are obviously high now
650 */
651 if (next_p->fpu_counter > 5)
652 math_state_restore();
621 return prev_p; 653 return prev_p;
622} 654}
623 655
@@ -834,7 +866,7 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
834 866
835unsigned long arch_align_stack(unsigned long sp) 867unsigned long arch_align_stack(unsigned long sp)
836{ 868{
837 if (randomize_va_space) 869 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
838 sp -= get_random_int() % 8192; 870 sp -= get_random_int() % 8192;
839 return sp & ~0xf; 871 return sp & ~0xf;
840} 872}
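
The arch_align_stack() change makes stack randomization honor the ADDR_NO_RANDOMIZE personality bit: subtract up to 8k of jitter, then restore 16-byte alignment. A stand-alone rendering (get_random_int() swapped for rand(); the ADDR_NO_RANDOMIZE value is an assumption taken from the personality ABI):

#include <stdio.h>
#include <stdlib.h>

#define ADDR_NO_RANDOMIZE 0x0040000	/* assumed ABI value */

static int randomize_va_space = 1;

static unsigned long arch_align_stack(unsigned long sp,
				      unsigned int personality)
{
	if (!(personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= rand() % 8192;	/* up to 8k of jitter */
	return sp & ~0xfUL;		/* keep 16-byte alignment */
}

int main(void)
{
	unsigned long sp = 0x7fffffffe000UL;

	printf("randomized: 0x%lx\n", arch_align_stack(sp, 0));
	printf("pinned:     0x%lx\n", arch_align_stack(sp, ADDR_NO_RANDOMIZE));
	return 0;
}
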
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index 2d50024c9f30..addc14af0c56 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -116,17 +116,17 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r
116 return addr; 116 return addr;
117} 117}
118 118
119static int is_at_popf(struct task_struct *child, struct pt_regs *regs) 119static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
120{ 120{
121 int i, copied; 121 int i, copied;
122 unsigned char opcode[16]; 122 unsigned char opcode[15];
123 unsigned long addr = convert_rip_to_linear(child, regs); 123 unsigned long addr = convert_rip_to_linear(child, regs);
124 124
125 copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); 125 copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
126 for (i = 0; i < copied; i++) { 126 for (i = 0; i < copied; i++) {
127 switch (opcode[i]) { 127 switch (opcode[i]) {
128 /* popf */ 128 /* popf and iret */
129 case 0x9d: 129 case 0x9d: case 0xcf:
130 return 1; 130 return 1;
131 131
132 /* CHECKME: 64 65 */ 132 /* CHECKME: 64 65 */
@@ -138,14 +138,17 @@ static int is_at_popf(struct task_struct *child, struct pt_regs *regs)
138 case 0x26: case 0x2e: 138 case 0x26: case 0x2e:
139 case 0x36: case 0x3e: 139 case 0x36: case 0x3e:
140 case 0x64: case 0x65: 140 case 0x64: case 0x65:
141 case 0xf0: case 0xf2: case 0xf3: 141 case 0xf2: case 0xf3:
142 continue; 142 continue;
143 143
144 /* REX prefixes */
145 case 0x40 ... 0x4f: 144 case 0x40 ... 0x4f:
145 if (regs->cs != __USER_CS)
146 /* 32-bit mode: register increment */
147 return 0;
148 /* 64-bit mode: REX prefix */
146 continue; 149 continue;
147 150
148 /* CHECKME: f0, f2, f3 */ 151 /* CHECKME: f2, f3 */
149 152
150 /* 153 /*
151 * pushf: NOTE! We should probably not let 154 * pushf: NOTE! We should probably not let
@@ -186,10 +189,8 @@ static void set_singlestep(struct task_struct *child)
186 * ..but if TF is changed by the instruction we will trace, 189 * ..but if TF is changed by the instruction we will trace,
187 * don't mark it as being "us" that set it, so that we 190 * don't mark it as being "us" that set it, so that we
188 * won't clear it by hand later. 191 * won't clear it by hand later.
189 *
190 * AK: this is not enough, LAHF and IRET can change TF in user space too.
191 */ 192 */
192 if (is_at_popf(child, regs)) 193 if (is_setting_trap_flag(child, regs))
193 return; 194 return;
194 195
195 child->ptrace |= PT_DTRACE; 196 child->ptrace |= PT_DTRACE;
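
is_setting_trap_flag() walks prefix bytes ahead of the instruction about to be single-stepped, looking for popf and iret, which rewrite EFLAGS.TF behind the tracer's back; 0x40-0x4f count as REX prefixes only in 64-bit mode. A sketch of the same scan over an inline opcode buffer (case ranges are a GCC extension, as in the kernel code itself):

#include <stdio.h>

static int sets_trap_flag(const unsigned char *op, int len, int long_mode)
{
	int i;

	for (i = 0; i < len; i++) {
		switch (op[i]) {
		case 0x9d: case 0xcf:		/* popf, iret */
			return 1;
		case 0x66: case 0x67:		/* operand/address size */
		case 0x26: case 0x2e: case 0x36: case 0x3e:
		case 0x64: case 0x65:		/* segment overrides */
		case 0xf2: case 0xf3:		/* repne/rep */
			continue;
		case 0x40 ... 0x4f:		/* REX only in 64-bit mode */
			if (!long_mode)
				return 0;	/* 32-bit inc/dec reg */
			continue;
		default:
			return 0;
		}
	}
	return 0;
}

int main(void)
{
	unsigned char rex_popf[] = { 0x48, 0x9d };

	printf("64-bit rex+popf: %d\n", sets_trap_flag(rex_popf, 2, 1));
	printf("32-bit 0x48...:  %d\n", sets_trap_flag(rex_popf, 2, 0));
	return 0;
}
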
@@ -420,9 +421,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
420 if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) 421 if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
421 break; 422 break;
422 if (i == 4) { 423 if (i == 4) {
423 child->thread.debugreg7 = data; 424 child->thread.debugreg7 = data;
425 if (data)
426 set_tsk_thread_flag(child, TIF_DEBUG);
427 else
428 clear_tsk_thread_flag(child, TIF_DEBUG);
424 ret = 0; 429 ret = 0;
425 } 430 }
426 break; 431 break;
427 } 432 }
428 break; 433 break;
diff --git a/arch/x86_64/kernel/relocate_kernel.S b/arch/x86_64/kernel/relocate_kernel.S
index d24fa9b72a2b..14e95872c6a3 100644
--- a/arch/x86_64/kernel/relocate_kernel.S
+++ b/arch/x86_64/kernel/relocate_kernel.S
@@ -7,31 +7,169 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h>
11#include <asm/kexec.h>
10 12
11 /* 13/*
12 * Must be relocatable PIC code callable as a C function, that once 14 * Must be relocatable PIC code callable as a C function
13 * it starts can not use the previous processes stack. 15 */
14 */ 16
15 .globl relocate_new_kernel 17#define PTR(x) (x << 3)
18#define PAGE_ALIGNED (1 << PAGE_SHIFT)
19#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
20
21 .text
22 .align PAGE_ALIGNED
16 .code64 23 .code64
24 .globl relocate_kernel
25relocate_kernel:
26 /* %rdi indirection_page
27 * %rsi page_list
28 * %rdx start address
29 */
30
31 /* map the control page at its virtual address */
32
33 movq $0x0000ff8000000000, %r10 /* mask */
34 mov $(39 - 3), %cl /* bits to shift */
35 movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
36
37 movq %r11, %r9
38 andq %r10, %r9
39 shrq %cl, %r9
40
41 movq PTR(VA_PGD)(%rsi), %r8
42 addq %r8, %r9
43 movq PTR(PA_PUD_0)(%rsi), %r8
44 orq $PAGE_ATTR, %r8
45 movq %r8, (%r9)
46
47 shrq $9, %r10
48 sub $9, %cl
49
50 movq %r11, %r9
51 andq %r10, %r9
52 shrq %cl, %r9
53
54 movq PTR(VA_PUD_0)(%rsi), %r8
55 addq %r8, %r9
56 movq PTR(PA_PMD_0)(%rsi), %r8
57 orq $PAGE_ATTR, %r8
58 movq %r8, (%r9)
59
60 shrq $9, %r10
61 sub $9, %cl
62
63 movq %r11, %r9
64 andq %r10, %r9
65 shrq %cl, %r9
66
67 movq PTR(VA_PMD_0)(%rsi), %r8
68 addq %r8, %r9
69 movq PTR(PA_PTE_0)(%rsi), %r8
70 orq $PAGE_ATTR, %r8
71 movq %r8, (%r9)
72
73 shrq $9, %r10
74 sub $9, %cl
75
76 movq %r11, %r9
77 andq %r10, %r9
78 shrq %cl, %r9
79
80 movq PTR(VA_PTE_0)(%rsi), %r8
81 addq %r8, %r9
82 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
83 orq $PAGE_ATTR, %r8
84 movq %r8, (%r9)
85
86 /* identity map the control page at its physical address */
87
88 movq $0x0000ff8000000000, %r10 /* mask */
89 mov $(39 - 3), %cl /* bits to shift */
90 movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
91
92 movq %r11, %r9
93 andq %r10, %r9
94 shrq %cl, %r9
95
96 movq PTR(VA_PGD)(%rsi), %r8
97 addq %r8, %r9
98 movq PTR(PA_PUD_1)(%rsi), %r8
99 orq $PAGE_ATTR, %r8
100 movq %r8, (%r9)
101
102 shrq $9, %r10
103 sub $9, %cl
104
105 movq %r11, %r9
106 andq %r10, %r9
107 shrq %cl, %r9
108
109 movq PTR(VA_PUD_1)(%rsi), %r8
110 addq %r8, %r9
111 movq PTR(PA_PMD_1)(%rsi), %r8
112 orq $PAGE_ATTR, %r8
113 movq %r8, (%r9)
114
115 shrq $9, %r10
116 sub $9, %cl
117
118 movq %r11, %r9
119 andq %r10, %r9
120 shrq %cl, %r9
121
122 movq PTR(VA_PMD_1)(%rsi), %r8
123 addq %r8, %r9
124 movq PTR(PA_PTE_1)(%rsi), %r8
125 orq $PAGE_ATTR, %r8
126 movq %r8, (%r9)
127
128 shrq $9, %r10
129 sub $9, %cl
130
131 movq %r11, %r9
132 andq %r10, %r9
133 shrq %cl, %r9
134
135 movq PTR(VA_PTE_1)(%rsi), %r8
136 addq %r8, %r9
137 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
138 orq $PAGE_ATTR, %r8
139 movq %r8, (%r9)
140
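
The repeated mask/shift blocks above are 4-level page-table indexing done by hand: 9 bits of the virtual address per level starting at bit 39, each index scaled by 8 because entries are quadwords (PTR(x) is x<<3). The same arithmetic in C, over an arbitrary example address:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t addr = 0xffffffff80200000ULL;	/* example kernel VA */
	int shift;

	for (shift = 39; shift >= 12; shift -= 9) {
		/* same as: AND with a 9-bit mask at 'shift', then shift
		 * down by (shift - 3) to get a byte offset into the table */
		uint64_t index = (addr >> shift) & 0x1ff;

		printf("level shift %2d: index %3llu (offset 0x%llx)\n",
		       shift, (unsigned long long)index,
		       (unsigned long long)(index << 3));
	}
	return 0;
}
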
17relocate_new_kernel: 141relocate_new_kernel:
18 /* %rdi page_list 142 /* %rdi indirection_page
19 * %rsi reboot_code_buffer 143 * %rsi page_list
20 * %rdx start address 144 * %rdx start address
21 * %rcx page_table
22 * %r8 arg5
23 * %r9 arg6
24 */ 145 */
25 146
26 /* zero out flags, and disable interrupts */ 147 /* zero out flags, and disable interrupts */
27 pushq $0 148 pushq $0
28 popfq 149 popfq
29 150
30 /* set a new stack at the bottom of our page... */ 151 /* get physical address of control page now */
31 lea 4096(%rsi), %rsp 152 /* this is impossible after page table switch */
153 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
154
155 /* get physical address of page table now too */
156 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
32 157
33 /* store the parameters back on the stack */ 158 /* switch to new set of page tables */
34 pushq %rdx /* store the start address */ 159 movq PTR(PA_PGD)(%rsi), %r9
160 movq %r9, %cr3
161
162 /* setup a new stack at the end of the physical control page */
163 lea 4096(%r8), %rsp
164
165 /* jump to identity mapped page */
166 addq $(identity_mapped - relocate_kernel), %r8
167 pushq %r8
168 ret
169
170identity_mapped:
171 /* store the start address on the stack */
172 pushq %rdx
35 173
36 /* Set cr0 to a known state: 174 /* Set cr0 to a known state:
37 * 31 1 == Paging enabled 175 * 31 1 == Paging enabled
@@ -136,8 +274,3 @@ relocate_new_kernel:
136 xorq %r15, %r15 274 xorq %r15, %r15
137 275
138 ret 276 ret
139relocate_new_kernel_end:
140
141 .globl relocate_new_kernel_size
142relocate_new_kernel_size:
143 .quad relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 4b39f0da17f3..f98e48cae6da 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -74,16 +74,6 @@ EXPORT_SYMBOL(boot_cpu_data);
74 74
75unsigned long mmu_cr4_features; 75unsigned long mmu_cr4_features;
76 76
77int acpi_disabled;
78EXPORT_SYMBOL(acpi_disabled);
79#ifdef CONFIG_ACPI
80extern int __initdata acpi_ht;
81extern acpi_interrupt_flags acpi_sci_flags;
82int __initdata acpi_force = 0;
83#endif
84
85int acpi_numa __initdata;
86
87/* Boot loader ID as an integer, for the benefit of proc_dointvec */ 77/* Boot loader ID as an integer, for the benefit of proc_dointvec */
88int bootloader_type; 78int bootloader_type;
89 79
@@ -107,7 +97,6 @@ struct sys_desc_table_struct {
107 97
108struct edid_info edid_info; 98struct edid_info edid_info;
109EXPORT_SYMBOL_GPL(edid_info); 99EXPORT_SYMBOL_GPL(edid_info);
110struct e820map e820;
111 100
112extern int root_mountflags; 101extern int root_mountflags;
113 102
@@ -276,185 +265,22 @@ static void __init probe_roms(void)
276 } 265 }
277} 266}
278 267
279/* Check for full argument with no trailing characters */ 268#ifdef CONFIG_PROC_VMCORE
280static int fullarg(char *p, char *arg) 269/* elfcorehdr= specifies the location of elf core header
270 * stored by the crashed kernel. This option will be passed
271 * by kexec loader to the capture kernel.
272 */
273static int __init setup_elfcorehdr(char *arg)
281{ 274{
282 int l = strlen(arg); 275 char *end;
283 return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l])); 276 if (!arg)
277 return -EINVAL;
278 elfcorehdr_addr = memparse(arg, &end);
279 return end > arg ? 0 : -EINVAL;
284} 280}
285 281early_param("elfcorehdr", setup_elfcorehdr);
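
setup_elfcorehdr() leans on memparse(): parse a number with an optional K/M/G suffix and hand back the end pointer, so the caller can return -EINVAL when nothing was consumed. A hedged stand-in with the same shape (not the kernel's memparse):

#include <stdio.h>
#include <stdlib.h>

static unsigned long long memparse_demo(const char *ptr, char **retptr)
{
	unsigned long long ret = strtoull(ptr, retptr, 0);

	switch (**retptr) {
	case 'G': case 'g': ret <<= 10;	/* fall through */
	case 'M': case 'm': ret <<= 10;	/* fall through */
	case 'K': case 'k': ret <<= 10; (*retptr)++;
	default: break;
	}
	return ret;
}

int main(void)
{
	char *end;
	const char *arg = "128M";
	unsigned long long addr = memparse_demo(arg, &end);

	/* setup_elfcorehdr() succeeds only if end > arg */
	printf("elfcorehdr=%s -> 0x%llx (%s)\n", arg, addr,
	       end > arg ? "ok" : "-EINVAL");
	return 0;
}
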
286static __init void parse_cmdline_early (char ** cmdline_p)
287{
288 char c = ' ', *to = command_line, *from = COMMAND_LINE;
289 int len = 0;
290 int userdef = 0;
291
292 for (;;) {
293 if (c != ' ')
294 goto next_char;
295
296#ifdef CONFIG_SMP
297 /*
298 * If the BIOS enumerates physical processors before logical,
299 * maxcpus=N at enumeration-time can be used to disable HT.
300 */
301 else if (!memcmp(from, "maxcpus=", 8)) {
302 extern unsigned int maxcpus;
303
304 maxcpus = simple_strtoul(from + 8, NULL, 0);
305 }
306#endif
307#ifdef CONFIG_ACPI
308 /* "acpi=off" disables both ACPI table parsing and interpreter init */
309 if (fullarg(from,"acpi=off"))
310 disable_acpi();
311
312 if (fullarg(from, "acpi=force")) {
313 /* add later when we do DMI horrors: */
314 acpi_force = 1;
315 acpi_disabled = 0;
316 }
317
318 /* acpi=ht just means: do ACPI MADT parsing
319 at bootup, but don't enable the full ACPI interpreter */
320 if (fullarg(from, "acpi=ht")) {
321 if (!acpi_force)
322 disable_acpi();
323 acpi_ht = 1;
324 }
325 else if (fullarg(from, "pci=noacpi"))
326 acpi_disable_pci();
327 else if (fullarg(from, "acpi=noirq"))
328 acpi_noirq_set();
329
330 else if (fullarg(from, "acpi_sci=edge"))
331 acpi_sci_flags.trigger = 1;
332 else if (fullarg(from, "acpi_sci=level"))
333 acpi_sci_flags.trigger = 3;
334 else if (fullarg(from, "acpi_sci=high"))
335 acpi_sci_flags.polarity = 1;
336 else if (fullarg(from, "acpi_sci=low"))
337 acpi_sci_flags.polarity = 3;
338
339 /* acpi=strict disables out-of-spec workarounds */
340 else if (fullarg(from, "acpi=strict")) {
341 acpi_strict = 1;
342 }
343#ifdef CONFIG_X86_IO_APIC
344 else if (fullarg(from, "acpi_skip_timer_override"))
345 acpi_skip_timer_override = 1;
346#endif
347#endif
348
349 if (fullarg(from, "disable_timer_pin_1"))
350 disable_timer_pin_1 = 1;
351 if (fullarg(from, "enable_timer_pin_1"))
352 disable_timer_pin_1 = -1;
353
354 if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
355 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
356 disable_apic = 1;
357 }
358
359 if (fullarg(from, "noapic"))
360 skip_ioapic_setup = 1;
361
362 if (fullarg(from,"apic")) {
363 skip_ioapic_setup = 0;
364 ioapic_force = 1;
365 }
366
367 if (!memcmp(from, "mem=", 4))
368 parse_memopt(from+4, &from);
369
370 if (!memcmp(from, "memmap=", 7)) {
371 /* exactmap option is for used defined memory */
372 if (!memcmp(from+7, "exactmap", 8)) {
373#ifdef CONFIG_CRASH_DUMP
374 /* If we are doing a crash dump, we
375 * still need to know the real mem
376 * size before original memory map is
377 * reset.
378 */
379 saved_max_pfn = e820_end_of_ram();
380#endif
381 from += 8+7;
382 end_pfn_map = 0;
383 e820.nr_map = 0;
384 userdef = 1;
385 }
386 else {
387 parse_memmapopt(from+7, &from);
388 userdef = 1;
389 }
390 }
391
392#ifdef CONFIG_NUMA
393 if (!memcmp(from, "numa=", 5))
394 numa_setup(from+5);
395#endif
396
397 if (!memcmp(from,"iommu=",6)) {
398 iommu_setup(from+6);
399 }
400
401 if (fullarg(from,"oops=panic"))
402 panic_on_oops = 1;
403
404 if (!memcmp(from, "noexec=", 7))
405 nonx_setup(from + 7);
406
407#ifdef CONFIG_KEXEC
408 /* crashkernel=size@addr specifies the location to reserve for
409 * a crash kernel. By reserving this memory we guarantee
410 * that linux never set's it up as a DMA target.
411 * Useful for holding code to do something appropriate
412 * after a kernel panic.
413 */
414 else if (!memcmp(from, "crashkernel=", 12)) {
415 unsigned long size, base;
416 size = memparse(from+12, &from);
417 if (*from == '@') {
418 base = memparse(from+1, &from);
419 /* FIXME: Do I want a sanity check
420 * to validate the memory range?
421 */
422 crashk_res.start = base;
423 crashk_res.end = base + size - 1;
424 }
425 }
426#endif
427
428#ifdef CONFIG_PROC_VMCORE
429 /* elfcorehdr= specifies the location of elf core header
430 * stored by the crashed kernel. This option will be passed
431 * by kexec loader to the capture kernel.
432 */
433 else if(!memcmp(from, "elfcorehdr=", 11))
434 elfcorehdr_addr = memparse(from+11, &from);
435#endif
436
437#ifdef CONFIG_HOTPLUG_CPU
438 else if (!memcmp(from, "additional_cpus=", 16))
439 setup_additional_cpus(from+16);
440#endif 282#endif
441 283
442 next_char:
443 c = *(from++);
444 if (!c)
445 break;
446 if (COMMAND_LINE_SIZE <= ++len)
447 break;
448 *(to++) = c;
449 }
450 if (userdef) {
451 printk(KERN_INFO "user-defined physical RAM map:\n");
452 e820_print_map("user");
453 }
454 *to = '\0';
455 *cmdline_p = command_line;
456}
457
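The removed parse_cmdline_early() above hand-rolled options such as crashkernel=size@addr, where memparse() accepts an optional K/M/G suffix and advances the cursor past what it consumed. A minimal userspace sketch of that parsing, assuming a memparse-like helper (re-implemented here for illustration, not the kernel's code):

#include <stdio.h>
#include <stdlib.h>

/* Userspace sketch of the kernel's memparse(): parse a number with an
 * optional K/M/G suffix and leave *retptr at the first unconsumed
 * character. Hypothetical re-implementation for illustration only. */
static unsigned long memparse_sketch(const char *ptr, char **retptr)
{
	unsigned long ret = strtoul(ptr, retptr, 0);

	switch (**retptr) {
	case 'G': case 'g': ret <<= 10; /* fall through */
	case 'M': case 'm': ret <<= 10; /* fall through */
	case 'K': case 'k': ret <<= 10; (*retptr)++;
	}
	return ret;
}

int main(void)
{
	char *p = "crashkernel=64M@16M";
	unsigned long size, base = 0;

	p += 12;                        /* skip "crashkernel=" */
	size = memparse_sketch(p, &p);  /* size, then an optional "@base" */
	if (*p == '@')
		base = memparse_sketch(p + 1, &p);
	printf("reserve %lu bytes at %#lx\n", size, base);
	return 0;
}

With the early_param() conversion later in this series, each of these options moves into its own registered handler instead of this single scanning loop.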
458#ifndef CONFIG_NUMA 284#ifndef CONFIG_NUMA
459static void __init 285static void __init
460contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) 286contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
@@ -521,6 +347,8 @@ static void discover_ebda(void)
521 347
522void __init setup_arch(char **cmdline_p) 348void __init setup_arch(char **cmdline_p)
523{ 349{
350 printk(KERN_INFO "Command line: %s\n", saved_command_line);
351
524 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); 352 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
525 screen_info = SCREEN_INFO; 353 screen_info = SCREEN_INFO;
526 edid_info = EDID_INFO; 354 edid_info = EDID_INFO;
@@ -547,16 +375,21 @@ void __init setup_arch(char **cmdline_p)
547 data_resource.start = virt_to_phys(&_etext); 375 data_resource.start = virt_to_phys(&_etext);
548 data_resource.end = virt_to_phys(&_edata)-1; 376 data_resource.end = virt_to_phys(&_edata)-1;
549 377
550 parse_cmdline_early(cmdline_p);
551
552 early_identify_cpu(&boot_cpu_data); 378 early_identify_cpu(&boot_cpu_data);
553 379
380 strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
381 *cmdline_p = command_line;
382
383 parse_early_param();
384
385 finish_e820_parsing();
386
554 /* 387 /*
555 * partially used pages are not usable - thus 388 * partially used pages are not usable - thus
556 * we are rounding upwards: 389 * we are rounding upwards:
557 */ 390 */
558 end_pfn = e820_end_of_ram(); 391 end_pfn = e820_end_of_ram();
559 num_physpages = end_pfn; /* for pfn_valid */ 392 num_physpages = end_pfn;
560 393
561 check_efer(); 394 check_efer();
562 395
@@ -576,6 +409,11 @@ void __init setup_arch(char **cmdline_p)
576 acpi_boot_table_init(); 409 acpi_boot_table_init();
577#endif 410#endif
578 411
412 /* How many end-of-memory variables you have, grandma! */
413 max_low_pfn = end_pfn;
414 max_pfn = end_pfn;
415 high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
416
579#ifdef CONFIG_ACPI_NUMA 417#ifdef CONFIG_ACPI_NUMA
580 /* 418 /*
581 * Parse SRAT to discover nodes. 419 * Parse SRAT to discover nodes.
@@ -625,12 +463,10 @@ void __init setup_arch(char **cmdline_p)
625 */ 463 */
626 acpi_reserve_bootmem(); 464 acpi_reserve_bootmem();
627#endif 465#endif
628#ifdef CONFIG_X86_LOCAL_APIC
629 /* 466 /*
630 * Find and reserve possible boot-time SMP configuration: 467 * Find and reserve possible boot-time SMP configuration:
631 */ 468 */
632 find_smp_config(); 469 find_smp_config();
633#endif
634#ifdef CONFIG_BLK_DEV_INITRD 470#ifdef CONFIG_BLK_DEV_INITRD
635 if (LOADER_TYPE && INITRD_START) { 471 if (LOADER_TYPE && INITRD_START) {
636 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { 472 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
@@ -657,7 +493,9 @@ void __init setup_arch(char **cmdline_p)
657 493
658 paging_init(); 494 paging_init();
659 495
660 check_ioapic(); 496#ifdef CONFIG_PCI
497 early_quirks();
498#endif
661 499
662 /* 500 /*
663 * set this early, so we don't allocate cpu0 501
@@ -674,14 +512,12 @@ void __init setup_arch(char **cmdline_p)
674 512
675 init_cpu_to_node(); 513 init_cpu_to_node();
676 514
677#ifdef CONFIG_X86_LOCAL_APIC
678 /* 515 /*
679 * get boot-time SMP configuration: 516 * get boot-time SMP configuration:
680 */ 517 */
681 if (smp_found_config) 518 if (smp_found_config)
682 get_smp_config(); 519 get_smp_config();
683 init_apic_mappings(); 520 init_apic_mappings();
684#endif
685 521
686 /* 522 /*
687 * Request address space for all standard RAM and ROM resources 523 * Request address space for all standard RAM and ROM resources
@@ -839,7 +675,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
839#endif 675#endif
840} 676}
841 677
842static void __init init_amd(struct cpuinfo_x86 *c) 678static void __cpuinit init_amd(struct cpuinfo_x86 *c)
843{ 679{
844 unsigned level; 680 unsigned level;
845 681
@@ -895,6 +731,12 @@ static void __init init_amd(struct cpuinfo_x86 *c)
895 731
896 /* Fix cpuid4 emulation for more */ 732 /* Fix cpuid4 emulation for more */
897 num_cache_leaves = 3; 733 num_cache_leaves = 3;
734
735 /* When there is only one core, no need to synchronize RDTSC */
736 if (num_possible_cpus() == 1)
737 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
738 else
739 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
898} 740}
899 741
900static void __cpuinit detect_ht(struct cpuinfo_x86 *c) 742static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -976,8 +818,7 @@ static void srat_detect_node(void)
976 node = first_node(node_online_map); 818 node = first_node(node_online_map);
977 numa_set_node(cpu, node); 819 numa_set_node(cpu, node);
978 820
979 if (acpi_numa > 0) 821 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
980 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
981#endif 822#endif
982} 823}
983 824
@@ -1011,6 +852,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1011 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 852 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
1012 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 853 (c->x86 == 0x6 && c->x86_model >= 0x0e))
1013 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 854 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
855 if (c->x86 == 6)
856 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
1014 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); 857 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
1015 c->x86_max_cores = intel_num_cpu_cores(c); 858 c->x86_max_cores = intel_num_cpu_cores(c);
1016 859
@@ -1229,8 +1072,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
1229 1072
1230 /* Intel-defined (#2) */ 1073 /* Intel-defined (#2) */
1231 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 1074 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
1232 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, 1075 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
1233 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1076 NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
1234 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1077 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1235 1078
1236 /* VIA/Cyrix/Centaur-defined */ 1079 /* VIA/Cyrix/Centaur-defined */
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 417de564456e..8c4b80fe71a1 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -24,7 +24,7 @@
24#include <asm/proto.h> 24#include <asm/proto.h>
25#include <asm/sections.h> 25#include <asm/sections.h>
26 26
27char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; 27char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
28 28
29cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; 29cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
30 30
@@ -46,8 +46,10 @@ Control non executable mappings for 64bit processes.
46on Enable(default) 46on Enable(default)
47off Disable 47off Disable
48*/ 48*/
49int __init nonx_setup(char *str) 49static int __init nonx_setup(char *str)
50{ 50{
51 if (!str)
52 return -EINVAL;
51 if (!strncmp(str, "on", 2)) { 53 if (!strncmp(str, "on", 2)) {
52 __supported_pte_mask |= _PAGE_NX; 54 __supported_pte_mask |= _PAGE_NX;
53 do_not_nx = 0; 55 do_not_nx = 0;
@@ -55,9 +57,9 @@ int __init nonx_setup(char *str)
55 do_not_nx = 1; 57 do_not_nx = 1;
56 __supported_pte_mask &= ~_PAGE_NX; 58 __supported_pte_mask &= ~_PAGE_NX;
57 } 59 }
58 return 1; 60 return 0;
59} 61}
60__setup("noexec=", nonx_setup); /* parsed early actually */ 62early_param("noexec", nonx_setup);
61 63
62int force_personality32 = 0; 64int force_personality32 = 0;
63 65
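The noexec hunk above is part of the series-wide move from __setup() to early_param(): handlers now receive the text after "name=", must tolerate a NULL argument, and return 0 on success or a negative errno. A toy userspace dispatcher showing that registration pattern (table and names are illustrative, not the kernel's implementation):

#include <stdio.h>
#include <string.h>

/* Toy model of the early_param() convention: each handler gets the text
 * after "name=" and returns 0 on success or -EINVAL on bad input. */
#define EINVAL 22

static int do_not_nx;

static int nonx_setup(char *str)
{
	if (!str)
		return -EINVAL;
	if (!strncmp(str, "on", 2))
		do_not_nx = 0;
	else if (!strncmp(str, "off", 3))
		do_not_nx = 1;
	return 0;
}

static struct { const char *name; int (*setup)(char *); } early_opts[] = {
	{ "noexec", nonx_setup },
};

static void parse_early_options(char *cmdline)
{
	for (char *tok = strtok(cmdline, " "); tok; tok = strtok(NULL, " "))
		for (size_t i = 0; i < sizeof(early_opts) / sizeof(*early_opts); i++) {
			size_t n = strlen(early_opts[i].name);
			if (!strncmp(tok, early_opts[i].name, n) && tok[n] == '=')
				early_opts[i].setup(tok + n + 1);
		}
}

int main(void)
{
	char cmdline[] = "root=/dev/sda1 noexec=off";

	parse_early_options(cmdline);
	printf("do_not_nx=%d\n", do_not_nx);
	return 0;
}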
@@ -93,12 +95,9 @@ void __init setup_per_cpu_areas(void)
93#endif 95#endif
94 96
95 /* Copy section for each CPU (we discard the original) */ 97 /* Copy section for each CPU (we discard the original) */
96 size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); 98 size = PERCPU_ENOUGH_ROOM;
97#ifdef CONFIG_MODULES
98 if (size < PERCPU_ENOUGH_ROOM)
99 size = PERCPU_ENOUGH_ROOM;
100#endif
101 99
100 printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
102 for_each_cpu_mask (i, cpu_possible_map) { 101 for_each_cpu_mask (i, cpu_possible_map) {
103 char *ptr; 102 char *ptr;
104 103
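The per-CPU setup above stops sizing the copy to the .data.percpu section and always reserves PERCPU_ENOUGH_ROOM, so module per-CPU data fits without a CONFIG_MODULES special case. A sketch of the copy loop under stand-in constants (values illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NCPUS              4
#define PERCPU_SECT_SIZE   512    /* stand-in for __per_cpu_end - __per_cpu_start */
#define PERCPU_ENOUGH_ROOM 4096   /* section size plus module headroom */

static char percpu_template[PERCPU_SECT_SIZE]; /* stand-in for the linked section */
static char *percpu_area[NCPUS];

int main(void)
{
	for (int cpu = 0; cpu < NCPUS; cpu++) {
		percpu_area[cpu] = malloc(PERCPU_ENOUGH_ROOM);
		if (!percpu_area[cpu])
			return 1;
		/* every CPU starts from the same initial image */
		memcpy(percpu_area[cpu], percpu_template, PERCPU_SECT_SIZE);
	}
	printf("allocated %d bytes per CPU\n", PERCPU_ENOUGH_ROOM);
	return 0;
}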
@@ -122,7 +121,10 @@ void pda_init(int cpu)
122 121
123 /* Set up data that may be needed in __get_free_pages early */ 122
124 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); 123 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
124 /* Memory clobbers used to order PDA accesses */
125 mb();
125 wrmsrl(MSR_GS_BASE, pda); 126 wrmsrl(MSR_GS_BASE, pda);
127 mb();
126 128
127 pda->cpunumber = cpu; 129 pda->cpunumber = cpu;
128 pda->irqcount = -1; 130 pda->irqcount = -1;
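The new mb() pair brackets the switch of the GS base so that PDA accesses cannot be reordered across the point where %gs changes meaning. A compilable sketch of the barrier placement, with hypothetical stand-ins for wrmsrl() and mb():

#include <stdio.h>
#include <stdint.h>

static inline void barrier_mb(void)
{
	__sync_synchronize();           /* full compiler + hardware fence */
}

static void write_gs_base(uint64_t base)
{
	(void)base;                     /* stand-in for wrmsrl(MSR_GS_BASE, pda) */
}

static void switch_pda(uint64_t new_pda)
{
	barrier_mb();                   /* old-PDA accesses ordered before... */
	write_gs_base(new_pda);         /* ...the base switch... */
	barrier_mb();                   /* ...and new-PDA accesses after it */
}

int main(void)
{
	switch_pda(0xffff810000010000ULL);
	printf("PDA switched with full fences on both sides\n");
	return 0;
}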
@@ -178,6 +180,8 @@ void __cpuinit check_efer(void)
178 } 180 }
179} 181}
180 182
183unsigned long kernel_eflags;
184
181/* 185/*
182 * cpu_init() initializes state that is per-CPU. Some data is already 186 * cpu_init() initializes state that is per-CPU. Some data is already
183 * initialized (naturally) in the bootstrap process, such as the GDT 187 * initialized (naturally) in the bootstrap process, such as the GDT
@@ -235,28 +239,17 @@ void __cpuinit cpu_init (void)
235 * set up and load the per-CPU TSS 239 * set up and load the per-CPU TSS
236 */ 240 */
237 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 241 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
242 static const unsigned int order[N_EXCEPTION_STACKS] = {
243 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
244 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
245 };
238 if (cpu) { 246 if (cpu) {
239 static const unsigned int order[N_EXCEPTION_STACKS] = {
240 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
241 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
242 };
243
244 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); 247 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
245 if (!estacks) 248 if (!estacks)
246 panic("Cannot allocate exception stack %ld %d\n", 249 panic("Cannot allocate exception stack %ld %d\n",
247 v, cpu); 250 v, cpu);
248 } 251 }
249 switch (v + 1) { 252 estacks += PAGE_SIZE << order[v];
250#if DEBUG_STKSZ > EXCEPTION_STKSZ
251 case DEBUG_STACK:
252 cpu_pda(cpu)->debugstack = (unsigned long)estacks;
253 estacks += DEBUG_STKSZ;
254 break;
255#endif
256 default:
257 estacks += EXCEPTION_STKSZ;
258 break;
259 }
260 orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks; 253 orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
261 } 254 }
262 255
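With the per-vector order[] array hoisted out of the allocation branch, the IST stack top is computed uniformly as base + (PAGE_SIZE << order[v]) instead of the old DEBUG_STACK switch. A standalone sketch of that arithmetic (constants illustrative; the range initializer is the GCC extension the kernel itself uses):

#include <stdio.h>

#define PAGE_SIZE             4096UL
#define N_EXCEPTION_STACKS    5
#define DEBUG_STACK           4    /* 1-based IST slot */
#define EXCEPTION_STACK_ORDER 0
#define DEBUG_STACK_ORDER     1    /* debug stack is twice as large */

int main(void)
{
	static const unsigned int order[N_EXCEPTION_STACKS] = {
		[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
		[DEBUG_STACK - 1] = DEBUG_STACK_ORDER,
	};

	for (int v = 0; v < N_EXCEPTION_STACKS; v++) {
		unsigned long base = 0x100000UL * (v + 1);  /* fake allocation */
		unsigned long top = base + (PAGE_SIZE << order[v]);

		printf("ist[%d]: base %#lx top %#lx\n", v, base, top);
	}
	return 0;
}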
@@ -290,4 +283,6 @@ void __cpuinit cpu_init (void)
290 set_debugreg(0UL, 7); 283 set_debugreg(0UL, 7);
291 284
292 fpu_init(); 285 fpu_init();
286
287 raw_local_save_flags(kernel_eflags);
293} 288}
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 28161170fb0a..49ec324cd141 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -38,37 +38,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
38 sigset_t *set, struct pt_regs * regs); 38 sigset_t *set, struct pt_regs * regs);
39 39
40asmlinkage long 40asmlinkage long
41sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs)
42{
43 sigset_t saveset, newset;
44
45 /* XXX: Don't preclude handling different sized sigset_t's. */
46 if (sigsetsize != sizeof(sigset_t))
47 return -EINVAL;
48
49 if (copy_from_user(&newset, unewset, sizeof(newset)))
50 return -EFAULT;
51 sigdelsetmask(&newset, ~_BLOCKABLE);
52
53 spin_lock_irq(&current->sighand->siglock);
54 saveset = current->blocked;
55 current->blocked = newset;
56 recalc_sigpending();
57 spin_unlock_irq(&current->sighand->siglock);
58#ifdef DEBUG_SIG
59 printk("rt_sigsuspend saveset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
60 saveset, newset, regs, regs->rip);
61#endif
62 regs->rax = -EINTR;
63 while (1) {
64 current->state = TASK_INTERRUPTIBLE;
65 schedule();
66 if (do_signal(regs, &saveset))
67 return -EINTR;
68 }
69}
70
71asmlinkage long
72sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, 41sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
73 struct pt_regs *regs) 42 struct pt_regs *regs)
74{ 43{
@@ -308,11 +277,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
308#endif 277#endif
309 278
310 /* Set up registers for signal handler */ 279 /* Set up registers for signal handler */
311 {
312 struct exec_domain *ed = current_thread_info()->exec_domain;
313 if (unlikely(ed && ed->signal_invmap && sig < 32))
314 sig = ed->signal_invmap[sig];
315 }
316 regs->rdi = sig; 280 regs->rdi = sig;
317 /* In case the signal handler was declared without prototypes */ 281 /* In case the signal handler was declared without prototypes */
318 regs->rax = 0; 282 regs->rax = 0;
@@ -341,11 +305,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
341 current->comm, current->pid, frame, regs->rip, frame->pretcode); 305 current->comm, current->pid, frame, regs->rip, frame->pretcode);
342#endif 306#endif
343 307
344 return 1; 308 return 0;
345 309
346give_sigsegv: 310give_sigsegv:
347 force_sigsegv(sig, current); 311 force_sigsegv(sig, current);
348 return 0; 312 return -EFAULT;
349} 313}
350 314
351/* 315/*
@@ -408,7 +372,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
408#endif 372#endif
409 ret = setup_rt_frame(sig, ka, info, oldset, regs); 373 ret = setup_rt_frame(sig, ka, info, oldset, regs);
410 374
411 if (ret) { 375 if (ret == 0) {
412 spin_lock_irq(&current->sighand->siglock); 376 spin_lock_irq(&current->sighand->siglock);
413 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); 377 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
414 if (!(ka->sa.sa_flags & SA_NODEFER)) 378 if (!(ka->sa.sa_flags & SA_NODEFER))
@@ -425,11 +389,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
425 * want to handle. Thus you cannot kill init even with a SIGKILL even by 389 * want to handle. Thus you cannot kill init even with a SIGKILL even by
426 * mistake. 390 * mistake.
427 */ 391 */
428int do_signal(struct pt_regs *regs, sigset_t *oldset) 392static void do_signal(struct pt_regs *regs)
429{ 393{
430 struct k_sigaction ka; 394 struct k_sigaction ka;
431 siginfo_t info; 395 siginfo_t info;
432 int signr; 396 int signr;
397 sigset_t *oldset;
433 398
434 /* 399 /*
435 * We want the common case to go fast, which 400 * We want the common case to go fast, which
@@ -438,9 +403,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
438 * if so. 403 * if so.
439 */ 404 */
440 if (!user_mode(regs)) 405 if (!user_mode(regs))
441 return 1; 406 return;
442 407
443 if (!oldset) 408 if (test_thread_flag(TIF_RESTORE_SIGMASK))
409 oldset = &current->saved_sigmask;
410 else
444 oldset = &current->blocked; 411 oldset = &current->blocked;
445 412
446 signr = get_signal_to_deliver(&info, &ka, regs, NULL); 413 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
@@ -454,30 +421,46 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
454 set_debugreg(current->thread.debugreg7, 7); 421 set_debugreg(current->thread.debugreg7, 7);
455 422
456 /* Whee! Actually deliver the signal. */ 423 /* Whee! Actually deliver the signal. */
457 return handle_signal(signr, &info, &ka, oldset, regs); 424 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
425 /* a signal was successfully delivered; the saved
426 * sigmask will have been stored in the signal frame,
427 * and will be restored by sigreturn, so we can simply
428 * clear the TIF_RESTORE_SIGMASK flag */
429 clear_thread_flag(TIF_RESTORE_SIGMASK);
430 }
431 return;
458 } 432 }
459 433
460 /* Did we come from a system call? */ 434 /* Did we come from a system call? */
461 if ((long)regs->orig_rax >= 0) { 435 if ((long)regs->orig_rax >= 0) {
462 /* Restart the system call - no handlers present */ 436 /* Restart the system call - no handlers present */
463 long res = regs->rax; 437 long res = regs->rax;
464 if (res == -ERESTARTNOHAND || 438 switch (res) {
465 res == -ERESTARTSYS || 439 case -ERESTARTNOHAND:
466 res == -ERESTARTNOINTR) { 440 case -ERESTARTSYS:
441 case -ERESTARTNOINTR:
467 regs->rax = regs->orig_rax; 442 regs->rax = regs->orig_rax;
468 regs->rip -= 2; 443 regs->rip -= 2;
469 } 444 break;
470 if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) { 445 case -ERESTART_RESTARTBLOCK:
471 regs->rax = test_thread_flag(TIF_IA32) ? 446 regs->rax = test_thread_flag(TIF_IA32) ?
472 __NR_ia32_restart_syscall : 447 __NR_ia32_restart_syscall :
473 __NR_restart_syscall; 448 __NR_restart_syscall;
474 regs->rip -= 2; 449 regs->rip -= 2;
450 break;
475 } 451 }
476 } 452 }
477 return 0; 453
454 /* if there's no signal to deliver, we just put the saved sigmask
455 back. */
456 if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
457 clear_thread_flag(TIF_RESTORE_SIGMASK);
458 sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
459 }
478} 460}
479 461
480void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags) 462void
463do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
481{ 464{
482#ifdef DEBUG_SIG 465#ifdef DEBUG_SIG
483 printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", 466 printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
@@ -491,8 +474,8 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_
491 } 474 }
492 475
493 /* deal with pending signal delivery */ 476 /* deal with pending signal delivery */
494 if (thread_info_flags & _TIF_SIGPENDING) 477 if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
495 do_signal(regs,oldset); 478 do_signal(regs);
496} 479}
497 480
498void signal_fault(struct pt_regs *regs, void __user *frame, char *where) 481void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
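With sys_rt_sigsuspend() gone in favor of TIF_RESTORE_SIGMASK, signal-less syscall interruption is handled by the switch rewritten above: restartable errors re-arm the original syscall and back rip up over the 2-byte syscall instruction. A userspace sketch of that logic, using the standard Linux restart error values (the register struct is a stand-in for struct pt_regs):

#include <stdio.h>

#define ERESTARTSYS           512
#define ERESTARTNOINTR        513
#define ERESTARTNOHAND        514
#define ERESTART_RESTARTBLOCK 516

struct fake_regs { long rax, orig_rax; unsigned long rip; };

static void restart_syscall_sketch(struct fake_regs *regs, long restart_nr)
{
	switch (regs->rax) {
	case -ERESTARTNOHAND:
	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
		regs->rax = regs->orig_rax;   /* re-arm the original syscall */
		regs->rip -= 2;               /* back over "syscall" (2 bytes) */
		break;
	case -ERESTART_RESTARTBLOCK:
		regs->rax = restart_nr;       /* e.g. __NR_restart_syscall */
		regs->rip -= 2;
		break;
	}
}

int main(void)
{
	struct fake_regs r = { .rax = -ERESTARTSYS, .orig_rax = 0, .rip = 0x400082 };

	restart_syscall_sketch(&r, 219);      /* 219 = __NR_restart_syscall on x86-64 */
	printf("rax=%ld rip=%#lx\n", r.rax, r.rip);
	return 0;
}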
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 06af6ca60129..4f67697f5036 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -522,26 +522,3 @@ asmlinkage void smp_call_function_interrupt(void)
522 } 522 }
523} 523}
524 524
525int safe_smp_processor_id(void)
526{
527 unsigned apicid, i;
528
529 if (disable_apic)
530 return 0;
531
532 apicid = hard_smp_processor_id();
533 if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
534 return apicid;
535
536 for (i = 0; i < NR_CPUS; ++i) {
537 if (x86_cpu_to_apicid[i] == apicid)
538 return i;
539 }
540
541 /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
542 * or called too early. Either way, we must be CPU 0. */
543 if (x86_cpu_to_apicid[0] == BAD_APICID)
544 return 0;
545
546 return 0; /* Should not happen */
547}
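safe_smp_processor_id() can be removed because the trace code now runs with a valid GS base everywhere; for reference, a sketch of the reverse APIC-ID lookup it performed (table contents illustrative):

#include <stdio.h>

#define NR_CPUS    8
#define BAD_APICID 0xFFu

static unsigned x86_cpu_to_apicid[NR_CPUS] = {
	0, 2, 4, 6, BAD_APICID, BAD_APICID, BAD_APICID, BAD_APICID,
};

static int apicid_to_cpu(unsigned apicid)
{
	if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
		return apicid;              /* common 1:1 case */
	for (int i = 0; i < NR_CPUS; i++)
		if (x86_cpu_to_apicid[i] == apicid)
			return i;
	return 0;                           /* table empty: must be the boot CPU */
}

int main(void)
{
	printf("apicid 4 -> cpu %d\n", apicid_to_cpu(4));
	return 0;
}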
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 3ae9ffddddc0..7b7a6870288a 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -1091,7 +1091,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
1091 /* 1091 /*
1092 * Switch from PIC to APIC mode. 1092 * Switch from PIC to APIC mode.
1093 */ 1093 */
1094 connect_bsp_APIC();
1095 setup_local_APIC(); 1094 setup_local_APIC();
1096 1095
1097 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { 1096 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
@@ -1176,12 +1175,9 @@ int __cpuinit __cpu_up(unsigned int cpu)
1176void __init smp_cpus_done(unsigned int max_cpus) 1175void __init smp_cpus_done(unsigned int max_cpus)
1177{ 1176{
1178 smp_cleanup_boot(); 1177 smp_cleanup_boot();
1179
1180#ifdef CONFIG_X86_IO_APIC
1181 setup_ioapic_dest(); 1178 setup_ioapic_dest();
1182#endif
1183
1184 check_nmi_watchdog(); 1179 check_nmi_watchdog();
1180 time_init_gtod();
1185} 1181}
1186 1182
1187#ifdef CONFIG_HOTPLUG_CPU 1183#ifdef CONFIG_HOTPLUG_CPU
@@ -1234,6 +1230,8 @@ int __cpu_disable(void)
1234 if (cpu == 0) 1230 if (cpu == 0)
1235 return -EBUSY; 1231 return -EBUSY;
1236 1232
1233 if (nmi_watchdog == NMI_LOCAL_APIC)
1234 stop_apic_nmi_watchdog(NULL);
1237 clear_local_APIC(); 1235 clear_local_APIC();
1238 1236
1239 /* 1237 /*
@@ -1273,11 +1271,11 @@ void __cpu_die(unsigned int cpu)
1273 printk(KERN_ERR "CPU %u didn't die...\n", cpu); 1271 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1274} 1272}
1275 1273
1276__init int setup_additional_cpus(char *s) 1274static __init int setup_additional_cpus(char *s)
1277{ 1275{
1278 return get_option(&s, &additional_cpus); 1276 return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
1279} 1277}
1280__setup("additional_cpus=", setup_additional_cpus); 1278early_param("additional_cpus", setup_additional_cpus);
1281 1279
1282#else /* ... !CONFIG_HOTPLUG_CPU */ 1280#else /* ... !CONFIG_HOTPLUG_CPU */
1283 1281
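The early_param() conversion here also hardens the handler: get_option() must actually parse an integer before the option counts as handled, so bad input yields -EINVAL. A userspace mock of that contract (hypothetical helper, not the kernel's get_option()):

#include <stdio.h>
#include <stdlib.h>

static int get_option_sketch(char **str, int *val)
{
	char *end;

	if (!*str)
		return 0;
	*val = (int)strtol(*str, &end, 0);
	if (end == *str)
		return 0;       /* nothing parsed */
	*str = end;
	return 1;
}

static int additional_cpus = -1;

static int setup_additional_cpus(char *s)
{
	return s && get_option_sketch(&s, &additional_cpus) ? 0 : -22 /* -EINVAL */;
}

int main(void)
{
	printf("rc=%d additional_cpus=%d\n",
	       setup_additional_cpus("2"), additional_cpus);
	return 0;
}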
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
index 32cf55eb9af8..6026b31d037e 100644
--- a/arch/x86_64/kernel/stacktrace.c
+++ b/arch/x86_64/kernel/stacktrace.c
@@ -7,215 +7,49 @@
7 */ 7 */
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/stacktrace.h> 9#include <linux/stacktrace.h>
10#include <linux/module.h>
11#include <asm/stacktrace.h>
10 12
11#include <asm/smp.h> 13static void save_stack_warning(void *data, char *msg)
12
13static inline int
14in_range(unsigned long start, unsigned long addr, unsigned long end)
15{ 14{
16 return addr >= start && addr <= end;
17} 15}
18 16
19static unsigned long 17static void
20get_stack_end(struct task_struct *task, unsigned long stack) 18save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
21{ 19{
22 unsigned long stack_start, stack_end, flags;
23 int i, cpu;
24
25 /*
26 * The most common case is that we are in the task stack:
27 */
28 stack_start = (unsigned long)task->thread_info;
29 stack_end = stack_start + THREAD_SIZE;
30
31 if (in_range(stack_start, stack, stack_end))
32 return stack_end;
33
34 /*
35 * We are in an interrupt if irqstackptr is set:
36 */
37 raw_local_irq_save(flags);
38 cpu = safe_smp_processor_id();
39 stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
40
41 if (stack_end) {
42 stack_start = stack_end & ~(IRQSTACKSIZE-1);
43 if (in_range(stack_start, stack, stack_end))
44 goto out_restore;
45 /*
46 * We get here if we are in an IRQ context but we
47 * are also in an exception stack.
48 */
49 }
50
51 /*
52 * Iterate over all exception stacks, and figure out whether
53 * 'stack' is in one of them:
54 */
55 for (i = 0; i < N_EXCEPTION_STACKS; i++) {
56 /*
57 * set 'end' to the end of the exception stack.
58 */
59 stack_end = per_cpu(init_tss, cpu).ist[i];
60 stack_start = stack_end - EXCEPTION_STKSZ;
61
62 /*
63 * Is 'stack' above this exception frame's end?
64 * If yes then skip to the next frame.
65 */
66 if (stack >= stack_end)
67 continue;
68 /*
69 * Is 'stack' above this exception frame's start address?
70 * If yes then we found the right frame.
71 */
72 if (stack >= stack_start)
73 goto out_restore;
74
75 /*
76 * If this is a debug stack, and if it has a larger size than
77 * the usual exception stacks, then 'stack' might still
78 * be within the lower portion of the debug stack:
79 */
80#if DEBUG_STKSZ > EXCEPTION_STKSZ
81 if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) {
82 /*
83 * Black magic. A large debug stack is composed of
84 * multiple exception stack entries, which we
85 * iterate through now. Don't look:
86 */
87 do {
88 stack_end -= EXCEPTION_STKSZ;
89 stack_start -= EXCEPTION_STKSZ;
90 } while (stack < stack_start);
91
92 goto out_restore;
93 }
94#endif
95 }
96 /*
97 * Ok, 'stack' is not pointing to any of the system stacks.
98 */
99 stack_end = 0;
100
101out_restore:
102 raw_local_irq_restore(flags);
103
104 return stack_end;
105} 20}
106 21
107 22static int save_stack_stack(void *data, char *name)
108/*
109 * Save stack-backtrace addresses into a stack_trace buffer:
110 */
111static inline unsigned long
112save_context_stack(struct stack_trace *trace, unsigned int skip,
113 unsigned long stack, unsigned long stack_end)
114{ 23{
115 unsigned long addr; 24 struct stack_trace *trace = (struct stack_trace *)data;
116 25 return trace->all_contexts ? 0 : -1;
117#ifdef CONFIG_FRAME_POINTER 26}
118 unsigned long prev_stack = 0;
119 27
120 while (in_range(prev_stack, stack, stack_end)) { 28static void save_stack_address(void *data, unsigned long addr)
121 pr_debug("stack: %p\n", (void *)stack); 29{
122 addr = (unsigned long)(((unsigned long *)stack)[1]); 30 struct stack_trace *trace = (struct stack_trace *)data;
123 pr_debug("addr: %p\n", (void *)addr); 31 if (trace->skip > 0) {
124 if (!skip) 32 trace->skip--;
125 trace->entries[trace->nr_entries++] = addr-1; 33 return;
126 else
127 skip--;
128 if (trace->nr_entries >= trace->max_entries)
129 break;
130 if (!addr)
131 return 0;
132 /*
133 * Stack frames must go forwards (otherwise a loop could
134 * happen if the stackframe is corrupted), so we move
135 * prev_stack forwards:
136 */
137 prev_stack = stack;
138 stack = (unsigned long)(((unsigned long *)stack)[0]);
139 }
140 pr_debug("invalid: %p\n", (void *)stack);
141#else
142 while (stack < stack_end) {
143 addr = ((unsigned long *)stack)[0];
144 stack += sizeof(long);
145 if (__kernel_text_address(addr)) {
146 if (!skip)
147 trace->entries[trace->nr_entries++] = addr-1;
148 else
149 skip--;
150 if (trace->nr_entries >= trace->max_entries)
151 break;
152 }
153 } 34 }
154#endif 35 if (trace->nr_entries < trace->max_entries - 1)
155 return stack; 36 trace->entries[trace->nr_entries++] = addr;
156} 37}
157 38
158#define MAX_STACKS 10 39static struct stacktrace_ops save_stack_ops = {
40 .warning = save_stack_warning,
41 .warning_symbol = save_stack_warning_symbol,
42 .stack = save_stack_stack,
43 .address = save_stack_address,
44};
159 45
160/* 46/*
161 * Save stack-backtrace addresses into a stack_trace buffer. 47 * Save stack-backtrace addresses into a stack_trace buffer.
162 * If all_contexts is set, all contexts (hardirq, softirq and process)
163 * are saved. If not set then only the current context is saved.
164 */ 48 */
165void save_stack_trace(struct stack_trace *trace, 49void save_stack_trace(struct stack_trace *trace, struct task_struct *task)
166 struct task_struct *task, int all_contexts,
167 unsigned int skip)
168{ 50{
169 unsigned long stack = (unsigned long)&stack; 51 dump_trace(task, NULL, NULL, &save_stack_ops, trace);
170 int i, nr_stacks = 0, stacks_done[MAX_STACKS]; 52 trace->entries[trace->nr_entries++] = ULONG_MAX;
171
172 WARN_ON(trace->nr_entries || !trace->max_entries);
173
174 if (!task)
175 task = current;
176
177 pr_debug("task: %p, ti: %p\n", task, task->thread_info);
178
179 if (!task || task == current) {
180 /* Grab rbp right from our regs: */
181 asm ("mov %%rbp, %0" : "=r" (stack));
182 pr_debug("rbp: %p\n", (void *)stack);
183 } else {
184 /* rbp is the last reg pushed by switch_to(): */
185 stack = task->thread.rsp;
186 pr_debug("other task rsp: %p\n", (void *)stack);
187 stack = (unsigned long)(((unsigned long *)stack)[0]);
188 pr_debug("other task rbp: %p\n", (void *)stack);
189 }
190
191 while (1) {
192 unsigned long stack_end = get_stack_end(task, stack);
193
194 pr_debug("stack: %p\n", (void *)stack);
195 pr_debug("stack end: %p\n", (void *)stack_end);
196
197 /*
198 * Invalid stack address?
199 */
200 if (!stack_end)
201 return;
202 /*
203 * Were we in this stack already? (recursion)
204 */
205 for (i = 0; i < nr_stacks; i++)
206 if (stacks_done[i] == stack_end)
207 return;
208 stacks_done[nr_stacks] = stack_end;
209
210 stack = save_context_stack(trace, skip, stack, stack_end);
211 if (!all_contexts || !stack ||
212 trace->nr_entries >= trace->max_entries)
213 return;
214 trace->entries[trace->nr_entries++] = ULONG_MAX;
215 if (trace->nr_entries >= trace->max_entries)
216 return;
217 if (++nr_stacks >= MAX_STACKS)
218 return;
219 }
220} 53}
54EXPORT_SYMBOL(save_stack_trace);
221 55
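The rewritten save_stack_trace() plugs a set of callbacks into the shared walker instead of duplicating stack-layout knowledge per consumer. A toy version of that ops pattern, with a fake walker standing in for dump_trace() (structure illustrative):

#include <stdio.h>

struct trace_ops {
	int  (*stack)(void *data, const char *name);  /* <0 stops at a boundary */
	void (*address)(void *data, unsigned long addr);
};

static void walk_fake_stack(const struct trace_ops *ops, void *data)
{
	static const unsigned long frames[] = {
		0xffffffff8010a000UL, 0xffffffff80123456UL,
	};

	if (ops->stack(data, "IRQ") < 0)
		return;
	for (unsigned i = 0; i < sizeof(frames) / sizeof(frames[0]); i++)
		ops->address(data, frames[i]);
}

static int print_stack(void *data, const char *name)
{
	printf(" <%s>", name);
	return 0;
}

static void print_address(void *data, unsigned long addr)
{
	printf(" [<%016lx>]", addr);
}

int main(void)
{
	struct trace_ops print_ops = { print_stack, print_address };

	walk_fake_stack(&print_ops, NULL);  /* mirrors print_trace_ops */
	printf("\n");
	return 0;
}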
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
index 5530dda3f27a..cbabfdf78e06 100644
--- a/arch/x86_64/kernel/tce.c
+++ b/arch/x86_64/kernel/tce.c
@@ -1,4 +1,6 @@
1/* 1/*
2 * This file manages the translation entries for the IBM Calgary IOMMU.
3 *
2 * Derived from arch/powerpc/platforms/pseries/iommu.c 4 * Derived from arch/powerpc/platforms/pseries/iommu.c
3 * 5 *
4 * Copyright (C) IBM Corporation, 2006 6 * Copyright (C) IBM Corporation, 2006
@@ -104,14 +106,6 @@ static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
104 /* set the tce table size - measured in entries */ 106 /* set the tce table size - measured in entries */
105 tbl->it_size = table_size_to_number_of_entries(specified_table_size); 107 tbl->it_size = table_size_to_number_of_entries(specified_table_size);
106 108
107 tbl->it_base = (unsigned long)tce_table_kva[dev->bus->number];
108 if (!tbl->it_base) {
109 printk(KERN_ERR "Calgary: iommu_table_setparms: "
110 "no table allocated?!\n");
111 ret = -ENOMEM;
112 goto done;
113 }
114
115 /* 109 /*
116 * number of bytes needed for the bitmap size in number of 110 * number of bytes needed for the bitmap size in number of
117 * entries; we need one bit per entry 111 * entries; we need one bit per entry
@@ -160,8 +154,6 @@ int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
160 if (ret) 154 if (ret)
161 goto free_tbl; 155 goto free_tbl;
162 156
163 tce_free(tbl, 0, tbl->it_size);
164
165 tbl->bbar = bbar; 157 tbl->bbar = bbar;
166 158
167 /* 159 /*
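The comment above sizes the Calgary free map at one bit per TCE entry. A trivial sketch of that rounding (numbers illustrative):

#include <stdio.h>

#define BITS_PER_BYTE 8

static unsigned long bitmap_bytes(unsigned long entries)
{
	return (entries + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
}

int main(void)
{
	unsigned long entries = 512UL * 1024;   /* a 512K-entry table */

	printf("%lu entries -> %lu bitmap bytes\n",
	       entries, bitmap_bytes(entries));
	return 0;
}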
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 7700e6cd2bd9..1c255ee76e7c 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -24,6 +24,8 @@
24#include <linux/device.h> 24#include <linux/device.h>
25#include <linux/sysdev.h> 25#include <linux/sysdev.h>
26#include <linux/bcd.h> 26#include <linux/bcd.h>
27#include <linux/notifier.h>
28#include <linux/cpu.h>
27#include <linux/kallsyms.h> 29#include <linux/kallsyms.h>
28#include <linux/acpi.h> 30#include <linux/acpi.h>
29#ifdef CONFIG_ACPI 31#ifdef CONFIG_ACPI
@@ -39,9 +41,7 @@
39#include <asm/sections.h> 41#include <asm/sections.h>
40#include <linux/cpufreq.h> 42#include <linux/cpufreq.h>
41#include <linux/hpet.h> 43#include <linux/hpet.h>
42#ifdef CONFIG_X86_LOCAL_APIC
43#include <asm/apic.h> 44#include <asm/apic.h>
44#endif
45 45
46#ifdef CONFIG_CPU_FREQ 46#ifdef CONFIG_CPU_FREQ
47static void cpufreq_delayed_get(void); 47static void cpufreq_delayed_get(void);
@@ -49,7 +49,7 @@ static void cpufreq_delayed_get(void);
49extern void i8254_timer_resume(void); 49extern void i8254_timer_resume(void);
50extern int using_apic_timer; 50extern int using_apic_timer;
51 51
52static char *time_init_gtod(void); 52static char *timename = NULL;
53 53
54DEFINE_SPINLOCK(rtc_lock); 54DEFINE_SPINLOCK(rtc_lock);
55EXPORT_SYMBOL(rtc_lock); 55EXPORT_SYMBOL(rtc_lock);
@@ -187,20 +187,15 @@ unsigned long profile_pc(struct pt_regs *regs)
187{ 187{
188 unsigned long pc = instruction_pointer(regs); 188 unsigned long pc = instruction_pointer(regs);
189 189
190 /* Assume the lock function has either no stack frame or only a single 190 /* Assume the lock function has either no stack frame or a copy
191 word. This checks if the address on the stack looks like a kernel 191 of eflags from PUSHF
192 text address. 192 Eflags always has bits 22 and up cleared unlike kernel addresses. */
193 There is a small window for false hits, but in that case the tick
194 is just accounted to the spinlock function.
195 Better would be to write these functions in assembler again
196 and check exactly. */
197 if (!user_mode(regs) && in_lock_functions(pc)) { 193 if (!user_mode(regs) && in_lock_functions(pc)) {
198 char *v = *(char **)regs->rsp; 194 unsigned long *sp = (unsigned long *)regs->rsp;
199 if ((v >= _stext && v <= _etext) || 195 if (sp[0] >> 22)
200 (v >= _sinittext && v <= _einittext) || 196 return sp[0];
201 (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END)) 197 if (sp[1] >> 22)
202 return (unsigned long)v; 198 return sp[1];
203 return ((unsigned long *)regs->rsp)[1];
204 } 199 }
205 return pc; 200 return pc;
206} 201}
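A compilable sketch of the heuristic described in the new comment: a PUSHF'd EFLAGS image never has bits 22 and up set, while kernel return addresses always do, so a single shift distinguishes the two words at the top of the lock function's frame:

#include <stdio.h>

static int looks_like_kernel_address(unsigned long word)
{
	return (word >> 22) != 0;
}

int main(void)
{
	unsigned long eflags = 0x246;                  /* typical EFLAGS image */
	unsigned long ret    = 0xffffffff80110234UL;   /* kernel text address */

	printf("eflags -> %d, retaddr -> %d\n",
	       looks_like_kernel_address(eflags),
	       looks_like_kernel_address(ret));
	return 0;
}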
@@ -281,6 +276,7 @@ static void set_rtc_mmss(unsigned long nowtime)
281 * Note: This function is required to return accurate 276 * Note: This function is required to return accurate
282 * time even in the absence of multiple timer ticks. 277 * time even in the absence of multiple timer ticks.
283 */ 278 */
279static inline unsigned long long cycles_2_ns(unsigned long long cyc);
284unsigned long long monotonic_clock(void) 280unsigned long long monotonic_clock(void)
285{ 281{
286 unsigned long seq; 282 unsigned long seq;
@@ -305,8 +301,7 @@ unsigned long long monotonic_clock(void)
305 base = monotonic_base; 301 base = monotonic_base;
306 } while (read_seqretry(&xtime_lock, seq)); 302 } while (read_seqretry(&xtime_lock, seq));
307 this_offset = get_cycles_sync(); 303 this_offset = get_cycles_sync();
308 /* FIXME: 1000 or 1000000? */ 304 offset = cycles_2_ns(this_offset - last_offset);
309 offset = (this_offset - last_offset)*1000 / cpu_khz;
310 } 305 }
311 return base + offset; 306 return base + offset;
312} 307}
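monotonic_clock() now converts TSC deltas with cycles_2_ns() instead of the ad-hoc "* 1000 / cpu_khz" math the FIXME complained about. A sketch of the fixed-point conversion; the shift of 10 mirrors the kernel's NS_SCALE, other names are illustrative:

#include <stdio.h>

#define NS_SCALE      10
#define NSEC_PER_MSEC 1000000UL

static unsigned long cyc2ns_scale;

static void set_cyc2ns_scale(unsigned long cpu_khz)
{
	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
}

static unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> NS_SCALE;
}

int main(void)
{
	set_cyc2ns_scale(2400000);     /* 2.4 GHz CPU */
	printf("1e6 cycles ~= %llu ns\n", cycles_2_ns(1000000ULL));
	return 0;
}

Precomputing the scale once turns every conversion into a multiply and a shift, which matters on the timer fast path.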
@@ -410,8 +405,7 @@ void main_timer_handler(struct pt_regs *regs)
410 offset %= USEC_PER_TICK; 405 offset %= USEC_PER_TICK;
411 } 406 }
412 407
413 /* FIXME: 1000 or 1000000? */ 408 monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc);
414 monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
415 409
416 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; 410 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
417 411
@@ -441,12 +435,8 @@ void main_timer_handler(struct pt_regs *regs)
441 * have to call the local interrupt handler. 435 * have to call the local interrupt handler.
442 */ 436 */
443 437
444#ifndef CONFIG_X86_LOCAL_APIC
445 profile_tick(CPU_PROFILING, regs);
446#else
447 if (!using_apic_timer) 438 if (!using_apic_timer)
448 smp_local_timer_interrupt(regs); 439 smp_local_timer_interrupt(regs);
449#endif
450 440
451/* 441/*
452 * If we have an externally synchronized Linux clock, then update CMOS clock 442 * If we have an externally synchronized Linux clock, then update CMOS clock
@@ -470,10 +460,8 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
470 if (apic_runs_main_timer > 1) 460 if (apic_runs_main_timer > 1)
471 return IRQ_HANDLED; 461 return IRQ_HANDLED;
472 main_timer_handler(regs); 462 main_timer_handler(regs);
473#ifdef CONFIG_X86_LOCAL_APIC
474 if (using_apic_timer) 463 if (using_apic_timer)
475 smp_send_timer_broadcast_ipi(); 464 smp_send_timer_broadcast_ipi();
476#endif
477 return IRQ_HANDLED; 465 return IRQ_HANDLED;
478} 466}
479 467
@@ -893,11 +881,17 @@ static struct irqaction irq0 = {
893 timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL 881 timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
894}; 882};
895 883
896void __init time_init(void) 884static int __cpuinit
885time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
897{ 886{
898 char *timename; 887 unsigned cpu = (unsigned long) hcpu;
899 char *gtod; 888 if (action == CPU_ONLINE)
889 vsyscall_set_cpu(cpu);
890 return NOTIFY_DONE;
891}
900 892
893void __init time_init(void)
894{
901 if (nohpet) 895 if (nohpet)
902 vxtime.hpet_address = 0; 896 vxtime.hpet_address = 0;
903 897
@@ -931,18 +925,17 @@ void __init time_init(void)
931 } 925 }
932 926
933 vxtime.mode = VXTIME_TSC; 927 vxtime.mode = VXTIME_TSC;
934 gtod = time_init_gtod();
935
936 printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
937 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
938 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
939 cpu_khz / 1000, cpu_khz % 1000);
940 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz; 928 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
941 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz; 929 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
942 vxtime.last_tsc = get_cycles_sync(); 930 vxtime.last_tsc = get_cycles_sync();
931 set_cyc2ns_scale(cpu_khz);
943 setup_irq(0, &irq0); 932 setup_irq(0, &irq0);
933 hotcpu_notifier(time_cpu_notifier, 0);
934 time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id());
944 935
945 set_cyc2ns_scale(cpu_khz); 936#ifndef CONFIG_SMP
937 time_init_gtod();
938#endif
946} 939}
947 940
948/* 941/*
@@ -973,12 +966,18 @@ __cpuinit int unsynchronized_tsc(void)
973/* 966/*
974 * Decide what mode gettimeofday should use. 967 * Decide what mode gettimeofday should use.
975 */ 968 */
976__init static char *time_init_gtod(void) 969void time_init_gtod(void)
977{ 970{
978 char *timetype; 971 char *timetype;
979 972
980 if (unsynchronized_tsc()) 973 if (unsynchronized_tsc())
981 notsc = 1; 974 notsc = 1;
975
976 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
977 vgetcpu_mode = VGETCPU_RDTSCP;
978 else
979 vgetcpu_mode = VGETCPU_LSL;
980
982 if (vxtime.hpet_address && notsc) { 981 if (vxtime.hpet_address && notsc) {
983 timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; 982 timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
984 if (hpet_use_timer) 983 if (hpet_use_timer)
@@ -1001,7 +1000,16 @@ __init static char *time_init_gtod(void)
1001 timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; 1000 timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
1002 vxtime.mode = VXTIME_TSC; 1001 vxtime.mode = VXTIME_TSC;
1003 } 1002 }
1004 return timetype; 1003
1004 printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
1005 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
1006 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
1007 cpu_khz / 1000, cpu_khz % 1000);
1008 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
1009 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
1010 vxtime.last_tsc = get_cycles_sync();
1011
1012 set_cyc2ns_scale(cpu_khz);
1005} 1013}
1006 1014
1007__setup("report_lost_ticks", time_setup); 1015__setup("report_lost_ticks", time_setup);
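VGETCPU_RDTSCP selects a vgetcpu fast path in which RDTSCP returns, besides the TSC, whatever vsyscall_set_cpu() programmed into TSC_AUX; in this series that is (node << 12) | cpu. A userspace sketch using the GCC intrinsic (requires an RDTSCP-capable CPU, and the packing shown is this series' convention):

#include <stdio.h>
#include <x86intrin.h>

int main(void)
{
	unsigned int aux;
	unsigned long long tsc = __rdtscp(&aux);

	/* low 12 bits carry the CPU number, the rest the node */
	printf("tsc=%llu cpu=%u node=%u\n", tsc, aux & 0xfff, aux >> 12);
	return 0;
}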
@@ -1031,8 +1039,16 @@ static int timer_resume(struct sys_device *dev)
1031 unsigned long flags; 1039 unsigned long flags;
1032 unsigned long sec; 1040 unsigned long sec;
1033 unsigned long ctime = get_cmos_time(); 1041 unsigned long ctime = get_cmos_time();
1034 unsigned long sleep_length = (ctime - sleep_start) * HZ; 1042 long sleep_length = (ctime - sleep_start) * HZ;
1035 1043
1044 if (sleep_length < 0) {
1045 printk(KERN_WARNING "Time skew detected in timer resume!\n");
1046 /* The time after the resume must not be earlier than the time
1047 * before the suspend or some nasty things will happen
1048 */
1049 sleep_length = 0;
1050 ctime = sleep_start;
1051 }
1036 if (vxtime.hpet_address) 1052 if (vxtime.hpet_address)
1037 hpet_reenable(); 1053 hpet_reenable();
1038 else 1054 else
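A standalone sketch of the clamp added in timer_resume() above: if the RTC reads earlier after resume than at suspend, the computed sleep length would go negative (and huge once treated as unsigned), so it is forced to zero:

#include <stdio.h>

#define HZ 250

int main(void)
{
	unsigned long sleep_start = 1000000;   /* RTC seconds at suspend */
	unsigned long ctime       = 999990;    /* skewed RTC read at resume */
	long sleep_length = (long)(ctime - sleep_start) * HZ;

	if (sleep_length < 0) {
		printf("Time skew detected in timer resume!\n");
		sleep_length = 0;
		ctime = sleep_start;
	}
	printf("sleep_length=%ld jiffies\n", sleep_length);
	return 0;
}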
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
index 23a03eb91fc7..c79b99a9e2f6 100644
--- a/arch/x86_64/kernel/trampoline.S
+++ b/arch/x86_64/kernel/trampoline.S
@@ -64,7 +64,7 @@ idt_48:
64 .word 0, 0 # idt base = 0L 64 .word 0, 0 # idt base = 0L
65 65
66gdt_48: 66gdt_48:
67 .short __KERNEL32_CS + 7 # gdt limit 67 .short GDT_ENTRIES*8 - 1 # gdt limit
68 .long cpu_gdt_table-__START_KERNEL_map 68 .long cpu_gdt_table-__START_KERNEL_map
69 69
70.globl trampoline_end 70.globl trampoline_end
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index b1249774d1e8..01f2a8d254c2 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -45,6 +45,7 @@
45#include <asm/pda.h> 45#include <asm/pda.h>
46#include <asm/proto.h> 46#include <asm/proto.h>
47#include <asm/nmi.h> 47#include <asm/nmi.h>
48#include <asm/stacktrace.h>
48 49
49asmlinkage void divide_error(void); 50asmlinkage void divide_error(void);
50asmlinkage void debug(void); 51asmlinkage void debug(void);
@@ -142,7 +143,7 @@ void printk_address(unsigned long address)
142#endif 143#endif
143 144
144static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 145static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
145 unsigned *usedp, const char **idp) 146 unsigned *usedp, char **idp)
146{ 147{
147 static char ids[][8] = { 148 static char ids[][8] = {
148 [DEBUG_STACK - 1] = "#DB", 149 [DEBUG_STACK - 1] = "#DB",
@@ -161,26 +162,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
161 * 'stack' is in one of them: 162 * 'stack' is in one of them:
162 */ 163 */
163 for (k = 0; k < N_EXCEPTION_STACKS; k++) { 164 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
164 unsigned long end; 165 unsigned long end = per_cpu(orig_ist, cpu).ist[k];
165
166 /*
167 * set 'end' to the end of the exception stack.
168 */
169 switch (k + 1) {
170 /*
171 * TODO: this block is not needed i think, because
172 * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
173 * properly too.
174 */
175#if DEBUG_STKSZ > EXCEPTION_STKSZ
176 case DEBUG_STACK:
177 end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
178 break;
179#endif
180 default:
181 end = per_cpu(orig_ist, cpu).ist[k];
182 break;
183 }
184 /* 166 /*
185 * Is 'stack' above this exception frame's end? 167 * Is 'stack' above this exception frame's end?
186 * If yes then skip to the next frame. 168 * If yes then skip to the next frame.
@@ -234,13 +216,19 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
234 return NULL; 216 return NULL;
235} 217}
236 218
237static int show_trace_unwind(struct unwind_frame_info *info, void *context) 219struct ops_and_data {
220 struct stacktrace_ops *ops;
221 void *data;
222};
223
224static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
238{ 225{
226 struct ops_and_data *oad = (struct ops_and_data *)context;
239 int n = 0; 227 int n = 0;
240 228
241 while (unwind(info) == 0 && UNW_PC(info)) { 229 while (unwind(info) == 0 && UNW_PC(info)) {
242 n++; 230 n++;
243 printk_address(UNW_PC(info)); 231 oad->ops->address(oad->data, UNW_PC(info));
244 if (arch_unw_user_mode(info)) 232 if (arch_unw_user_mode(info))
245 break; 233 break;
246 } 234 }
@@ -254,45 +242,53 @@ static int show_trace_unwind(struct unwind_frame_info *info, void *context)
254 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 242 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
255 */ 243 */
256 244
257void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack) 245void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
246 struct stacktrace_ops *ops, void *data)
258{ 247{
259 const unsigned cpu = safe_smp_processor_id(); 248 const unsigned cpu = smp_processor_id();
260 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 249 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
261 unsigned used = 0; 250 unsigned used = 0;
262 251
263 printk("\nCall Trace:\n");
264
265 if (!tsk) 252 if (!tsk)
266 tsk = current; 253 tsk = current;
267 254
268 if (call_trace >= 0) { 255 if (call_trace >= 0) {
269 int unw_ret = 0; 256 int unw_ret = 0;
270 struct unwind_frame_info info; 257 struct unwind_frame_info info;
258 struct ops_and_data oad = { .ops = ops, .data = data };
271 259
272 if (regs) { 260 if (regs) {
273 if (unwind_init_frame_info(&info, tsk, regs) == 0) 261 if (unwind_init_frame_info(&info, tsk, regs) == 0)
274 unw_ret = show_trace_unwind(&info, NULL); 262 unw_ret = dump_trace_unwind(&info, &oad);
275 } else if (tsk == current) 263 } else if (tsk == current)
276 unw_ret = unwind_init_running(&info, show_trace_unwind, NULL); 264 unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
277 else { 265 else {
278 if (unwind_init_blocked(&info, tsk) == 0) 266 if (unwind_init_blocked(&info, tsk) == 0)
279 unw_ret = show_trace_unwind(&info, NULL); 267 unw_ret = dump_trace_unwind(&info, &oad);
280 } 268 }
281 if (unw_ret > 0) { 269 if (unw_ret > 0) {
282 if (call_trace == 1 && !arch_unw_user_mode(&info)) { 270 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
283 print_symbol("DWARF2 unwinder stuck at %s\n", 271 ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
284 UNW_PC(&info)); 272 UNW_PC(&info));
285 if ((long)UNW_SP(&info) < 0) { 273 if ((long)UNW_SP(&info) < 0) {
286 printk("Leftover inexact backtrace:\n"); 274 ops->warning(data, "Leftover inexact backtrace:\n");
287 stack = (unsigned long *)UNW_SP(&info); 275 stack = (unsigned long *)UNW_SP(&info);
276 if (!stack)
277 return;
288 } else 278 } else
289 printk("Full inexact backtrace again:\n"); 279 ops->warning(data, "Full inexact backtrace again:\n");
290 } else if (call_trace >= 1) 280 } else if (call_trace >= 1)
291 return; 281 return;
292 else 282 else
293 printk("Full inexact backtrace again:\n"); 283 ops->warning(data, "Full inexact backtrace again:\n");
294 } else 284 } else
295 printk("Inexact backtrace:\n"); 285 ops->warning(data, "Inexact backtrace:\n");
286 }
287 if (!stack) {
288 unsigned long dummy;
289 stack = &dummy;
290 if (tsk && tsk != current)
291 stack = (unsigned long *)tsk->thread.rsp;
296 } 292 }
297 293
298 /* 294 /*
@@ -303,7 +299,9 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
303#define HANDLE_STACK(cond) \ 299#define HANDLE_STACK(cond) \
304 do while (cond) { \ 300 do while (cond) { \
305 unsigned long addr = *stack++; \ 301 unsigned long addr = *stack++; \
306 if (kernel_text_address(addr)) { \ 302 if (oops_in_progress ? \
303 __kernel_text_address(addr) : \
304 kernel_text_address(addr)) { \
307 /* \ 305 /* \
308 * If the address is either in the text segment of the \ 306 * If the address is either in the text segment of the \
309 * kernel, or in the region which contains vmalloc'ed \ 307 * kernel, or in the region which contains vmalloc'ed \
@@ -312,7 +310,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
312 * down the cause of the crash will be able to figure \ 310 * down the cause of the crash will be able to figure \
313 * out the call path that was taken. \ 311 * out the call path that was taken. \
314 */ \ 312 */ \
315 printk_address(addr); \ 313 ops->address(data, addr); \
316 } \ 314 } \
317 } while (0) 315 } while (0)
318 316
@@ -321,16 +319,17 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
321 * current stack address. If the stacks consist of nested 319 * current stack address. If the stacks consist of nested
322 * exceptions 320 * exceptions
323 */ 321 */
324 for ( ; ; ) { 322 for (;;) {
325 const char *id; 323 char *id;
326 unsigned long *estack_end; 324 unsigned long *estack_end;
327 estack_end = in_exception_stack(cpu, (unsigned long)stack, 325 estack_end = in_exception_stack(cpu, (unsigned long)stack,
328 &used, &id); 326 &used, &id);
329 327
330 if (estack_end) { 328 if (estack_end) {
331 printk(" <%s>", id); 329 if (ops->stack(data, id) < 0)
330 break;
332 HANDLE_STACK (stack < estack_end); 331 HANDLE_STACK (stack < estack_end);
333 printk(" <EOE>"); 332 ops->stack(data, "<EOE>");
334 /* 333 /*
335 * We link to the next stack via the 334 * We link to the next stack via the
336 * second-to-last pointer (index -2 to end) in the 335 * second-to-last pointer (index -2 to end) in the
@@ -345,7 +344,8 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
345 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 344 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
346 345
347 if (stack >= irqstack && stack < irqstack_end) { 346 if (stack >= irqstack && stack < irqstack_end) {
348 printk(" <IRQ>"); 347 if (ops->stack(data, "IRQ") < 0)
348 break;
349 HANDLE_STACK (stack < irqstack_end); 349 HANDLE_STACK (stack < irqstack_end);
350 /* 350 /*
351 * We link to the next stack (which would be 351 * We link to the next stack (which would be
@@ -354,7 +354,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
354 */ 354 */
355 stack = (unsigned long *) (irqstack_end[-1]); 355 stack = (unsigned long *) (irqstack_end[-1]);
356 irqstack_end = NULL; 356 irqstack_end = NULL;
357 printk(" <EOI>"); 357 ops->stack(data, "EOI");
358 continue; 358 continue;
359 } 359 }
360 } 360 }
@@ -362,19 +362,57 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
362 } 362 }
363 363
364 /* 364 /*
365 * This prints the process stack: 365 * This handles the process stack:
366 */ 366 */
367 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); 367 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
368#undef HANDLE_STACK 368#undef HANDLE_STACK
369}
370EXPORT_SYMBOL(dump_trace);
371
372static void
373print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
374{
375 print_symbol(msg, symbol);
376 printk("\n");
377}
378
379static void print_trace_warning(void *data, char *msg)
380{
381 printk("%s\n", msg);
382}
383
384static int print_trace_stack(void *data, char *name)
385{
386 printk(" <%s> ", name);
387 return 0;
388}
389
390static void print_trace_address(void *data, unsigned long addr)
391{
392 printk_address(addr);
393}
394
395static struct stacktrace_ops print_trace_ops = {
396 .warning = print_trace_warning,
397 .warning_symbol = print_trace_warning_symbol,
398 .stack = print_trace_stack,
399 .address = print_trace_address,
400};
369 401
402void
403show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
404{
405 printk("\nCall Trace:\n");
406 dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
370 printk("\n"); 407 printk("\n");
371} 408}
372 409
373static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp) 410static void
411_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
374{ 412{
375 unsigned long *stack; 413 unsigned long *stack;
376 int i; 414 int i;
377 const int cpu = safe_smp_processor_id(); 415 const int cpu = smp_processor_id();
378 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); 416 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
379 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 417 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
380 418
@@ -428,7 +466,7 @@ void show_registers(struct pt_regs *regs)
428 int i; 466 int i;
429 int in_kernel = !user_mode(regs); 467 int in_kernel = !user_mode(regs);
430 unsigned long rsp; 468 unsigned long rsp;
431 const int cpu = safe_smp_processor_id(); 469 const int cpu = smp_processor_id();
432 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 470 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
433 471
434 rsp = regs->rsp; 472 rsp = regs->rsp;
@@ -503,9 +541,11 @@ static unsigned int die_nest_count;
503 541
504unsigned __kprobes long oops_begin(void) 542unsigned __kprobes long oops_begin(void)
505{ 543{
506 int cpu = safe_smp_processor_id(); 544 int cpu = smp_processor_id();
507 unsigned long flags; 545 unsigned long flags;
508 546
547 oops_enter();
548
509 /* racy, but better than risking deadlock. */ 549 /* racy, but better than risking deadlock. */
510 local_irq_save(flags); 550 local_irq_save(flags);
511 if (!spin_trylock(&die_lock)) { 551 if (!spin_trylock(&die_lock)) {
@@ -534,6 +574,7 @@ void __kprobes oops_end(unsigned long flags)
534 spin_unlock_irqrestore(&die_lock, flags); 574 spin_unlock_irqrestore(&die_lock, flags);
535 if (panic_on_oops) 575 if (panic_on_oops)
536 panic("Fatal exception"); 576 panic("Fatal exception");
577 oops_exit();
537} 578}
538 579
539void __kprobes __die(const char * str, struct pt_regs * regs, long err) 580void __kprobes __die(const char * str, struct pt_regs * regs, long err)
@@ -570,7 +611,7 @@ void die(const char * str, struct pt_regs * regs, long err)
570 do_exit(SIGSEGV); 611 do_exit(SIGSEGV);
571} 612}
572 613
573void __kprobes die_nmi(char *str, struct pt_regs *regs) 614void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
574{ 615{
575 unsigned long flags = oops_begin(); 616 unsigned long flags = oops_begin();
576 617
@@ -578,13 +619,12 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs)
578 * We are in trouble anyway, let's at least try 619
579 * to get a message out. 620 * to get a message out.
580 */ 621 */
581 printk(str, safe_smp_processor_id()); 622 printk(str, smp_processor_id());
582 show_registers(regs); 623 show_registers(regs);
583 if (kexec_should_crash(current)) 624 if (kexec_should_crash(current))
584 crash_kexec(regs); 625 crash_kexec(regs);
585 if (panic_on_timeout || panic_on_oops) 626 if (do_panic || panic_on_oops)
586 panic("nmi watchdog"); 627 panic("Non maskable interrupt");
587 printk("console shuts up ...\n");
588 oops_end(flags); 628 oops_end(flags);
589 nmi_exit(); 629 nmi_exit();
590 local_irq_enable(); 630 local_irq_enable();
@@ -730,8 +770,15 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
730static __kprobes void 770static __kprobes void
731mem_parity_error(unsigned char reason, struct pt_regs * regs) 771mem_parity_error(unsigned char reason, struct pt_regs * regs)
732{ 772{
733 printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); 773 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
734 printk("You probably have a hardware problem with your RAM chips\n"); 774 reason);
775 printk(KERN_EMERG "You probably have a hardware problem with your "
776 "RAM chips\n");
777
778 if (panic_on_unrecovered_nmi)
779 panic("NMI: Not continuing");
780
781 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
735 782
736 /* Clear and disable the memory parity error line. */ 783 /* Clear and disable the memory parity error line. */
737 reason = (reason & 0xf) | 4; 784 reason = (reason & 0xf) | 4;
@@ -754,9 +801,15 @@ io_check_error(unsigned char reason, struct pt_regs * regs)
754 801
755static __kprobes void 802static __kprobes void
756unknown_nmi_error(unsigned char reason, struct pt_regs * regs) 803unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
757{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); 804{
758 printk("Dazed and confused, but trying to continue\n"); 805 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
759 printk("Do you have a strange power saving mode enabled?\n"); 806 reason);
807 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
808
809 if (panic_on_unrecovered_nmi)
810 panic("NMI: Not continuing");
811
812 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
760} 813}
761 814
762/* Runs on IST stack. This code must keep interrupts off all the time. 815/* Runs on IST stack. This code must keep interrupts off all the time.
@@ -776,17 +829,15 @@ asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
776 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) 829 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
777 == NOTIFY_STOP) 830 == NOTIFY_STOP)
778 return; 831 return;
779#ifdef CONFIG_X86_LOCAL_APIC
780 /* 832 /*
781 * Ok, so this is none of the documented NMI sources, 833 * Ok, so this is none of the documented NMI sources,
782 * so it must be the NMI watchdog. 834 * so it must be the NMI watchdog.
783 */ 835 */
784 if (nmi_watchdog > 0) { 836 if (nmi_watchdog_tick(regs,reason))
785 nmi_watchdog_tick(regs,reason);
786 return; 837 return;
787 } 838 if (!do_nmi_callback(regs,cpu))
788#endif 839 unknown_nmi_error(reason, regs);
789 unknown_nmi_error(reason, regs); 840
790 return; 841 return;
791 } 842 }
792 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 843 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -1071,6 +1122,7 @@ asmlinkage void math_state_restore(void)
1071 init_fpu(me); 1122 init_fpu(me);
1072 restore_fpu_checking(&me->thread.i387.fxsave); 1123 restore_fpu_checking(&me->thread.i387.fxsave);
1073 task_thread_info(me)->status |= TS_USEDFPU; 1124 task_thread_info(me)->status |= TS_USEDFPU;
1125 me->fpu_counter++;
1074} 1126}
1075 1127
1076void __init trap_init(void) 1128void __init trap_init(void)
@@ -1109,24 +1161,30 @@ void __init trap_init(void)
1109} 1161}
1110 1162
1111 1163
1112/* Actual parsing is done early in setup.c. */ 1164static int __init oops_setup(char *s)
1113static int __init oops_dummy(char *s)
1114{ 1165{
1115 panic_on_oops = 1; 1166 if (!s)
1116 return 1; 1167 return -EINVAL;
1168 if (!strcmp(s, "panic"))
1169 panic_on_oops = 1;
1170 return 0;
1117} 1171}
1118__setup("oops=", oops_dummy); 1172early_param("oops", oops_setup);
1119 1173
1120static int __init kstack_setup(char *s) 1174static int __init kstack_setup(char *s)
1121{ 1175{
1176 if (!s)
1177 return -EINVAL;
1122 kstack_depth_to_print = simple_strtoul(s,NULL,0); 1178 kstack_depth_to_print = simple_strtoul(s,NULL,0);
1123 return 1; 1179 return 0;
1124} 1180}
1125__setup("kstack=", kstack_setup); 1181early_param("kstack", kstack_setup);
1126 1182
1127#ifdef CONFIG_STACK_UNWIND 1183#ifdef CONFIG_STACK_UNWIND
1128static int __init call_trace_setup(char *s) 1184static int __init call_trace_setup(char *s)
1129{ 1185{
1186 if (!s)
1187 return -EINVAL;
1130 if (strcmp(s, "old") == 0) 1188 if (strcmp(s, "old") == 0)
1131 call_trace = -1; 1189 call_trace = -1;
1132 else if (strcmp(s, "both") == 0) 1190 else if (strcmp(s, "both") == 0)
@@ -1135,7 +1193,7 @@ static int __init call_trace_setup(char *s)
1135 call_trace = 1; 1193 call_trace = 1;
1136 else if (strcmp(s, "new") == 0) 1194 else if (strcmp(s, "new") == 0)
1137 call_trace = 2; 1195 call_trace = 2;
1138 return 1; 1196 return 0;
1139} 1197}
1140__setup("call_trace=", call_trace_setup); 1198early_param("call_trace", call_trace_setup);
1141#endif 1199#endif
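Note on the early_param() conversions in the traps.c hunks above: unlike __setup(), early_param() handlers run from parse_early_param() before most of setup_arch(), and the return convention flips to 0 on success and -EINVAL on a missing argument. A minimal sketch of the same pattern, with a hypothetical option name ("myopt" is not part of this patch):

#include <linux/init.h>
#include <linux/kernel.h>

static unsigned long myopt_val;

static int __init myopt_setup(char *s)
{
        if (!s)                 /* option given without '=value' */
                return -EINVAL;
        myopt_val = simple_strtoul(s, NULL, 0);
        return 0;               /* 0 means handled */
}
early_param("myopt", myopt_setup);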
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 7c4de31471d4..d0564f1bcb0b 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 13OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 14ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 15jiffies_64 = jiffies;
16PHDRS {
17 text PT_LOAD FLAGS(5); /* R_E */
18 data PT_LOAD FLAGS(7); /* RWE */
19 user PT_LOAD FLAGS(7); /* RWE */
20 note PT_NOTE FLAGS(4); /* R__ */
21}
16SECTIONS 22SECTIONS
17{ 23{
18 . = __START_KERNEL; 24 . = __START_KERNEL;
@@ -31,7 +37,7 @@ SECTIONS
31 KPROBES_TEXT 37 KPROBES_TEXT
32 *(.fixup) 38 *(.fixup)
33 *(.gnu.warning) 39 *(.gnu.warning)
34 } = 0x9090 40 } :text = 0x9090
35 /* out-of-line lock text */ 41 /* out-of-line lock text */
36 .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } 42 .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
37 43
@@ -57,7 +63,7 @@ SECTIONS
57 .data : AT(ADDR(.data) - LOAD_OFFSET) { 63 .data : AT(ADDR(.data) - LOAD_OFFSET) {
58 *(.data) 64 *(.data)
59 CONSTRUCTORS 65 CONSTRUCTORS
60 } 66 } :data
61 67
62 _edata = .; /* End of data section */ 68 _edata = .; /* End of data section */
63 69
@@ -89,7 +95,7 @@ SECTIONS
89#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) 95#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
90 96
91 . = VSYSCALL_ADDR; 97 . = VSYSCALL_ADDR;
92 .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } 98 .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
93 __vsyscall_0 = VSYSCALL_VIRT_ADDR; 99 __vsyscall_0 = VSYSCALL_VIRT_ADDR;
94 100
95 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 101 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
@@ -99,6 +105,9 @@ SECTIONS
99 .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } 105 .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
100 vxtime = VVIRT(.vxtime); 106 vxtime = VVIRT(.vxtime);
101 107
108 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
109 vgetcpu_mode = VVIRT(.vgetcpu_mode);
110
102 .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } 111 .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
103 wall_jiffies = VVIRT(.wall_jiffies); 112 wall_jiffies = VVIRT(.wall_jiffies);
104 113
@@ -132,7 +141,7 @@ SECTIONS
132 . = ALIGN(8192); /* init_task */ 141 . = ALIGN(8192); /* init_task */
133 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 142 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
134 *(.data.init_task) 143 *(.data.init_task)
135 } 144 } :data
136 145
137 . = ALIGN(4096); 146 . = ALIGN(4096);
138 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { 147 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
@@ -207,14 +216,12 @@ SECTIONS
207 __initramfs_start = .; 216 __initramfs_start = .;
208 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } 217 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
209 __initramfs_end = .; 218 __initramfs_end = .;
210 /* temporary here to work around NR_CPUS. If you see this comment in 2.6.17+ 219 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
211 complain */
212 . = ALIGN(4096);
213 __init_end = .;
214 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
215 __per_cpu_start = .; 220 __per_cpu_start = .;
216 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } 221 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
217 __per_cpu_end = .; 222 __per_cpu_end = .;
223 . = ALIGN(4096);
224 __init_end = .;
218 225
219 . = ALIGN(4096); 226 . = ALIGN(4096);
220 __nosave_begin = .; 227 __nosave_begin = .;
diff --git a/arch/x86_64/kernel/vsmp.c b/arch/x86_64/kernel/vsmp.c
index 92f70c74965f..044e852bd25e 100644
--- a/arch/x86_64/kernel/vsmp.c
+++ b/arch/x86_64/kernel/vsmp.c
@@ -20,6 +20,9 @@ static int __init vsmp_init(void)
20 void *address; 20 void *address;
21 unsigned int cap, ctl; 21 unsigned int cap, ctl;
22 22
23 if (!early_pci_allowed())
24 return 0;
25
23 /* Check if we are running on a ScaleMP vSMP box */ 26 /* Check if we are running on a ScaleMP vSMP box */
24 if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) || 27 if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) ||
25 (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) 28 (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL))
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index f603037df162..ac48c3857ddb 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -26,6 +26,7 @@
26#include <linux/seqlock.h> 26#include <linux/seqlock.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28#include <linux/sysctl.h> 28#include <linux/sysctl.h>
29#include <linux/getcpu.h>
29 30
30#include <asm/vsyscall.h> 31#include <asm/vsyscall.h>
31#include <asm/pgtable.h> 32#include <asm/pgtable.h>
@@ -33,11 +34,15 @@
33#include <asm/fixmap.h> 34#include <asm/fixmap.h>
34#include <asm/errno.h> 35#include <asm/errno.h>
35#include <asm/io.h> 36#include <asm/io.h>
37#include <asm/segment.h>
38#include <asm/desc.h>
39#include <asm/topology.h>
36 40
37#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 41#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
38 42
39int __sysctl_vsyscall __section_sysctl_vsyscall = 1; 43int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
40seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; 44seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
45int __vgetcpu_mode __section_vgetcpu_mode;
41 46
42#include <asm/unistd.h> 47#include <asm/unistd.h>
43 48
@@ -72,7 +77,8 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
72 __vxtime.tsc_quot) >> 32; 77 __vxtime.tsc_quot) >> 32;
73 /* See comment in x86_64 do_gettimeofday. */ 78 /* See comment in x86_64 do_gettimeofday. */
74 } else { 79 } else {
75 usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - 80 usec += ((readl((void __iomem *)
81 fix_to_virt(VSYSCALL_HPET) + 0xf0) -
76 __vxtime.last) * __vxtime.quot) >> 32; 82 __vxtime.last) * __vxtime.quot) >> 32;
77 } 83 }
78 } while (read_seqretry(&__xtime_lock, sequence)); 84 } while (read_seqretry(&__xtime_lock, sequence));
@@ -127,9 +133,46 @@ time_t __vsyscall(1) vtime(time_t *t)
127 return __xtime.tv_sec; 133 return __xtime.tv_sec;
128} 134}
129 135
130long __vsyscall(2) venosys_0(void) 136/* Fast way to get current CPU and node.
137 This helps to do per node and per CPU caches in user space.
138 The result is not guaranteed without CPU affinity, but usually
139 works out because the scheduler tries to keep a thread on the same
140 CPU.
141
142 tcache must point to a two element sized long array.
143 All arguments can be NULL. */
144long __vsyscall(2)
145vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
131{ 146{
132 return -ENOSYS; 147 unsigned int dummy, p;
148 unsigned long j = 0;
149
150 /* Fast cache - only recompute value once per jiffies and avoid
151 relatively costly rdtscp/cpuid otherwise.
152 This works because the scheduler usually keeps the process
153 on the same CPU and this syscall doesn't guarantee its
154 results anyways.
155 We do this here because otherwise user space would do it on
156 its own in a likely inferior way (no access to jiffies).
157 If you don't like it pass NULL. */
158 if (tcache && tcache->t0 == (j = __jiffies)) {
159 p = tcache->t1;
160 } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
161 /* Load per CPU data from RDTSCP */
162 rdtscp(dummy, dummy, p);
163 } else {
164 /* Load per CPU data from GDT */
165 asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
166 }
167 if (tcache) {
168 tcache->t0 = j;
169 tcache->t1 = p;
170 }
171 if (cpu)
172 *cpu = p & 0xfff;
173 if (node)
174 *node = p >> 12;
175 return 0;
133} 176}
134 177
135long __vsyscall(3) venosys_1(void) 178long __vsyscall(3) venosys_1(void)
@@ -149,7 +192,8 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
149 void __user *buffer, size_t *lenp, loff_t *ppos) 192 void __user *buffer, size_t *lenp, loff_t *ppos)
150{ 193{
151 extern u16 vsysc1, vsysc2; 194 extern u16 vsysc1, vsysc2;
152 u16 *map1, *map2; 195 u16 __iomem *map1;
196 u16 __iomem *map2;
153 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 197 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
154 if (!write) 198 if (!write)
155 return ret; 199 return ret;
@@ -164,11 +208,11 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
164 goto out; 208 goto out;
165 } 209 }
166 if (!sysctl_vsyscall) { 210 if (!sysctl_vsyscall) {
167 *map1 = SYSCALL; 211 writew(SYSCALL, map1);
168 *map2 = SYSCALL; 212 writew(SYSCALL, map2);
169 } else { 213 } else {
170 *map1 = NOP2; 214 writew(NOP2, map1);
171 *map2 = NOP2; 215 writew(NOP2, map2);
172 } 216 }
173 iounmap(map2); 217 iounmap(map2);
174out: 218out:
@@ -200,6 +244,43 @@ static ctl_table kernel_root_table2[] = {
200 244
201#endif 245#endif
202 246
247static void __cpuinit write_rdtscp_cb(void *info)
248{
249 write_rdtscp_aux((unsigned long)info);
250}
251
252void __cpuinit vsyscall_set_cpu(int cpu)
253{
254 unsigned long *d;
255 unsigned long node = 0;
256#ifdef CONFIG_NUMA
257 node = cpu_to_node[cpu];
258#endif
259 if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
260 void *info = (void *)((node << 12) | cpu);
261 /* Can happen on preemptive kernel */
262 if (get_cpu() == cpu)
263 write_rdtscp_cb(info);
264#ifdef CONFIG_SMP
265 else {
266 /* the notifier is unfortunately not executed on the
267 target CPU */
268 smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
269 }
270#endif
271 put_cpu();
272 }
273
274 /* Store cpu number in limit so that it can be loaded quickly
275 in user space in vgetcpu.
276 12 bits for the CPU and 8 bits for the node. */
277 d = (unsigned long *)(cpu_gdt(cpu) + GDT_ENTRY_PER_CPU);
278 *d = 0x0f40000000000ULL;
279 *d |= cpu;
280 *d |= (node & 0xf) << 12;
281 *d |= (node >> 4) << 48;
282}
283
203static void __init map_vsyscall(void) 284static void __init map_vsyscall(void)
204{ 285{
205 extern char __vsyscall_0; 286 extern char __vsyscall_0;
@@ -214,6 +295,7 @@ static int __init vsyscall_init(void)
214 VSYSCALL_ADDR(__NR_vgettimeofday))); 295 VSYSCALL_ADDR(__NR_vgettimeofday)));
215 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); 296 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
216 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); 297 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
298 BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
217 map_vsyscall(); 299 map_vsyscall();
218#ifdef CONFIG_SYSCTL 300#ifdef CONFIG_SYSCTL
219 register_sysctl_table(kernel_root_table2, 0); 301 register_sysctl_table(kernel_root_table2, 0);
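The new vgetcpu entry above fills vsyscall slot 2. A user-space sketch of calling it follows; the fixed address assumes the conventional x86-64 vsyscall layout (base 0xffffffffff600000, 1024-byte slots, so slot 2 is 0xffffffffff600800) and mirrors the two-element tcache the patch comment describes. Treat both as assumptions, not ABI quoted from this diff:

#include <stdio.h>

struct getcpu_cache { unsigned long t0, t1; }; /* jiffies stamp + packed cpu/node */

typedef long (*vgetcpu_t)(unsigned *cpu, unsigned *node,
                          struct getcpu_cache *tcache);

int main(void)
{
        vgetcpu_t vgetcpu = (vgetcpu_t)0xffffffffff600800UL; /* assumed slot-2 address */
        struct getcpu_cache cache = { 0, 0 };
        unsigned cpu, node;

        if (vgetcpu(&cpu, &node, &cache) == 0) /* all arguments may be NULL */
                printf("cpu=%u node=%u\n", cpu, node);
        return 0;
}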
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 370952c4ff22..c3454af5e3a2 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -29,6 +29,7 @@ EXPORT_SYMBOL(__put_user_8);
29EXPORT_SYMBOL(copy_user_generic); 29EXPORT_SYMBOL(copy_user_generic);
30EXPORT_SYMBOL(copy_from_user); 30EXPORT_SYMBOL(copy_from_user);
31EXPORT_SYMBOL(copy_to_user); 31EXPORT_SYMBOL(copy_to_user);
32EXPORT_SYMBOL(__copy_from_user_inatomic);
32 33
33EXPORT_SYMBOL(copy_page); 34EXPORT_SYMBOL(copy_page);
34EXPORT_SYMBOL(clear_page); 35EXPORT_SYMBOL(clear_page);
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
index ccef6ae747a3..b78d4170fce2 100644
--- a/arch/x86_64/lib/Makefile
+++ b/arch/x86_64/lib/Makefile
@@ -9,4 +9,4 @@ obj-y := io.o iomap_copy.o
9lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \ 9lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
10 usercopy.o getuser.o putuser.o \ 10 usercopy.o getuser.o putuser.o \
11 thunk.o clear_page.o copy_page.o bitstr.o bitops.o 11 thunk.o clear_page.o copy_page.o bitstr.o bitops.o
12lib-y += memcpy.o memmove.o memset.o copy_user.o 12lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 1f81b79b796c..9a10a78bb4a4 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
1#include <linux/linkage.h>
2#include <asm/dwarf2.h>
3
1/* 4/*
2 * Zero a page. 5 * Zero a page.
3 * rdi page 6 * rdi page
4 */ 7 */
5 .globl clear_page 8 ALIGN
6 .p2align 4 9clear_page_c:
7clear_page: 10 CFI_STARTPROC
11 movl $4096/8,%ecx
12 xorl %eax,%eax
13 rep stosq
14 ret
15 CFI_ENDPROC
16ENDPROC(clear_page)
17
18ENTRY(clear_page)
19 CFI_STARTPROC
8 xorl %eax,%eax 20 xorl %eax,%eax
9 movl $4096/64,%ecx 21 movl $4096/64,%ecx
10 .p2align 4 22 .p2align 4
@@ -23,28 +35,25 @@ clear_page:
23 jnz .Lloop 35 jnz .Lloop
24 nop 36 nop
25 ret 37 ret
26clear_page_end: 38 CFI_ENDPROC
39.Lclear_page_end:
40ENDPROC(clear_page)
27 41
28 /* Some CPUs run faster using the string instructions. 42 /* Some CPUs run faster using the string instructions.
29 It is also a lot simpler. Use this when possible */ 43 It is also a lot simpler. Use this when possible */
30 44
31#include <asm/cpufeature.h> 45#include <asm/cpufeature.h>
32 46
47 .section .altinstr_replacement,"ax"
481: .byte 0xeb /* jmp <disp8> */
49 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
502:
51 .previous
33 .section .altinstructions,"a" 52 .section .altinstructions,"a"
34 .align 8 53 .align 8
35 .quad clear_page 54 .quad clear_page
36 .quad clear_page_c 55 .quad 1b
37 .byte X86_FEATURE_REP_GOOD 56 .byte X86_FEATURE_REP_GOOD
38 .byte clear_page_end-clear_page 57 .byte .Lclear_page_end - clear_page
39 .byte clear_page_c_end-clear_page_c 58 .byte 2b - 1b
40 .previous
41
42 .section .altinstr_replacement,"ax"
43clear_page_c:
44 movl $4096/8,%ecx
45 xorl %eax,%eax
46 rep
47 stosq
48 ret
49clear_page_c_end:
50 .previous 59 .previous
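The rewritten .altinstructions block above emits one record per patch site; at boot, apply_alternatives() copies the replacement (here a 2-byte jmp to clear_page_c) over the start of clear_page when the CPU advertises X86_FEATURE_REP_GOOD. A sketch of the record layout those .quad/.byte directives describe; field names follow this era's <asm/alternative.h> and should be treated as an assumption:

#include <linux/types.h>

struct alt_instr {
        u8 *instr;          /* original site:        .quad clear_page           */
        u8 *replacement;    /* patch body:           .quad 1b                   */
        u8  cpuid;          /* required feature bit: .byte X86_FEATURE_REP_GOOD */
        u8  instrlen;       /* original length:      .byte .Lclear_page_end - clear_page */
        u8  replacementlen; /* patch length:         .byte 2b - 1b              */
        u8  pad;
};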
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 8fa19d96a7ee..0ebb03b60e79 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ 1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
2 2
3#include <linux/config.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6
7 ALIGN
8copy_page_c:
9 CFI_STARTPROC
10 movl $4096/8,%ecx
11 rep movsq
12 ret
13 CFI_ENDPROC
14ENDPROC(copy_page_c)
15
3/* Don't use streaming store because it's better when the target 16/* Don't use streaming store because it's better when the target
4 ends up in cache. */ 17 ends up in cache. */
5 18
6/* Could vary the prefetch distance based on SMP/UP */ 19/* Could vary the prefetch distance based on SMP/UP */
7 20
8 .globl copy_page 21ENTRY(copy_page)
9 .p2align 4 22 CFI_STARTPROC
10copy_page:
11 subq $3*8,%rsp 23 subq $3*8,%rsp
24 CFI_ADJUST_CFA_OFFSET 3*8
12 movq %rbx,(%rsp) 25 movq %rbx,(%rsp)
26 CFI_REL_OFFSET rbx, 0
13 movq %r12,1*8(%rsp) 27 movq %r12,1*8(%rsp)
28 CFI_REL_OFFSET r12, 1*8
14 movq %r13,2*8(%rsp) 29 movq %r13,2*8(%rsp)
30 CFI_REL_OFFSET r13, 2*8
15 31
16 movl $(4096/64)-5,%ecx 32 movl $(4096/64)-5,%ecx
17 .p2align 4 33 .p2align 4
@@ -72,30 +88,33 @@ copy_page:
72 jnz .Loop2 88 jnz .Loop2
73 89
74 movq (%rsp),%rbx 90 movq (%rsp),%rbx
91 CFI_RESTORE rbx
75 movq 1*8(%rsp),%r12 92 movq 1*8(%rsp),%r12
93 CFI_RESTORE r12
76 movq 2*8(%rsp),%r13 94 movq 2*8(%rsp),%r13
95 CFI_RESTORE r13
77 addq $3*8,%rsp 96 addq $3*8,%rsp
97 CFI_ADJUST_CFA_OFFSET -3*8
78 ret 98 ret
99.Lcopy_page_end:
100 CFI_ENDPROC
101ENDPROC(copy_page)
79 102
80 /* Some CPUs run faster using the string copy instructions. 103 /* Some CPUs run faster using the string copy instructions.
81 It is also a lot simpler. Use this when possible */ 104 It is also a lot simpler. Use this when possible */
82 105
83#include <asm/cpufeature.h> 106#include <asm/cpufeature.h>
84 107
108 .section .altinstr_replacement,"ax"
1091: .byte 0xeb /* jmp <disp8> */
110 .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
1112:
112 .previous
85 .section .altinstructions,"a" 113 .section .altinstructions,"a"
86 .align 8 114 .align 8
87 .quad copy_page 115 .quad copy_page
88 .quad copy_page_c 116 .quad 1b
89 .byte X86_FEATURE_REP_GOOD 117 .byte X86_FEATURE_REP_GOOD
90 .byte copy_page_c_end-copy_page_c 118 .byte .Lcopy_page_end - copy_page
91 .byte copy_page_c_end-copy_page_c 119 .byte 2b - 1b
92 .previous
93
94 .section .altinstr_replacement,"ax"
95copy_page_c:
96 movl $4096/8,%ecx
97 rep
98 movsq
99 ret
100copy_page_c_end:
101 .previous 120 .previous
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index f64569b83b54..70bebd310408 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -4,56 +4,78 @@
4 * Functions to copy from and to user space. 4 * Functions to copy from and to user space.
5 */ 5 */
6 6
7#include <linux/linkage.h>
8#include <asm/dwarf2.h>
9
7#define FIX_ALIGNMENT 1 10#define FIX_ALIGNMENT 1
8 11
9 #include <asm/current.h> 12#include <asm/current.h>
10 #include <asm/asm-offsets.h> 13#include <asm/asm-offsets.h>
11 #include <asm/thread_info.h> 14#include <asm/thread_info.h>
12 #include <asm/cpufeature.h> 15#include <asm/cpufeature.h>
13 16
14/* Standard copy_to_user with segment limit checking */ 17 .macro ALTERNATIVE_JUMP feature,orig,alt
15 .globl copy_to_user 180:
16 .p2align 4
17copy_to_user:
18 GET_THREAD_INFO(%rax)
19 movq %rdi,%rcx
20 addq %rdx,%rcx
21 jc bad_to_user
22 cmpq threadinfo_addr_limit(%rax),%rcx
23 jae bad_to_user
242:
25 .byte 0xe9 /* 32bit jump */ 19 .byte 0xe9 /* 32bit jump */
26 .long .Lcug-1f 20 .long \orig-1f /* by default jump to orig */
271: 211:
28
29 .section .altinstr_replacement,"ax" 22 .section .altinstr_replacement,"ax"
303: .byte 0xe9 /* replacement jmp with 8 bit immediate */ 232: .byte 0xe9 /* near jump with 32bit immediate */
31 .long copy_user_generic_c-1b /* offset */ 24 .long \alt-1b /* offset */ /* or alternatively to alt */
32 .previous 25 .previous
33 .section .altinstructions,"a" 26 .section .altinstructions,"a"
34 .align 8 27 .align 8
28 .quad 0b
35 .quad 2b 29 .quad 2b
36 .quad 3b 30 .byte \feature /* when feature is set */
37 .byte X86_FEATURE_REP_GOOD
38 .byte 5 31 .byte 5
39 .byte 5 32 .byte 5
40 .previous 33 .previous
34 .endm
35
36/* Standard copy_to_user with segment limit checking */
37ENTRY(copy_to_user)
38 CFI_STARTPROC
39 GET_THREAD_INFO(%rax)
40 movq %rdi,%rcx
41 addq %rdx,%rcx
42 jc bad_to_user
43 cmpq threadinfo_addr_limit(%rax),%rcx
44 jae bad_to_user
45 xorl %eax,%eax /* clear zero flag */
46 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
47 CFI_ENDPROC
48
49ENTRY(copy_user_generic)
50 CFI_STARTPROC
51 movl $1,%ecx /* set zero flag */
52 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
53 CFI_ENDPROC
54
55ENTRY(__copy_from_user_inatomic)
56 CFI_STARTPROC
57 xorl %ecx,%ecx /* clear zero flag */
58 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
59 CFI_ENDPROC
41 60
42/* Standard copy_from_user with segment limit checking */ 61/* Standard copy_from_user with segment limit checking */
43 .globl copy_from_user 62ENTRY(copy_from_user)
44 .p2align 4 63 CFI_STARTPROC
45copy_from_user:
46 GET_THREAD_INFO(%rax) 64 GET_THREAD_INFO(%rax)
47 movq %rsi,%rcx 65 movq %rsi,%rcx
48 addq %rdx,%rcx 66 addq %rdx,%rcx
49 jc bad_from_user 67 jc bad_from_user
50 cmpq threadinfo_addr_limit(%rax),%rcx 68 cmpq threadinfo_addr_limit(%rax),%rcx
51 jae bad_from_user 69 jae bad_from_user
52 /* FALL THROUGH to copy_user_generic */ 70 movl $1,%ecx /* set zero flag */
71 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
72 CFI_ENDPROC
73ENDPROC(copy_from_user)
53 74
54 .section .fixup,"ax" 75 .section .fixup,"ax"
55 /* must zero dest */ 76 /* must zero dest */
56bad_from_user: 77bad_from_user:
78 CFI_STARTPROC
57 movl %edx,%ecx 79 movl %edx,%ecx
58 xorl %eax,%eax 80 xorl %eax,%eax
59 rep 81 rep
@@ -61,40 +83,32 @@ bad_from_user:
61bad_to_user: 83bad_to_user:
62 movl %edx,%eax 84 movl %edx,%eax
63 ret 85 ret
86 CFI_ENDPROC
87END(bad_from_user)
64 .previous 88 .previous
65 89
66 90
67/* 91/*
68 * copy_user_generic - memory copy with exception handling. 92 * copy_user_generic_unrolled - memory copy with exception handling.
93 * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
69 * 94 *
70 * Input: 95 * Input:
71 * rdi destination 96 * rdi destination
72 * rsi source 97 * rsi source
73 * rdx count 98 * rdx count
99 * ecx zero flag -- if true zero destination on error
74 * 100 *
75 * Output: 101 * Output:
76 * eax uncopied bytes or 0 if successful. 102 * eax uncopied bytes or 0 if successful.
77 */ 103 */
78 .globl copy_user_generic 104ENTRY(copy_user_generic_unrolled)
79 .p2align 4 105 CFI_STARTPROC
80copy_user_generic:
81 .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
82 .byte 0x66,0x90
831:
84 .section .altinstr_replacement,"ax"
852: .byte 0xe9 /* near jump with 32bit immediate */
86 .long copy_user_generic_c-1b /* offset */
87 .previous
88 .section .altinstructions,"a"
89 .align 8
90 .quad copy_user_generic
91 .quad 2b
92 .byte X86_FEATURE_REP_GOOD
93 .byte 5
94 .byte 5
95 .previous
96.Lcug:
97 pushq %rbx 106 pushq %rbx
107 CFI_ADJUST_CFA_OFFSET 8
108 CFI_REL_OFFSET rbx, 0
109 pushq %rcx
110 CFI_ADJUST_CFA_OFFSET 8
111 CFI_REL_OFFSET rcx, 0
98 xorl %eax,%eax /*zero for the exception handler */ 112 xorl %eax,%eax /*zero for the exception handler */
99 113
100#ifdef FIX_ALIGNMENT 114#ifdef FIX_ALIGNMENT
@@ -168,9 +182,16 @@ copy_user_generic:
168 decl %ecx 182 decl %ecx
169 jnz .Lloop_1 183 jnz .Lloop_1
170 184
185 CFI_REMEMBER_STATE
171.Lende: 186.Lende:
187 popq %rcx
188 CFI_ADJUST_CFA_OFFSET -8
189 CFI_RESTORE rcx
172 popq %rbx 190 popq %rbx
191 CFI_ADJUST_CFA_OFFSET -8
192 CFI_RESTORE rbx
173 ret 193 ret
194 CFI_RESTORE_STATE
174 195
175#ifdef FIX_ALIGNMENT 196#ifdef FIX_ALIGNMENT
176 /* align destination */ 197 /* align destination */
@@ -252,6 +273,8 @@ copy_user_generic:
252 addl %ecx,%edx 273 addl %ecx,%edx
253 /* edx: bytes to zero, rdi: dest, eax:zero */ 274 /* edx: bytes to zero, rdi: dest, eax:zero */
254.Lzero_rest: 275.Lzero_rest:
276 cmpl $0,(%rsp)
277 jz .Le_zero
255 movq %rdx,%rcx 278 movq %rdx,%rcx
256.Le_byte: 279.Le_byte:
257 xorl %eax,%eax 280 xorl %eax,%eax
@@ -261,6 +284,9 @@ copy_user_generic:
261.Le_zero: 284.Le_zero:
262 movq %rdx,%rax 285 movq %rdx,%rax
263 jmp .Lende 286 jmp .Lende
287 CFI_ENDPROC
 288ENDPROC(copy_user_generic_unrolled)
289
264 290
265 /* Some CPUs run faster using the string copy instructions. 291 /* Some CPUs run faster using the string copy instructions.
266 This is also a lot simpler. Use them when possible. 292 This is also a lot simpler. Use them when possible.
@@ -270,6 +296,7 @@ copy_user_generic:
270 /* rdi destination 296 /* rdi destination
271 * rsi source 297 * rsi source
272 * rdx count 298 * rdx count
299 * ecx zero flag
273 * 300 *
274 * Output: 301 * Output:
275 * eax uncopied bytes or 0 if successfull. 302 * eax uncopied bytes or 0 if successfull.
@@ -280,22 +307,48 @@ copy_user_generic:
280 * And more would be dangerous because both Intel and AMD have 307 * And more would be dangerous because both Intel and AMD have
281 * errata with rep movsq > 4GB. If someone feels the need to fix 308 * errata with rep movsq > 4GB. If someone feels the need to fix
282 * this please consider this. 309 * this please consider this.
283 */ 310 */
284copy_user_generic_c: 311ENTRY(copy_user_generic_string)
312 CFI_STARTPROC
313 movl %ecx,%r8d /* save zero flag */
285 movl %edx,%ecx 314 movl %edx,%ecx
286 shrl $3,%ecx 315 shrl $3,%ecx
287 andl $7,%edx 316 andl $7,%edx
317 jz 10f
2881: rep 3181: rep
289 movsq 319 movsq
290 movl %edx,%ecx 320 movl %edx,%ecx
2912: rep 3212: rep
292 movsb 322 movsb
2934: movl %ecx,%eax 3239: movl %ecx,%eax
294 ret 324 ret
2953: lea (%rdx,%rcx,8),%rax 325
326 /* multiple of 8 byte */
32710: rep
328 movsq
329 xor %eax,%eax
296 ret 330 ret
297 331
332 /* exception handling */
3333: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
334 jmp 6f
3355: movl %ecx,%eax /* exception on byte loop */
336 /* eax: left over bytes */
3376: testl %r8d,%r8d /* zero flag set? */
338 jz 7f
339 movl %eax,%ecx /* initialize x86 loop counter */
340 push %rax
341 xorl %eax,%eax
3428: rep
343 stosb /* zero the rest */
34411: pop %rax
3457: ret
346 CFI_ENDPROC
 347END(copy_user_generic_string)
348
298 .section __ex_table,"a" 349 .section __ex_table,"a"
299 .quad 1b,3b 350 .quad 1b,3b
300 .quad 2b,4b 351 .quad 2b,5b
352 .quad 8b,11b
353 .quad 10b,3b
301 .previous 354 .previous
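The reworked copy routines above thread a "zero flag" through %ecx (saved in %r8d in the string variant): copy_from_user sets it, __copy_from_user_inatomic clears it, and it controls whether a faulting copy zeroes the unwritten tail of the destination. A rough standalone C model of that contract (the fault is simulated by a parameter; this is a sketch, not the kernel's code):

#include <stddef.h>
#include <string.h>

/* Returns the number of bytes NOT copied, as the asm does in %eax.
 * If zero_on_fault is set, the uncopied tail of dst is cleared. */
static size_t copy_user_model(void *dst, const void *src, size_t n,
                              int zero_on_fault, size_t fault_after)
{
        size_t done = n < fault_after ? n : fault_after; /* simulated fault point */
        size_t left = n - done;

        memcpy(dst, src, done);
        if (left && zero_on_fault)
                memset((char *)dst + done, 0, left);
        return left;
}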
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index 72fd55ee896e..f0dba36578ea 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
5 * License. See the file COPYING in the main directory of this archive 5 * License. See the file COPYING in the main directory of this archive
6 * for more details. No warranty for anything given at all. 6 * for more details. No warranty for anything given at all.
7 */ 7 */
8 #include <linux/linkage.h> 8#include <linux/linkage.h>
9 #include <asm/errno.h> 9#include <asm/dwarf2.h>
10#include <asm/errno.h>
10 11
11/* 12/*
12 * Checksum copy with exception handling. 13 * Checksum copy with exception handling.
@@ -53,19 +54,24 @@
53 .endm 54 .endm
54 55
55 56
56 .globl csum_partial_copy_generic 57ENTRY(csum_partial_copy_generic)
57 .p2align 4 58 CFI_STARTPROC
58csum_partial_copy_generic:
59 cmpl $3*64,%edx 59 cmpl $3*64,%edx
60 jle .Lignore 60 jle .Lignore
61 61
62.Lignore: 62.Lignore:
63 subq $7*8,%rsp 63 subq $7*8,%rsp
64 CFI_ADJUST_CFA_OFFSET 7*8
64 movq %rbx,2*8(%rsp) 65 movq %rbx,2*8(%rsp)
66 CFI_REL_OFFSET rbx, 2*8
65 movq %r12,3*8(%rsp) 67 movq %r12,3*8(%rsp)
68 CFI_REL_OFFSET r12, 3*8
66 movq %r14,4*8(%rsp) 69 movq %r14,4*8(%rsp)
70 CFI_REL_OFFSET r14, 4*8
67 movq %r13,5*8(%rsp) 71 movq %r13,5*8(%rsp)
72 CFI_REL_OFFSET r13, 5*8
68 movq %rbp,6*8(%rsp) 73 movq %rbp,6*8(%rsp)
74 CFI_REL_OFFSET rbp, 6*8
69 75
70 movq %r8,(%rsp) 76 movq %r8,(%rsp)
71 movq %r9,1*8(%rsp) 77 movq %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
208 addl %ebx,%eax 214 addl %ebx,%eax
209 adcl %r9d,%eax /* carry */ 215 adcl %r9d,%eax /* carry */
210 216
217 CFI_REMEMBER_STATE
211.Lende: 218.Lende:
212 movq 2*8(%rsp),%rbx 219 movq 2*8(%rsp),%rbx
220 CFI_RESTORE rbx
213 movq 3*8(%rsp),%r12 221 movq 3*8(%rsp),%r12
222 CFI_RESTORE r12
214 movq 4*8(%rsp),%r14 223 movq 4*8(%rsp),%r14
224 CFI_RESTORE r14
215 movq 5*8(%rsp),%r13 225 movq 5*8(%rsp),%r13
226 CFI_RESTORE r13
216 movq 6*8(%rsp),%rbp 227 movq 6*8(%rsp),%rbp
228 CFI_RESTORE rbp
217 addq $7*8,%rsp 229 addq $7*8,%rsp
230 CFI_ADJUST_CFA_OFFSET -7*8
218 ret 231 ret
232 CFI_RESTORE_STATE
219 233
220 /* Exception handlers. Very simple, zeroing is done in the wrappers */ 234 /* Exception handlers. Very simple, zeroing is done in the wrappers */
221.Lbad_source: 235.Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
231 jz .Lende 245 jz .Lende
232 movl $-EFAULT,(%rax) 246 movl $-EFAULT,(%rax)
233 jmp .Lende 247 jmp .Lende
248 CFI_ENDPROC
249ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
index 3844d5e885a4..5448876261f8 100644
--- a/arch/x86_64/lib/getuser.S
+++ b/arch/x86_64/lib/getuser.S
@@ -27,25 +27,26 @@
27 */ 27 */
28 28
29#include <linux/linkage.h> 29#include <linux/linkage.h>
30#include <asm/dwarf2.h>
30#include <asm/page.h> 31#include <asm/page.h>
31#include <asm/errno.h> 32#include <asm/errno.h>
32#include <asm/asm-offsets.h> 33#include <asm/asm-offsets.h>
33#include <asm/thread_info.h> 34#include <asm/thread_info.h>
34 35
35 .text 36 .text
36 .p2align 4 37ENTRY(__get_user_1)
37.globl __get_user_1 38 CFI_STARTPROC
38__get_user_1:
39 GET_THREAD_INFO(%r8) 39 GET_THREAD_INFO(%r8)
40 cmpq threadinfo_addr_limit(%r8),%rcx 40 cmpq threadinfo_addr_limit(%r8),%rcx
41 jae bad_get_user 41 jae bad_get_user
421: movzb (%rcx),%edx 421: movzb (%rcx),%edx
43 xorl %eax,%eax 43 xorl %eax,%eax
44 ret 44 ret
45 CFI_ENDPROC
46ENDPROC(__get_user_1)
45 47
46 .p2align 4 48ENTRY(__get_user_2)
47.globl __get_user_2 49 CFI_STARTPROC
48__get_user_2:
49 GET_THREAD_INFO(%r8) 50 GET_THREAD_INFO(%r8)
50 addq $1,%rcx 51 addq $1,%rcx
51 jc 20f 52 jc 20f
@@ -57,10 +58,11 @@ __get_user_2:
57 ret 58 ret
5820: decq %rcx 5920: decq %rcx
59 jmp bad_get_user 60 jmp bad_get_user
61 CFI_ENDPROC
62ENDPROC(__get_user_2)
60 63
61 .p2align 4 64ENTRY(__get_user_4)
62.globl __get_user_4 65 CFI_STARTPROC
63__get_user_4:
64 GET_THREAD_INFO(%r8) 66 GET_THREAD_INFO(%r8)
65 addq $3,%rcx 67 addq $3,%rcx
66 jc 30f 68 jc 30f
@@ -72,10 +74,11 @@ __get_user_4:
72 ret 74 ret
7330: subq $3,%rcx 7530: subq $3,%rcx
74 jmp bad_get_user 76 jmp bad_get_user
77 CFI_ENDPROC
78ENDPROC(__get_user_4)
75 79
76 .p2align 4 80ENTRY(__get_user_8)
77.globl __get_user_8 81 CFI_STARTPROC
78__get_user_8:
79 GET_THREAD_INFO(%r8) 82 GET_THREAD_INFO(%r8)
80 addq $7,%rcx 83 addq $7,%rcx
81 jc 40f 84 jc 40f
@@ -87,11 +90,16 @@ __get_user_8:
87 ret 90 ret
8840: subq $7,%rcx 9140: subq $7,%rcx
89 jmp bad_get_user 92 jmp bad_get_user
93 CFI_ENDPROC
94ENDPROC(__get_user_8)
90 95
91bad_get_user: 96bad_get_user:
97 CFI_STARTPROC
92 xorl %edx,%edx 98 xorl %edx,%edx
93 movq $(-EFAULT),%rax 99 movq $(-EFAULT),%rax
94 ret 100 ret
101 CFI_ENDPROC
102END(bad_get_user)
95 103
96.section __ex_table,"a" 104.section __ex_table,"a"
97 .quad 1b,bad_get_user 105 .quad 1b,bad_get_user
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
index 8bbade5fea05..05a95e713da8 100644
--- a/arch/x86_64/lib/iomap_copy.S
+++ b/arch/x86_64/lib/iomap_copy.S
@@ -15,12 +15,16 @@
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
16 */ 16 */
17 17
18#include <linux/linkage.h>
19#include <asm/dwarf2.h>
20
18/* 21/*
19 * override generic version in lib/iomap_copy.c 22 * override generic version in lib/iomap_copy.c
20 */ 23 */
21 .globl __iowrite32_copy 24ENTRY(__iowrite32_copy)
22 .p2align 4 25 CFI_STARTPROC
23__iowrite32_copy:
24 movl %edx,%ecx 26 movl %edx,%ecx
25 rep movsd 27 rep movsd
26 ret 28 ret
29 CFI_ENDPROC
30ENDPROC(__iowrite32_copy)
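For reference, the rep movsd body above is just a 32-bit-word copy; a C model of what __iowrite32_copy does, and roughly what the generic lib/iomap_copy.c fallback it overrides looks like (a sketch, not a quotation):

#include <stdint.h>
#include <stddef.h>

static void iowrite32_copy_model(volatile uint32_t *to,
                                 const uint32_t *from, size_t count)
{
        while (count--)     /* count is in 32-bit words, as in %edx above */
                *to++ = *from++;
}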
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 5554948b5554..967b22fa7d07 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@
1/* Copyright 2002 Andi Kleen */ 1/* Copyright 2002 Andi Kleen */
2 2
3 #include <asm/cpufeature.h> 3#include <linux/config.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6#include <asm/cpufeature.h>
7
4/* 8/*
5 * memcpy - Copy a memory block. 9 * memcpy - Copy a memory block.
6 * 10 *
@@ -13,12 +17,26 @@
13 * rax original destination 17 * rax original destination
14 */ 18 */
15 19
16 .globl __memcpy 20 ALIGN
17 .globl memcpy 21memcpy_c:
18 .p2align 4 22 CFI_STARTPROC
19__memcpy: 23 movq %rdi,%rax
20memcpy: 24 movl %edx,%ecx
25 shrl $3,%ecx
26 andl $7,%edx
27 rep movsq
28 movl %edx,%ecx
29 rep movsb
30 ret
31 CFI_ENDPROC
32ENDPROC(memcpy_c)
33
34ENTRY(__memcpy)
35ENTRY(memcpy)
36 CFI_STARTPROC
21 pushq %rbx 37 pushq %rbx
38 CFI_ADJUST_CFA_OFFSET 8
39 CFI_REL_OFFSET rbx, 0
22 movq %rdi,%rax 40 movq %rdi,%rax
23 41
24 movl %edx,%ecx 42 movl %edx,%ecx
@@ -86,36 +104,27 @@ memcpy:
86 104
87.Lende: 105.Lende:
88 popq %rbx 106 popq %rbx
107 CFI_ADJUST_CFA_OFFSET -8
108 CFI_RESTORE rbx
89 ret 109 ret
90.Lfinal: 110.Lfinal:
111 CFI_ENDPROC
112ENDPROC(memcpy)
113ENDPROC(__memcpy)
91 114
92 /* Some CPUs run faster using the string copy instructions. 115 /* Some CPUs run faster using the string copy instructions.
93 It is also a lot simpler. Use this when possible */ 116 It is also a lot simpler. Use this when possible */
94 117
118 .section .altinstr_replacement,"ax"
1191: .byte 0xeb /* jmp <disp8> */
120 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1212:
122 .previous
95 .section .altinstructions,"a" 123 .section .altinstructions,"a"
96 .align 8 124 .align 8
97 .quad memcpy 125 .quad memcpy
98 .quad memcpy_c 126 .quad 1b
99 .byte X86_FEATURE_REP_GOOD 127 .byte X86_FEATURE_REP_GOOD
100 .byte .Lfinal-memcpy 128 .byte .Lfinal - memcpy
101 .byte memcpy_c_end-memcpy_c 129 .byte 2b - 1b
102 .previous
103
104 .section .altinstr_replacement,"ax"
105 /* rdi destination
106 * rsi source
107 * rdx count
108 */
109memcpy_c:
110 movq %rdi,%rax
111 movl %edx,%ecx
112 shrl $3,%ecx
113 andl $7,%edx
114 rep
115 movsq
116 movl %edx,%ecx
117 rep
118 movsb
119 ret
120memcpy_c_end:
121 .previous 130 .previous
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index ad397f2c7de8..09ed1f6b0eaa 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -1,4 +1,9 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs */ 1/* Copyright 2002 Andi Kleen, SuSE Labs */
2
3#include <linux/config.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6
2/* 7/*
3 * ISO C memset - set a memory block to a byte value. 8 * ISO C memset - set a memory block to a byte value.
4 * 9 *
@@ -8,11 +13,29 @@
8 * 13 *
9 * rax original destination 14 * rax original destination
10 */ 15 */
11 .globl __memset 16 ALIGN
12 .globl memset 17memset_c:
13 .p2align 4 18 CFI_STARTPROC
14memset: 19 movq %rdi,%r9
15__memset: 20 movl %edx,%r8d
21 andl $7,%r8d
22 movl %edx,%ecx
23 shrl $3,%ecx
24 /* expand byte value */
25 movzbl %sil,%esi
26 movabs $0x0101010101010101,%rax
27 mulq %rsi /* with rax, clobbers rdx */
28 rep stosq
29 movl %r8d,%ecx
30 rep stosb
31 movq %r9,%rax
32 ret
33 CFI_ENDPROC
34ENDPROC(memset_c)
35
36ENTRY(memset)
37ENTRY(__memset)
38 CFI_STARTPROC
16 movq %rdi,%r10 39 movq %rdi,%r10
17 movq %rdx,%r11 40 movq %rdx,%r11
18 41
@@ -25,6 +48,7 @@ __memset:
25 movl %edi,%r9d 48 movl %edi,%r9d
26 andl $7,%r9d 49 andl $7,%r9d
27 jnz .Lbad_alignment 50 jnz .Lbad_alignment
51 CFI_REMEMBER_STATE
28.Lafter_bad_alignment: 52.Lafter_bad_alignment:
29 53
30 movl %r11d,%ecx 54 movl %r11d,%ecx
@@ -75,6 +99,7 @@ __memset:
75 movq %r10,%rax 99 movq %r10,%rax
76 ret 100 ret
77 101
102 CFI_RESTORE_STATE
78.Lbad_alignment: 103.Lbad_alignment:
79 cmpq $7,%r11 104 cmpq $7,%r11
80 jbe .Lhandle_7 105 jbe .Lhandle_7
@@ -84,42 +109,26 @@ __memset:
84 addq %r8,%rdi 109 addq %r8,%rdi
85 subq %r8,%r11 110 subq %r8,%r11
86 jmp .Lafter_bad_alignment 111 jmp .Lafter_bad_alignment
112.Lfinal:
113 CFI_ENDPROC
114ENDPROC(memset)
115ENDPROC(__memset)
87 116
88 /* Some CPUs run faster using the string instructions. 117 /* Some CPUs run faster using the string instructions.
89 It is also a lot simpler. Use this when possible */ 118 It is also a lot simpler. Use this when possible */
90 119
91#include <asm/cpufeature.h> 120#include <asm/cpufeature.h>
92 121
122 .section .altinstr_replacement,"ax"
1231: .byte 0xeb /* jmp <disp8> */
124 .byte (memset_c - memset) - (2f - 1b) /* offset */
1252:
126 .previous
93 .section .altinstructions,"a" 127 .section .altinstructions,"a"
94 .align 8 128 .align 8
95 .quad memset 129 .quad memset
96 .quad memset_c 130 .quad 1b
97 .byte X86_FEATURE_REP_GOOD 131 .byte X86_FEATURE_REP_GOOD
98 .byte memset_c_end-memset_c 132 .byte .Lfinal - memset
99 .byte memset_c_end-memset_c 133 .byte 2b - 1b
100 .previous
101
102 .section .altinstr_replacement,"ax"
103 /* rdi destination
104 * rsi value
105 * rdx count
106 */
107memset_c:
108 movq %rdi,%r9
109 movl %edx,%r8d
110 andl $7,%r8d
111 movl %edx,%ecx
112 shrl $3,%ecx
113 /* expand byte value */
114 movzbl %sil,%esi
115 movabs $0x0101010101010101,%rax
116 mulq %rsi /* with rax, clobbers rdx */
117 rep
118 stosq
119 movl %r8d,%ecx
120 rep
121 stosb
122 movq %r9,%rax
123 ret
124memset_c_end:
125 .previous 134 .previous
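memset_c above expands the fill byte into a full quadword by multiplying it with 0x0101010101010101, then stores the bulk with rep stosq and the tail with rep stosb. The expansion trick in plain C:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t  c = 0xAB;
        uint64_t pattern = (uint64_t)c * 0x0101010101010101ULL;

        /* prints abababababababab: the byte replicated into all 8 lanes */
        printf("%016llx\n", (unsigned long long)pattern);
        return 0;
}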
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
index 7f5593974e2d..4989f5a8fa9b 100644
--- a/arch/x86_64/lib/putuser.S
+++ b/arch/x86_64/lib/putuser.S
@@ -25,25 +25,26 @@
25 */ 25 */
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <asm/dwarf2.h>
28#include <asm/page.h> 29#include <asm/page.h>
29#include <asm/errno.h> 30#include <asm/errno.h>
30#include <asm/asm-offsets.h> 31#include <asm/asm-offsets.h>
31#include <asm/thread_info.h> 32#include <asm/thread_info.h>
32 33
33 .text 34 .text
34 .p2align 4 35ENTRY(__put_user_1)
35.globl __put_user_1 36 CFI_STARTPROC
36__put_user_1:
37 GET_THREAD_INFO(%r8) 37 GET_THREAD_INFO(%r8)
38 cmpq threadinfo_addr_limit(%r8),%rcx 38 cmpq threadinfo_addr_limit(%r8),%rcx
39 jae bad_put_user 39 jae bad_put_user
401: movb %dl,(%rcx) 401: movb %dl,(%rcx)
41 xorl %eax,%eax 41 xorl %eax,%eax
42 ret 42 ret
43 CFI_ENDPROC
44ENDPROC(__put_user_1)
43 45
44 .p2align 4 46ENTRY(__put_user_2)
45.globl __put_user_2 47 CFI_STARTPROC
46__put_user_2:
47 GET_THREAD_INFO(%r8) 48 GET_THREAD_INFO(%r8)
48 addq $1,%rcx 49 addq $1,%rcx
49 jc 20f 50 jc 20f
@@ -55,10 +56,11 @@ __put_user_2:
55 ret 56 ret
5620: decq %rcx 5720: decq %rcx
57 jmp bad_put_user 58 jmp bad_put_user
59 CFI_ENDPROC
60ENDPROC(__put_user_2)
58 61
59 .p2align 4 62ENTRY(__put_user_4)
60.globl __put_user_4 63 CFI_STARTPROC
61__put_user_4:
62 GET_THREAD_INFO(%r8) 64 GET_THREAD_INFO(%r8)
63 addq $3,%rcx 65 addq $3,%rcx
64 jc 30f 66 jc 30f
@@ -70,10 +72,11 @@ __put_user_4:
70 ret 72 ret
7130: subq $3,%rcx 7330: subq $3,%rcx
72 jmp bad_put_user 74 jmp bad_put_user
75 CFI_ENDPROC
76ENDPROC(__put_user_4)
73 77
74 .p2align 4 78ENTRY(__put_user_8)
75.globl __put_user_8 79 CFI_STARTPROC
76__put_user_8:
77 GET_THREAD_INFO(%r8) 80 GET_THREAD_INFO(%r8)
78 addq $7,%rcx 81 addq $7,%rcx
79 jc 40f 82 jc 40f
@@ -85,10 +88,15 @@ __put_user_8:
85 ret 88 ret
8640: subq $7,%rcx 8940: subq $7,%rcx
87 jmp bad_put_user 90 jmp bad_put_user
91 CFI_ENDPROC
92ENDPROC(__put_user_8)
88 93
89bad_put_user: 94bad_put_user:
95 CFI_STARTPROC
90 movq $(-EFAULT),%rax 96 movq $(-EFAULT),%rax
91 ret 97 ret
98 CFI_ENDPROC
99END(bad_put_user)
92 100
93.section __ex_table,"a" 101.section __ex_table,"a"
94 .quad 1b,bad_put_user 102 .quad 1b,bad_put_user
diff --git a/arch/x86_64/lib/rwlock.S b/arch/x86_64/lib/rwlock.S
new file mode 100644
index 000000000000..0cde1f807314
--- /dev/null
+++ b/arch/x86_64/lib/rwlock.S
@@ -0,0 +1,38 @@
1/* Slow paths of read/write spinlocks. */
2
3#include <linux/linkage.h>
4#include <asm/rwlock.h>
5#include <asm/alternative-asm.i>
6#include <asm/dwarf2.h>
7
8/* rdi: pointer to rwlock_t */
9ENTRY(__write_lock_failed)
10 CFI_STARTPROC
11 LOCK_PREFIX
12 addl $RW_LOCK_BIAS,(%rdi)
131: rep
14 nop
15 cmpl $RW_LOCK_BIAS,(%rdi)
16 jne 1b
17 LOCK_PREFIX
18 subl $RW_LOCK_BIAS,(%rdi)
19 jnz __write_lock_failed
20 ret
21 CFI_ENDPROC
22END(__write_lock_failed)
23
24/* rdi: pointer to rwlock_t */
25ENTRY(__read_lock_failed)
26 CFI_STARTPROC
27 LOCK_PREFIX
28 incl (%rdi)
291: rep
30 nop
31 cmpl $1,(%rdi)
32 js 1b
33 LOCK_PREFIX
34 decl (%rdi)
35 js __read_lock_failed
36 ret
37 CFI_ENDPROC
38END(__read_lock_failed)
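The slow paths above implement the RW_LOCK_BIAS scheme: the failed fast path already subtracted the bias, so __write_lock_failed adds it back, spins until the counter reads as fully unlocked, and retries the subtraction. A C11 model of that loop (the spin body and atomics are simplifications of cpu_relax() and the locked instructions):

#include <stdatomic.h>

#define RW_LOCK_BIAS 0x01000000

static void write_lock_failed_model(atomic_int *lock)
{
        for (;;) {
                atomic_fetch_add(lock, RW_LOCK_BIAS);   /* undo the failed attempt */
                while (atomic_load(lock) != RW_LOCK_BIAS)
                        ;                               /* spin until unlocked */
                if (atomic_fetch_sub(lock, RW_LOCK_BIAS) == RW_LOCK_BIAS)
                        return;                         /* subtraction hit zero: lock taken */
        }
}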
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index 332ea5dff916..0025535cac8d 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -1,10 +1,9 @@
1 /* 1/*
2 * Save registers before calling assembly functions. This avoids 2 * Save registers before calling assembly functions. This avoids
3 * disturbance of register allocation in some inline assembly constructs. 3 * disturbance of register allocation in some inline assembly constructs.
4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs. 4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
5 * Subject to the GNU public license, v.2. No warranty of any kind. 5 * Subject to the GNU public license, v.2. No warranty of any kind.
6 * $Id: thunk.S,v 1.2 2002/03/13 20:06:58 ak Exp $ 6 */
7 */
8 7
9 #include <linux/config.h> 8 #include <linux/config.h>
10 #include <linux/linkage.h> 9 #include <linux/linkage.h>
@@ -67,33 +66,3 @@ restore_norax:
67 RESTORE_ARGS 1 66 RESTORE_ARGS 1
68 ret 67 ret
69 CFI_ENDPROC 68 CFI_ENDPROC
70
71#ifdef CONFIG_SMP
72/* Support for read/write spinlocks. */
73 .text
74/* rax: pointer to rwlock_t */
75ENTRY(__write_lock_failed)
76 lock
77 addl $RW_LOCK_BIAS,(%rax)
781: rep
79 nop
80 cmpl $RW_LOCK_BIAS,(%rax)
81 jne 1b
82 lock
83 subl $RW_LOCK_BIAS,(%rax)
84 jnz __write_lock_failed
85 ret
86
87/* rax: pointer to rwlock_t */
88ENTRY(__read_lock_failed)
89 lock
90 incl (%rax)
911: rep
92 nop
93 cmpl $1,(%rax)
94 js 1b
95 lock
96 decl (%rax)
97 js __read_lock_failed
98 ret
99#endif
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 4198798e1469..1a17b0733ab5 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -40,8 +40,7 @@
40#define PF_RSVD (1<<3) 40#define PF_RSVD (1<<3)
41#define PF_INSTR (1<<4) 41#define PF_INSTR (1<<4)
42 42
43#ifdef CONFIG_KPROBES 43static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
44ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
45 44
46/* Hook to register for page fault notifications */ 45/* Hook to register for page fault notifications */
47int register_page_fault_notifier(struct notifier_block *nb) 46int register_page_fault_notifier(struct notifier_block *nb)
@@ -49,11 +48,13 @@ int register_page_fault_notifier(struct notifier_block *nb)
49 vmalloc_sync_all(); 48 vmalloc_sync_all();
50 return atomic_notifier_chain_register(&notify_page_fault_chain, nb); 49 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
51} 50}
51EXPORT_SYMBOL_GPL(register_page_fault_notifier);
52 52
53int unregister_page_fault_notifier(struct notifier_block *nb) 53int unregister_page_fault_notifier(struct notifier_block *nb)
54{ 54{
55 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); 55 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
56} 56}
57EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
57 58
58static inline int notify_page_fault(enum die_val val, const char *str, 59static inline int notify_page_fault(enum die_val val, const char *str,
59 struct pt_regs *regs, long err, int trap, int sig) 60 struct pt_regs *regs, long err, int trap, int sig)
@@ -67,13 +68,6 @@ static inline int notify_page_fault(enum die_val val, const char *str,
67 }; 68 };
68 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); 69 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
69} 70}
70#else
71static inline int notify_page_fault(enum die_val val, const char *str,
72 struct pt_regs *regs, long err, int trap, int sig)
73{
74 return NOTIFY_DONE;
75}
76#endif
77 71
78void bust_spinlocks(int yes) 72void bust_spinlocks(int yes)
79{ 73{
@@ -102,7 +96,7 @@ void bust_spinlocks(int yes)
102static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, 96static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
103 unsigned long error_code) 97 unsigned long error_code)
104{ 98{
105 unsigned char *instr; 99 unsigned char __user *instr;
106 int scan_more = 1; 100 int scan_more = 1;
107 int prefetch = 0; 101 int prefetch = 0;
108 unsigned char *max_instr; 102 unsigned char *max_instr;
@@ -111,7 +105,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
111 if (error_code & PF_INSTR) 105 if (error_code & PF_INSTR)
112 return 0; 106 return 0;
113 107
114 instr = (unsigned char *)convert_rip_to_linear(current, regs); 108 instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
115 max_instr = instr + 15; 109 max_instr = instr + 15;
116 110
117 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) 111 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
@@ -122,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
122 unsigned char instr_hi; 116 unsigned char instr_hi;
123 unsigned char instr_lo; 117 unsigned char instr_lo;
124 118
125 if (__get_user(opcode, instr)) 119 if (__get_user(opcode, (char __user *)instr))
126 break; 120 break;
127 121
128 instr_hi = opcode & 0xf0; 122 instr_hi = opcode & 0xf0;
@@ -160,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
160 case 0x00: 154 case 0x00:
161 /* Prefetch instruction is 0x0F0D or 0x0F18 */ 155 /* Prefetch instruction is 0x0F0D or 0x0F18 */
162 scan_more = 0; 156 scan_more = 0;
163 if (__get_user(opcode, instr)) 157 if (__get_user(opcode, (char __user *)instr))
164 break; 158 break;
165 prefetch = (instr_lo == 0xF) && 159 prefetch = (instr_lo == 0xF) &&
166 (opcode == 0x0D || opcode == 0x18); 160 (opcode == 0x0D || opcode == 0x18);
@@ -176,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
176static int bad_address(void *p) 170static int bad_address(void *p)
177{ 171{
178 unsigned long dummy; 172 unsigned long dummy;
179 return __get_user(dummy, (unsigned long *)p); 173 return __get_user(dummy, (unsigned long __user *)p);
180} 174}
181 175
182void dump_pagetable(unsigned long address) 176void dump_pagetable(unsigned long address)
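With the CONFIG_KPROBES conditional removed above, the page-fault notifier chain always exists and the register/unregister entry points are exported GPL-only, so modules other than kprobes can hook faults. A usage sketch; the handler name and header placement are assumptions, not taken from this patch:

#include <linux/notifier.h>
#include <asm/kdebug.h>         /* struct die_args / die_val (assumed location) */

static int my_fault_event(struct notifier_block *nb,
                          unsigned long val, void *data)
{
        /* struct die_args *args = data; inspect args->regs, args->trapnr... */
        return NOTIFY_DONE;     /* let the normal fault path continue */
}

static struct notifier_block my_fault_nb = {
        .notifier_call = my_fault_event,
};

/* module init:  register_page_fault_notifier(&my_fault_nb);   */
/* module exit:  unregister_page_fault_notifier(&my_fault_nb); */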
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 52fd42c40c86..1e4669fa5734 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -229,7 +229,6 @@ __init void *early_ioremap(unsigned long addr, unsigned long size)
229 229
230 /* actually usually some more */ 230 /* actually usually some more */
231 if (size >= LARGE_PAGE_SIZE) { 231 if (size >= LARGE_PAGE_SIZE) {
232 printk("SMBIOS area too long %lu\n", size);
233 return NULL; 232 return NULL;
234 } 233 }
235 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); 234 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
@@ -250,12 +249,13 @@ __init void early_iounmap(void *addr, unsigned long size)
250} 249}
251 250
252static void __meminit 251static void __meminit
253phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) 252phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
254{ 253{
255 int i; 254 int i = pmd_index(address);
256 255
257 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { 256 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
258 unsigned long entry; 257 unsigned long entry;
258 pmd_t *pmd = pmd_page + pmd_index(address);
259 259
260 if (address >= end) { 260 if (address >= end) {
261 if (!after_bootmem) 261 if (!after_bootmem)
@@ -263,6 +263,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
263 set_pmd(pmd, __pmd(0)); 263 set_pmd(pmd, __pmd(0));
264 break; 264 break;
265 } 265 }
266
267 if (pmd_val(*pmd))
268 continue;
269
266 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; 270 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
267 entry &= __supported_pte_mask; 271 entry &= __supported_pte_mask;
268 set_pmd(pmd, __pmd(entry)); 272 set_pmd(pmd, __pmd(entry));
@@ -272,45 +276,41 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
272static void __meminit 276static void __meminit
273phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) 277phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
274{ 278{
275 pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); 279 pmd_t *pmd = pmd_offset(pud,0);
276 280 spin_lock(&init_mm.page_table_lock);
277 if (pmd_none(*pmd)) { 281 phys_pmd_init(pmd, address, end);
278 spin_lock(&init_mm.page_table_lock); 282 spin_unlock(&init_mm.page_table_lock);
279 phys_pmd_init(pmd, address, end); 283 __flush_tlb_all();
280 spin_unlock(&init_mm.page_table_lock);
281 __flush_tlb_all();
282 }
283} 284}
284 285
285static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) 286static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
286{ 287{
287 long i = pud_index(address); 288 int i = pud_index(addr);
288
289 pud = pud + i;
290 289
291 if (after_bootmem && pud_val(*pud)) {
292 phys_pmd_update(pud, address, end);
293 return;
294 }
295 290
296 for (; i < PTRS_PER_PUD; pud++, i++) { 291 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
297 int map; 292 int map;
298 unsigned long paddr, pmd_phys; 293 unsigned long pmd_phys;
294 pud_t *pud = pud_page + pud_index(addr);
299 pmd_t *pmd; 295 pmd_t *pmd;
300 296
301 paddr = (address & PGDIR_MASK) + i*PUD_SIZE; 297 if (addr >= end)
302 if (paddr >= end)
303 break; 298 break;
304 299
305 if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) { 300 if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
306 set_pud(pud, __pud(0)); 301 set_pud(pud, __pud(0));
307 continue; 302 continue;
308 } 303 }
309 304
305 if (pud_val(*pud)) {
306 phys_pmd_update(pud, addr, end);
307 continue;
308 }
309
310 pmd = alloc_low_page(&map, &pmd_phys); 310 pmd = alloc_low_page(&map, &pmd_phys);
311 spin_lock(&init_mm.page_table_lock); 311 spin_lock(&init_mm.page_table_lock);
312 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); 312 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
313 phys_pmd_init(pmd, paddr, end); 313 phys_pmd_init(pmd, addr, end);
314 spin_unlock(&init_mm.page_table_lock); 314 spin_unlock(&init_mm.page_table_lock);
315 unmap_low_page(map); 315 unmap_low_page(map);
316 } 316 }
@@ -597,12 +597,6 @@ void __init mem_init(void)
597 597
598 pci_iommu_alloc(); 598 pci_iommu_alloc();
599 599
600 /* How many end-of-memory variables you have, grandma! */
601 max_low_pfn = end_pfn;
602 max_pfn = end_pfn;
603 num_physpages = end_pfn;
604 high_memory = (void *) __va(end_pfn * PAGE_SIZE);
605
606 /* clear the zero-page */ 600 /* clear the zero-page */
607 memset(empty_zero_page, 0, PAGE_SIZE); 601 memset(empty_zero_page, 0, PAGE_SIZE);
608 602
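The phys_pmd_init()/phys_pud_init() rework above starts each walk at the table index covering the requested address and skips entries that are already populated, so the same walk can be re-entered later (e.g. for hot-added memory) without clobbering existing mappings. A simplified standalone model of the pattern:

#define ENTRIES    512
#define ENTRY_SIZE (2UL << 20)  /* one 2MB pmd entry, as on x86-64 */

static void map_range_model(unsigned long *table, unsigned long address,
                            unsigned long end)
{
        int i = (address / ENTRY_SIZE) % ENTRIES;   /* pmd_index() analogue */

        for (; i < ENTRIES; i++, address += ENTRY_SIZE) {
                if (address >= end)
                        break;
                if (table[i])           /* already mapped: leave it alone */
                        continue;
                table[i] = address | 1; /* mark present (toy encoding) */
        }
}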
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index 7c45c2d2b8b2..5cf594f9230d 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -54,6 +54,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
54 54
55 nodes_clear(nodes_parsed); 55 nodes_clear(nodes_parsed);
56 56
57 if (!early_pci_allowed())
58 return -1;
59
57 nb = find_northbridge(); 60 nb = find_northbridge();
58 if (nb < 0) 61 if (nb < 0)
59 return nb; 62 return nb;
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index b2fac14baac0..322bf45fc36a 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -225,7 +225,7 @@ void __init numa_init_array(void)
225int numa_fake __initdata = 0; 225int numa_fake __initdata = 0;
226 226
227/* Numa emulation */ 227/* Numa emulation */
228static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) 228static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
229{ 229{
230 int i; 230 int i;
231 struct bootnode nodes[MAX_NUMNODES]; 231 struct bootnode nodes[MAX_NUMNODES];
@@ -348,9 +348,10 @@ void __init paging_init(void)
348 } 348 }
349} 349}
350 350
351/* [numa=off] */ 351static __init int numa_setup(char *opt)
352__init int numa_setup(char *opt)
353{ 352{
353 if (!opt)
354 return -EINVAL;
354 if (!strncmp(opt,"off",3)) 355 if (!strncmp(opt,"off",3))
355 numa_off = 1; 356 numa_off = 1;
356#ifdef CONFIG_NUMA_EMU 357#ifdef CONFIG_NUMA_EMU
@@ -366,9 +367,11 @@ __init int numa_setup(char *opt)
366 if (!strncmp(opt,"hotadd=", 7)) 367 if (!strncmp(opt,"hotadd=", 7))
367 hotadd_percent = simple_strtoul(opt+7, NULL, 10); 368 hotadd_percent = simple_strtoul(opt+7, NULL, 10);
368#endif 369#endif
369 return 1; 370 return 0;
370} 371}
371 372
373early_param("numa", numa_setup);
374
372/* 375/*
373 * Setup early cpu_to_node. 376 * Setup early cpu_to_node.
374 * 377 *
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 2685b1f3671c..3e231d762aaa 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -108,8 +108,8 @@ static void revert_page(unsigned long address, pgprot_t ref_prot)
108 BUG_ON(pud_none(*pud)); 108 BUG_ON(pud_none(*pud));
109 pmd = pmd_offset(pud, address); 109 pmd = pmd_offset(pud, address);
110 BUG_ON(pmd_val(*pmd) & _PAGE_PSE); 110 BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
111 pgprot_val(ref_prot) |= _PAGE_PSE;
112 large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); 111 large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
112 large_pte = pte_mkhuge(large_pte);
113 set_pte((pte_t *)pmd, large_pte); 113 set_pte((pte_t *)pmd, large_pte);
114} 114}
115 115
@@ -119,32 +119,28 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
119{ 119{
120 pte_t *kpte; 120 pte_t *kpte;
121 struct page *kpte_page; 121 struct page *kpte_page;
122 unsigned kpte_flags;
123 pgprot_t ref_prot2; 122 pgprot_t ref_prot2;
124 kpte = lookup_address(address); 123 kpte = lookup_address(address);
125 if (!kpte) return 0; 124 if (!kpte) return 0;
126 kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); 125 kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
127 kpte_flags = pte_val(*kpte);
128 if (pgprot_val(prot) != pgprot_val(ref_prot)) { 126 if (pgprot_val(prot) != pgprot_val(ref_prot)) {
129 if ((kpte_flags & _PAGE_PSE) == 0) { 127 if (!pte_huge(*kpte)) {
130 set_pte(kpte, pfn_pte(pfn, prot)); 128 set_pte(kpte, pfn_pte(pfn, prot));
131 } else { 129 } else {
132 /* 130 /*
133 * split_large_page will take the reference for this 131 * split_large_page will take the reference for this
134 * change_page_attr on the split page. 132 * change_page_attr on the split page.
135 */ 133 */
136
137 struct page *split; 134 struct page *split;
138 ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE)); 135 ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
139
140 split = split_large_page(address, prot, ref_prot2); 136 split = split_large_page(address, prot, ref_prot2);
141 if (!split) 137 if (!split)
142 return -ENOMEM; 138 return -ENOMEM;
143 set_pte(kpte,mk_pte(split, ref_prot2)); 139 set_pte(kpte, mk_pte(split, ref_prot2));
144 kpte_page = split; 140 kpte_page = split;
145 } 141 }
146 page_private(kpte_page)++; 142 page_private(kpte_page)++;
147 } else if ((kpte_flags & _PAGE_PSE) == 0) { 143 } else if (!pte_huge(*kpte)) {
148 set_pte(kpte, pfn_pte(pfn, ref_prot)); 144 set_pte(kpte, pfn_pte(pfn, ref_prot));
149 BUG_ON(page_private(kpte_page) == 0); 145 BUG_ON(page_private(kpte_page) == 0);
150 page_private(kpte_page)--; 146 page_private(kpte_page)--;
@@ -190,10 +186,12 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
190 * lowmem */ 186 * lowmem */
191 if (__pa(address) < KERNEL_TEXT_SIZE) { 187 if (__pa(address) < KERNEL_TEXT_SIZE) {
192 unsigned long addr2; 188 unsigned long addr2;
193 pgprot_t prot2 = prot; 189 pgprot_t prot2;
194 addr2 = __START_KERNEL_map + __pa(address); 190 addr2 = __START_KERNEL_map + __pa(address);
195 pgprot_val(prot2) &= ~_PAGE_NX; 191 /* Make sure the kernel mappings stay executable */
196 err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC); 192 prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
193 err = __change_page_attr(addr2, pfn, prot2,
194 PAGE_KERNEL_EXEC);
197 } 195 }
198 } 196 }
199 up_write(&init_mm.mmap_sem); 197 up_write(&init_mm.mmap_sem);
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 502fce65e96a..ca10701e7a90 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -21,6 +21,8 @@
21#include <asm/numa.h> 21#include <asm/numa.h>
22#include <asm/e820.h> 22#include <asm/e820.h>
23 23
24int acpi_numa __initdata;
25
24#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \ 26#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
25 defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \ 27 defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \
26 && !defined(CONFIG_MEMORY_HOTPLUG) 28 && !defined(CONFIG_MEMORY_HOTPLUG)
diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile
index a3f6ad570179..1eb18f421edf 100644
--- a/arch/x86_64/pci/Makefile
+++ b/arch/x86_64/pci/Makefile
@@ -9,7 +9,7 @@ obj-y := i386.o
9obj-$(CONFIG_PCI_DIRECT)+= direct.o 9obj-$(CONFIG_PCI_DIRECT)+= direct.o
10obj-y += fixup.o init.o 10obj-y += fixup.o init.o
11obj-$(CONFIG_ACPI) += acpi.o 11obj-$(CONFIG_ACPI) += acpi.o
12obj-y += legacy.o irq.o common.o 12obj-y += legacy.o irq.o common.o early.o
13# mmconfig has a 64bit special 13# mmconfig has a 64bit special
14obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o 14obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
15 15
@@ -23,3 +23,4 @@ common-y += ../../i386/pci/common.o
23fixup-y += ../../i386/pci/fixup.o 23fixup-y += ../../i386/pci/fixup.o
24i386-y += ../../i386/pci/i386.o 24i386-y += ../../i386/pci/i386.o
25init-y += ../../i386/pci/init.o 25init-y += ../../i386/pci/init.o
26early-y += ../../i386/pci/early.o
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 3c55c76c6fd5..7732f4254d21 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -156,15 +156,45 @@ static __init void unreachable_devices(void)
156 addr = pci_dev_base(0, k, PCI_DEVFN(i, 0)); 156 addr = pci_dev_base(0, k, PCI_DEVFN(i, 0));
157 if (addr == NULL|| readl(addr) != val1) { 157 if (addr == NULL|| readl(addr) != val1) {
158 set_bit(i + 32*k, fallback_slots); 158 set_bit(i + 32*k, fallback_slots);
159 printk(KERN_NOTICE
160 "PCI: No mmconfig possible on device %x:%x\n",
161 k, i);
159 printk(KERN_NOTICE "PCI: No mmconfig possible"
160 " on device %02x:%02x\n", k, i);
162 } 161 }
163 } 162 }
164 } 163 }
165} 164}
166 165
167void __init pci_mmcfg_init(void)
166static __init void pci_mmcfg_insert_resources(void)
167{
168#define PCI_MMCFG_RESOURCE_NAME_LEN 19
169 int i;
170 struct resource *res;
171 char *names;
172 unsigned num_buses;
173
174 res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
175 pci_mmcfg_config_num, GFP_KERNEL);
176
177 if (!res) {
178 printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
179 return;
180 }
181
182 names = (void *)&res[pci_mmcfg_config_num];
183 for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
184 num_buses = pci_mmcfg_config[i].end_bus_number -
185 pci_mmcfg_config[i].start_bus_number + 1;
186 res->name = names;
187 snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
188 pci_mmcfg_config[i].pci_segment_group_number);
189 res->start = pci_mmcfg_config[i].base_address;
190 res->end = res->start + (num_buses << 20) - 1;
191 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
192 insert_resource(&iomem_resource, res);
193 names += PCI_MMCFG_RESOURCE_NAME_LEN;
194 }
195}
196
197void __init pci_mmcfg_init(int type)
168{ 198{
169 int i; 199 int i;
170 200
@@ -177,7 +207,9 @@ void __init pci_mmcfg_init(void)
177 (pci_mmcfg_config[0].base_address == 0)) 207 (pci_mmcfg_config[0].base_address == 0))
178 return; 208 return;
179 209
180 if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
210 /* Only do this check when type 1 works. If it doesn't work
211 assume we run on a Mac and always use MCFG */
212 if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].base_address,
181 pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, 213 pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
182 E820_RESERVED)) { 214 E820_RESERVED)) {
183 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", 215 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
@@ -186,7 +218,6 @@ void __init pci_mmcfg_init(void)
186 return; 218 return;
187 } 219 }
188 220
189 /* RED-PEN i386 doesn't do _nocache right now */
190 pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); 221 pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
191 if (pci_mmcfg_virt == NULL) { 222 if (pci_mmcfg_virt == NULL) {
192 printk("PCI: Can not allocate memory for mmconfig structures\n"); 223 printk("PCI: Can not allocate memory for mmconfig structures\n");
@@ -205,6 +236,7 @@ void __init pci_mmcfg_init(void)
205 } 236 }
206 237
207 unreachable_devices(); 238 unreachable_devices();
239 pci_mmcfg_insert_resources();
208 240
209 raw_pci_ops = &pci_mmcfg; 241 raw_pci_ops = &pci_mmcfg;
210 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 242 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
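In pci_mmcfg_insert_resources() above, each segment's resource ends at start + (num_buses << 20) - 1 because MMCONFIG (ECAM) assigns every bus 256 devfns x 4 KB of configuration space, i.e. exactly 1 MB per bus. A standalone sketch of that arithmetic; the function name is illustrative:

#include <stdio.h>

/* 32 devices x 8 functions x 4 KB of config space = 1 MB per bus. */
static unsigned long mmcfg_aperture_bytes(unsigned start_bus, unsigned end_bus)
{
	unsigned long num_buses = end_bus - start_bus + 1;
	return num_buses << 20;
}

int main(void)
{
	/* A segment covering buses 0..255 needs the full 256 MB window. */
	printf("%lu MB\n", mmcfg_aperture_bytes(0, 255) >> 20);
	return 0;
}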
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 8afba339f05a..58b0eb581114 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -868,8 +868,8 @@ int hpet_alloc(struct hpet_data *hdp)
868 do_div(temp, period); 868 do_div(temp, period);
869 hpetp->hp_tick_freq = temp; /* ticks per second */ 869 hpetp->hp_tick_freq = temp; /* ticks per second */
870 870
871 printk(KERN_INFO "hpet%d: at MMIO 0x%lx (virtual 0x%p), IRQ%s", 871 printk(KERN_INFO "hpet%d: at MMIO 0x%lx, IRQ%s",
872 hpetp->hp_which, hdp->hd_phys_address, hdp->hd_address, 872 hpetp->hp_which, hdp->hd_phys_address,
873 hpetp->hp_ntimer > 1 ? "s" : ""); 873 hpetp->hp_ntimer > 1 ? "s" : "");
874 for (i = 0; i < hpetp->hp_ntimer; i++) 874 for (i = 0; i < hpetp->hp_ntimer; i++)
875 printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]); 875 printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 8ab027886034..590f4e6f505d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -955,13 +955,12 @@ static int __devinit pci_setup(char *str)
955 } 955 }
956 str = k; 956 str = k;
957 } 957 }
958 return 1; 958 return 0;
959} 959}
960early_param("pci", pci_setup);
960 961
961device_initcall(pci_init); 962device_initcall(pci_init);
962 963
963__setup("pci=", pci_setup);
964
965#if defined(CONFIG_ISA) || defined(CONFIG_EISA) 964#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
966/* FIXME: Some boxes have multiple ISA bridges! */ 965/* FIXME: Some boxes have multiple ISA bridges! */
967struct pci_dev *isa_bridge; 966struct pci_dev *isa_bridge;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 64802aabd1ac..dfd8cfb7fb5d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -515,7 +515,8 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
515{ 515{
516 unsigned int random_variable = 0; 516 unsigned int random_variable = 0;
517 517
518 if (current->flags & PF_RANDOMIZE) { 518 if ((current->flags & PF_RANDOMIZE) &&
519 !(current->personality & ADDR_NO_RANDOMIZE)) {
519 random_variable = get_random_int() & STACK_RND_MASK; 520 random_variable = get_random_int() & STACK_RND_MASK;
520 random_variable <<= PAGE_SHIFT; 521 random_variable <<= PAGE_SHIFT;
521 } 522 }
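The binfmt_elf.c fix above makes stack-top randomization respect the ADDR_NO_RANDOMIZE personality bit in addition to PF_RANDOMIZE. A standalone sketch of the mask-and-shift it guards; the mask and shift widths here are illustrative, since STACK_RND_MASK is per-architecture:

#include <stdio.h>
#include <stdlib.h>

#define DEMO_STACK_RND_MASK 0x7ff          /* e.g. 11 bits of randomness */
#define DEMO_PAGE_SHIFT     12

int main(void)
{
	/* Pick some page-aligned offset below the nominal stack top. */
	unsigned int random_variable = (unsigned int)rand() & DEMO_STACK_RND_MASK;
	unsigned long offset = (unsigned long)random_variable << DEMO_PAGE_SHIFT;
	printf("stack top shifted down by %lu bytes (max %lu)\n",
	       offset, (unsigned long)DEMO_STACK_RND_MASK << DEMO_PAGE_SHIFT);
	return 0;
}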
diff --git a/fs/compat.c b/fs/compat.c
index e31e9cf96647..ce982f6e8c80 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1855,7 +1855,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1855 1855
1856 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); 1856 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1857 1857
1858 if (tsp && !(current->personality & STICKY_TIMEOUTS)) { 1858 if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) {
1859 struct compat_timespec rts; 1859 struct compat_timespec rts;
1860 1860
1861 rts.tv_sec = timeout / HZ; 1861 rts.tv_sec = timeout / HZ;
@@ -1866,7 +1866,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1866 } 1866 }
1867 if (compat_timespec_compare(&rts, &ts) >= 0) 1867 if (compat_timespec_compare(&rts, &ts) >= 0)
1868 rts = ts; 1868 rts = ts;
1869 copy_to_user(tsp, &rts, sizeof(rts)); 1869 if (copy_to_user(tsp, &rts, sizeof(rts)))
1870 ret = -EFAULT;
1870 } 1871 }
1871 1872
1872 if (ret == -ERESTARTNOHAND) { 1873 if (ret == -ERESTARTNOHAND) {
diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h
index 20f523954218..6016632d032f 100644
--- a/include/asm-i386/acpi.h
+++ b/include/asm-i386/acpi.h
@@ -131,21 +131,7 @@ static inline void disable_acpi(void)
131extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); 131extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
132 132
133#ifdef CONFIG_X86_IO_APIC 133#ifdef CONFIG_X86_IO_APIC
134extern int skip_ioapic_setup;
135extern int acpi_skip_timer_override; 134extern int acpi_skip_timer_override;
136
137static inline void disable_ioapic_setup(void)
138{
139 skip_ioapic_setup = 1;
140}
141
142static inline int ioapic_setup_disabled(void)
143{
144 return skip_ioapic_setup;
145}
146
147#else
148static inline void disable_ioapic_setup(void) { }
149#endif 135#endif
150 136
151static inline void acpi_noirq_set(void) { acpi_noirq = 1; } 137static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
diff --git a/include/asm-i386/alternative-asm.i b/include/asm-i386/alternative-asm.i
new file mode 100644
index 000000000000..6c47e3b9484b
--- /dev/null
+++ b/include/asm-i386/alternative-asm.i
@@ -0,0 +1,14 @@
1#include <linux/config.h>
2
3#ifdef CONFIG_SMP
4 .macro LOCK_PREFIX
51: lock
6 .section .smp_locks,"a"
7 .align 4
8 .long 1b
9 .previous
10 .endm
11#else
12 .macro LOCK_PREFIX
13 .endm
14#endif
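The LOCK_PREFIX macro in the new alternative-asm.i emits a "lock" prefix and records its address in the .smp_locks section. A hedged sketch of the kind of boot-time pass that consumes such a table, rewriting each recorded prefix to a nop when only one CPU is present; this is simplified and the names are invented, not the kernel's alternatives API (the kernel's i386 table stores 32-bit addresses via .long; uintptr_t keeps the demo portable):

#include <stdint.h>
#include <stdio.h>

static void demo_smp_unlock(const uintptr_t *start, const uintptr_t *end)
{
	for (const uintptr_t *entry = start; entry < end; entry++) {
		uint8_t *insn = (uint8_t *)*entry;   /* recorded address */
		if (*insn == 0xf0)                   /* x86 "lock" prefix */
			*insn = 0x90;                /* "nop": drop it on UP */
	}
}

int main(void)
{
	static uint8_t code[] = { 0xf0, 0xff, 0x0b };   /* lock decl (%ebx) */
	uintptr_t table[] = { (uintptr_t)&code[0] };
	demo_smp_unlock(table, table + 1);
	printf("first byte now %#x\n", code[0]);        /* 0x90 */
	return 0;
}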
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index 2c1e371cebb6..3a42b7d6fc92 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -16,20 +16,8 @@
16#define APIC_VERBOSE 1 16#define APIC_VERBOSE 1
17#define APIC_DEBUG 2 17#define APIC_DEBUG 2
18 18
19extern int enable_local_apic;
20extern int apic_verbosity; 19extern int apic_verbosity;
21 20
22static inline void lapic_disable(void)
23{
24 enable_local_apic = -1;
25 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
26}
27
28static inline void lapic_enable(void)
29{
30 enable_local_apic = 1;
31}
32
33/* 21/*
34 * Define the default level of output to be very little 22 * Define the default level of output to be very little
35 * This can be turned up by using apic=verbose for more 23 * This can be turned up by using apic=verbose for more
@@ -42,6 +30,8 @@ static inline void lapic_enable(void)
42 } while (0) 30 } while (0)
43 31
44 32
33extern void generic_apic_probe(void);
34
45#ifdef CONFIG_X86_LOCAL_APIC 35#ifdef CONFIG_X86_LOCAL_APIC
46 36
47/* 37/*
@@ -117,8 +107,6 @@ extern void enable_APIC_timer(void);
117 107
118extern void enable_NMI_through_LVT0 (void * dummy); 108extern void enable_NMI_through_LVT0 (void * dummy);
119 109
120extern int disable_timer_pin_1;
121
122void smp_send_timer_broadcast_ipi(struct pt_regs *regs); 110void smp_send_timer_broadcast_ipi(struct pt_regs *regs);
123void switch_APIC_timer_to_ipi(void *cpumask); 111void switch_APIC_timer_to_ipi(void *cpumask);
124void switch_ipi_to_APIC_timer(void *cpumask); 112void switch_ipi_to_APIC_timer(void *cpumask);
diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h
index 89b8b82c82b3..5874ef119ffd 100644
--- a/include/asm-i386/desc.h
+++ b/include/asm-i386/desc.h
@@ -33,50 +33,99 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
33 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; 33 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
34} 34}
35 35
36/*
37 * This is the ldt that every process will get unless we need
38 * something other than this.
39 */
40extern struct desc_struct default_ldt[];
41extern struct desc_struct idt_table[];
42extern void set_intr_gate(unsigned int irq, void * addr);
43
44static inline void pack_descriptor(__u32 *a, __u32 *b,
45 unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
46{
47 *a = ((base & 0xffff) << 16) | (limit & 0xffff);
48 *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
49 (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
50}
51
52static inline void pack_gate(__u32 *a, __u32 *b,
53 unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
54{
55 *a = (seg << 16) | (base & 0xffff);
56 *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
57}
58
59#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
60#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
61#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
62#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
63#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
64#define DESCTYPE_DPL3 0x60 /* DPL-3 */
65#define DESCTYPE_S 0x10 /* !system */
66
36#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) 67#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
37#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) 68#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
38 69
39#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) 70#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
40#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) 71#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
41#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr)) 72#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
42#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt)) 73#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
43 74
44#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) 75#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
45#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) 76#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
46#define store_tr(tr) __asm__ ("str %0":"=mr" (tr)) 77#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
47#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt)) 78#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
48 79
49/*
50 * This is the ldt that every process will get unless we need
51 * something other than this.
52 */
53extern struct desc_struct default_ldt[];
54extern void set_intr_gate(unsigned int irq, void * addr);
55
56#define _set_tssldt_desc(n,addr,limit,type) \
57__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
58 "movw %w1,2(%2)\n\t" \
59 "rorl $16,%1\n\t" \
60 "movb %b1,4(%2)\n\t" \
61 "movb %4,5(%2)\n\t" \
62 "movb $0,6(%2)\n\t" \
63 "movb %h1,7(%2)\n\t" \
64 "rorl $16,%1" \
65 : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
66
67static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
68{
69 _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
70 offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
71}
72
73#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
74
75static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
76{
77 _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
78}
79
80#if TLS_SIZE != 24
81# error update this code.
82#endif
83
84static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
85{
86#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
87 C(0); C(1); C(2);
88#undef C
89}
90
91static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
92{
93 __u32 *lp = (__u32 *)((char *)dt + entry*8);
94 *lp = entry_a;
95 *(lp+1) = entry_b;
96}
97
98#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
99#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
100#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
101
102static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
103{
104 __u32 a, b;
105 pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
106 write_idt_entry(idt_table, gate, a, b);
107}
108
109static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
110{
111 __u32 a, b;
112 pack_descriptor(&a, &b, (unsigned long)addr,
113 offsetof(struct tss_struct, __cacheline_filler) - 1,
114 DESCTYPE_TSS, 0);
115 write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
116}
117
118static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
119{
120 __u32 a, b;
121 pack_descriptor(&a, &b, (unsigned long)addr,
122 entries * sizeof(struct desc_struct) - 1,
123 DESCTYPE_LDT, 0);
124 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
125}
126
127#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
128
80#define LDT_entry_a(info) \ 129#define LDT_entry_a(info) \
81 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) 130 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
82 131
@@ -102,24 +151,6 @@ static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
102 (info)->seg_not_present == 1 && \ 151 (info)->seg_not_present == 1 && \
103 (info)->useable == 0 ) 152 (info)->useable == 0 )
104 153
105static inline void write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
106{
107 __u32 *lp = (__u32 *)((char *)ldt + entry*8);
108 *lp = entry_a;
109 *(lp+1) = entry_b;
110}
111
112#if TLS_SIZE != 24
113# error update this code.
114#endif
115
116static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
117{
118#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
119 C(0); C(1); C(2);
120#undef C
121}
122
123static inline void clear_LDT(void) 154static inline void clear_LDT(void)
124{ 155{
125 int cpu = get_cpu(); 156 int cpu = get_cpu();
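pack_descriptor() above encodes a GDT descriptor's split fields into two 32-bit words: word A carries limit[15:0] and base[15:0], word B carries base[23:16], the type/access byte, limit[19:16], the flags nibble, and base[31:24]. A standalone round-trip check of that layout (userspace, values chosen for illustration):

#include <stdio.h>

static void pack_descriptor(unsigned *a, unsigned *b,
		unsigned long base, unsigned long limit,
		unsigned char type, unsigned char flags)
{
	*a = ((base & 0xffff) << 16) | (limit & 0xffff);
	*b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
	     (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
}

int main(void)
{
	unsigned a, b;
	pack_descriptor(&a, &b, 0x12345678, 0x9abcd, 0x89 /* DESCTYPE_TSS */, 0);
	/* Recover the base to confirm the round trip. */
	unsigned long base = (a >> 16) | ((b & 0xff) << 16) | (b & 0xff000000);
	printf("base=%#lx a=%#x b=%#x\n", base, a, b);   /* base=0x12345678 */
	return 0;
}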
diff --git a/include/asm-i386/dwarf2.h b/include/asm-i386/dwarf2.h
index 2280f6272f80..6d66398a307d 100644
--- a/include/asm-i386/dwarf2.h
+++ b/include/asm-i386/dwarf2.h
@@ -1,8 +1,6 @@
1#ifndef _DWARF2_H 1#ifndef _DWARF2_H
2#define _DWARF2_H 2#define _DWARF2_H
3 3
4#include <linux/config.h>
5
6#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
7#warning "asm/dwarf2.h should be only included in pure assembly files" 5#warning "asm/dwarf2.h should be only included in pure assembly files"
8#endif 6#endif
@@ -28,6 +26,13 @@
28#define CFI_RESTORE .cfi_restore 26#define CFI_RESTORE .cfi_restore
29#define CFI_REMEMBER_STATE .cfi_remember_state 27#define CFI_REMEMBER_STATE .cfi_remember_state
30#define CFI_RESTORE_STATE .cfi_restore_state 28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined
30
31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
32#define CFI_SIGNAL_FRAME .cfi_signal_frame
33#else
34#define CFI_SIGNAL_FRAME
35#endif
31 36
32#else 37#else
33 38
@@ -48,6 +53,8 @@
48#define CFI_RESTORE ignore 53#define CFI_RESTORE ignore
49#define CFI_REMEMBER_STATE ignore 54#define CFI_REMEMBER_STATE ignore
50#define CFI_RESTORE_STATE ignore 55#define CFI_RESTORE_STATE ignore
56#define CFI_UNDEFINED ignore
57#define CFI_SIGNAL_FRAME ignore
51 58
52#endif 59#endif
53 60
diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h
index ca82acb8cb1f..f7514fb6e8e4 100644
--- a/include/asm-i386/e820.h
+++ b/include/asm-i386/e820.h
@@ -18,7 +18,7 @@
18 18
19#define E820_RAM 1 19#define E820_RAM 1
20#define E820_RESERVED 2 20#define E820_RESERVED 2
21#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ 21#define E820_ACPI 3
22#define E820_NVS 4 22#define E820_NVS 4
23 23
24#define HIGH_MEMORY (1024*1024) 24#define HIGH_MEMORY (1024*1024)
diff --git a/include/asm-i386/frame.i b/include/asm-i386/frame.i
new file mode 100644
index 000000000000..4d68ddce18b6
--- /dev/null
+++ b/include/asm-i386/frame.i
@@ -0,0 +1,24 @@
1#include <linux/config.h>
2#include <asm/dwarf2.h>
3
4/* The annotation hides the frame from the unwinder and makes it look
5 like a ordinary ebp save/restore. This avoids some special cases for
6 frame pointer later */
7#ifdef CONFIG_FRAME_POINTER
8 .macro FRAME
9 pushl %ebp
10 CFI_ADJUST_CFA_OFFSET 4
11 CFI_REL_OFFSET ebp,0
12 movl %esp,%ebp
13 .endm
14 .macro ENDFRAME
15 popl %ebp
16 CFI_ADJUST_CFA_OFFSET -4
17 CFI_RESTORE ebp
18 .endm
19#else
20 .macro FRAME
21 .endm
22 .macro ENDFRAME
23 .endm
24#endif
diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h
index b3783a32abee..8ffbb0f07457 100644
--- a/include/asm-i386/genapic.h
+++ b/include/asm-i386/genapic.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_GENAPIC_H 1#ifndef _ASM_GENAPIC_H
2#define _ASM_GENAPIC_H 1 2#define _ASM_GENAPIC_H 1
3 3
4#include <asm/mpspec.h>
5
4/* 6/*
5 * Generic APIC driver interface. 7 * Generic APIC driver interface.
6 * 8 *
@@ -63,14 +65,25 @@ struct genapic {
63 unsigned (*get_apic_id)(unsigned long x); 65 unsigned (*get_apic_id)(unsigned long x);
64 unsigned long apic_id_mask; 66 unsigned long apic_id_mask;
65 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 67 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
66 68
69#ifdef CONFIG_SMP
67 /* ipi */ 70 /* ipi */
68 void (*send_IPI_mask)(cpumask_t mask, int vector); 71 void (*send_IPI_mask)(cpumask_t mask, int vector);
69 void (*send_IPI_allbutself)(int vector); 72 void (*send_IPI_allbutself)(int vector);
70 void (*send_IPI_all)(int vector); 73 void (*send_IPI_all)(int vector);
74#endif
71}; 75};
72 76
73#define APICFUNC(x) .x = x 77#define APICFUNC(x) .x = x,
78
79/* More functions could be probably marked IPIFUNC and save some space
80 in UP GENERICARCH kernels, but I don't have the nerve right now
81 to untangle this mess. -AK */
82#ifdef CONFIG_SMP
83#define IPIFUNC(x) APICFUNC(x)
84#else
85#define IPIFUNC(x)
86#endif
74 87
75#define APIC_INIT(aname, aprobe) { \ 88#define APIC_INIT(aname, aprobe) { \
76 .name = aname, \ 89 .name = aname, \
@@ -80,33 +93,33 @@ struct genapic {
80 .no_balance_irq = NO_BALANCE_IRQ, \ 93 .no_balance_irq = NO_BALANCE_IRQ, \
81 .ESR_DISABLE = esr_disable, \ 94 .ESR_DISABLE = esr_disable, \
82 .apic_destination_logical = APIC_DEST_LOGICAL, \ 95 .apic_destination_logical = APIC_DEST_LOGICAL, \
83 APICFUNC(apic_id_registered), \ 96 APICFUNC(apic_id_registered) \
84 APICFUNC(target_cpus), \ 97 APICFUNC(target_cpus) \
85 APICFUNC(check_apicid_used), \ 98 APICFUNC(check_apicid_used) \
86 APICFUNC(check_apicid_present), \ 99 APICFUNC(check_apicid_present) \
87 APICFUNC(init_apic_ldr), \ 100 APICFUNC(init_apic_ldr) \
88 APICFUNC(ioapic_phys_id_map), \ 101 APICFUNC(ioapic_phys_id_map) \
89 APICFUNC(clustered_apic_check), \ 102 APICFUNC(clustered_apic_check) \
90 APICFUNC(multi_timer_check), \ 103 APICFUNC(multi_timer_check) \
91 APICFUNC(apicid_to_node), \ 104 APICFUNC(apicid_to_node) \
92 APICFUNC(cpu_to_logical_apicid), \ 105 APICFUNC(cpu_to_logical_apicid) \
93 APICFUNC(cpu_present_to_apicid), \ 106 APICFUNC(cpu_present_to_apicid) \
94 APICFUNC(apicid_to_cpu_present), \ 107 APICFUNC(apicid_to_cpu_present) \
95 APICFUNC(mpc_apic_id), \ 108 APICFUNC(mpc_apic_id) \
96 APICFUNC(setup_portio_remap), \ 109 APICFUNC(setup_portio_remap) \
97 APICFUNC(check_phys_apicid_present), \ 110 APICFUNC(check_phys_apicid_present) \
98 APICFUNC(mpc_oem_bus_info), \ 111 APICFUNC(mpc_oem_bus_info) \
99 APICFUNC(mpc_oem_pci_bus), \ 112 APICFUNC(mpc_oem_pci_bus) \
100 APICFUNC(mps_oem_check), \ 113 APICFUNC(mps_oem_check) \
101 APICFUNC(get_apic_id), \ 114 APICFUNC(get_apic_id) \
102 .apic_id_mask = APIC_ID_MASK, \ 115 .apic_id_mask = APIC_ID_MASK, \
103 APICFUNC(cpu_mask_to_apicid), \ 116 APICFUNC(cpu_mask_to_apicid) \
104 APICFUNC(acpi_madt_oem_check), \ 117 APICFUNC(acpi_madt_oem_check) \
105 APICFUNC(send_IPI_mask), \ 118 IPIFUNC(send_IPI_mask) \
106 APICFUNC(send_IPI_allbutself), \ 119 IPIFUNC(send_IPI_allbutself) \
107 APICFUNC(send_IPI_all), \ 120 IPIFUNC(send_IPI_all) \
108 APICFUNC(enable_apic_mode), \ 121 APICFUNC(enable_apic_mode) \
109 APICFUNC(phys_pkg_id), \ 122 APICFUNC(phys_pkg_id) \
110 } 123 }
111 124
112extern struct genapic *genapic; 125extern struct genapic *genapic;
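The genapic.h change moves the trailing comma into APICFUNC so that IPIFUNC can expand to nothing on UP builds, dropping the IPI fields from the initializer entirely. A minimal standalone demo of that designated-initializer trick; the names below are invented for the demo:

#include <stdio.h>

struct ops { int (*probe)(void); int (*send)(int); };

static int probe(void) { return 1; }
static int send(int v) { return v; }

#define OPFUNC(x) .x = x,           /* comma lives inside the macro */
#ifdef DEMO_SMP
#define SENDFUNC(x) OPFUNC(x)
#else
#define SENDFUNC(x)                 /* field stays NULL when not configured */
#endif

static struct ops demo = { OPFUNC(probe) SENDFUNC(send) };

int main(void)
{
	printf("probe=%d send=%s\n", demo.probe(), demo.send ? "set" : "unset");
	return 0;
}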
diff --git a/include/asm-i386/intel_arch_perfmon.h b/include/asm-i386/intel_arch_perfmon.h
index 134ea9cc5283..b52cd60a075b 100644
--- a/include/asm-i386/intel_arch_perfmon.h
+++ b/include/asm-i386/intel_arch_perfmon.h
@@ -14,6 +14,18 @@
14 14
15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) 15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0) 17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
18#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
19 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
20
21union cpuid10_eax {
22 struct {
23 unsigned int version_id:8;
24 unsigned int num_counters:8;
25 unsigned int bit_width:8;
26 unsigned int mask_length:8;
27 } split;
28 unsigned int full;
29};
18 30
19#endif /* X86_INTEL_ARCH_PERFMON_H */ 31#endif /* X86_INTEL_ARCH_PERFMON_H */
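The cpuid10_eax union added above decodes CPUID leaf 0xA's EAX into the architectural-perfmon fields. A standalone sketch of using it on a raw value; the value is made up, and the bitfield decoding assumes the usual little-endian GCC layout:

#include <stdio.h>

union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};

int main(void)
{
	union cpuid10_eax eax;
	eax.full = 0x07280202;   /* hypothetical: v2, 2 counters, 40-bit */
	printf("version=%u counters=%u width=%u\n",
	       eax.split.version_id, eax.split.num_counters,
	       eax.split.bit_width);
	return 0;
}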
diff --git a/include/asm-i386/io_apic.h b/include/asm-i386/io_apic.h
index 5092e819b8a2..5d309275a1dc 100644
--- a/include/asm-i386/io_apic.h
+++ b/include/asm-i386/io_apic.h
@@ -188,6 +188,16 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
188/* 1 if "noapic" boot option passed */ 188/* 1 if "noapic" boot option passed */
189extern int skip_ioapic_setup; 189extern int skip_ioapic_setup;
190 190
191static inline void disable_ioapic_setup(void)
192{
193 skip_ioapic_setup = 1;
194}
195
196static inline int ioapic_setup_disabled(void)
197{
198 return skip_ioapic_setup;
199}
200
191/* 201/*
192 * If we use the IO-APIC for IRQ routing, disable automatic 202 * If we use the IO-APIC for IRQ routing, disable automatic
193 * assignment of PCI IRQ's. 203 * assignment of PCI IRQ's.
@@ -206,6 +216,7 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq);
206 216
207#else /* !CONFIG_X86_IO_APIC */ 217#else /* !CONFIG_X86_IO_APIC */
208#define io_apic_assign_pci_irqs 0 218#define io_apic_assign_pci_irqs 0
219static inline void disable_ioapic_setup(void) { }
209#endif 220#endif
210 221
211extern int assign_irq_vector(int irq); 222extern int assign_irq_vector(int irq);
diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h
index 53f0e06672dc..4dfc9f5ed031 100644
--- a/include/asm-i386/kexec.h
+++ b/include/asm-i386/kexec.h
@@ -1,6 +1,26 @@
1#ifndef _I386_KEXEC_H 1#ifndef _I386_KEXEC_H
2#define _I386_KEXEC_H 2#define _I386_KEXEC_H
3 3
4#define PA_CONTROL_PAGE 0
5#define VA_CONTROL_PAGE 1
6#define PA_PGD 2
7#define VA_PGD 3
8#define PA_PTE_0 4
9#define VA_PTE_0 5
10#define PA_PTE_1 6
11#define VA_PTE_1 7
12#ifdef CONFIG_X86_PAE
13#define PA_PMD_0 8
14#define VA_PMD_0 9
15#define PA_PMD_1 10
16#define VA_PMD_1 11
17#define PAGES_NR 12
18#else
19#define PAGES_NR 8
20#endif
21
22#ifndef __ASSEMBLY__
23
4#include <asm/fixmap.h> 24#include <asm/fixmap.h>
5#include <asm/ptrace.h> 25#include <asm/ptrace.h>
6#include <asm/string.h> 26#include <asm/string.h>
@@ -72,5 +92,12 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
72 newregs->eip = (unsigned long)current_text_addr(); 92 newregs->eip = (unsigned long)current_text_addr();
73 } 93 }
74} 94}
95asmlinkage NORET_TYPE void
96relocate_kernel(unsigned long indirection_page,
97 unsigned long control_page,
98 unsigned long start_address,
99 unsigned int has_pae) ATTRIB_NORET;
100
101#endif /* __ASSEMBLY__ */
75 102
76#endif /* _I386_KEXEC_H */ 103#endif /* _I386_KEXEC_H */
diff --git a/include/asm-i386/mach-es7000/mach_apic.h b/include/asm-i386/mach-es7000/mach_apic.h
index b5f3f0d0b2bc..26333685a7fb 100644
--- a/include/asm-i386/mach-es7000/mach_apic.h
+++ b/include/asm-i386/mach-es7000/mach_apic.h
@@ -123,9 +123,13 @@ extern u8 cpu_2_logical_apicid[];
123/* Mapping from cpu number to logical apicid */ 123/* Mapping from cpu number to logical apicid */
124static inline int cpu_to_logical_apicid(int cpu) 124static inline int cpu_to_logical_apicid(int cpu)
125{ 125{
126#ifdef CONFIG_SMP
126 if (cpu >= NR_CPUS) 127 if (cpu >= NR_CPUS)
127 return BAD_APICID; 128 return BAD_APICID;
128 return (int)cpu_2_logical_apicid[cpu]; 129 return (int)cpu_2_logical_apicid[cpu];
130#else
131 return logical_smp_processor_id();
132#endif
129} 133}
130 134
131static inline int mpc_apic_id(struct mpc_config_processor *m, struct mpc_config_translation *unused) 135static inline int mpc_apic_id(struct mpc_config_processor *m, struct mpc_config_translation *unused)
diff --git a/include/asm-i386/mach-summit/mach_apic.h b/include/asm-i386/mach-summit/mach_apic.h
index 9fd073286289..a81b05961595 100644
--- a/include/asm-i386/mach-summit/mach_apic.h
+++ b/include/asm-i386/mach-summit/mach_apic.h
@@ -46,10 +46,12 @@ extern u8 cpu_2_logical_apicid[];
46static inline void init_apic_ldr(void) 46static inline void init_apic_ldr(void)
47{ 47{
48 unsigned long val, id; 48 unsigned long val, id;
49 int i, count; 49 int count = 0;
50 u8 lid;
51 u8 my_id = (u8)hard_smp_processor_id(); 50 u8 my_id = (u8)hard_smp_processor_id();
52 u8 my_cluster = (u8)apicid_cluster(my_id); 51 u8 my_cluster = (u8)apicid_cluster(my_id);
52#ifdef CONFIG_SMP
53 u8 lid;
54 int i;
53 55
54 /* Create logical APIC IDs by counting CPUs already in cluster. */ 56 /* Create logical APIC IDs by counting CPUs already in cluster. */
55 for (count = 0, i = NR_CPUS; --i >= 0; ) { 57 for (count = 0, i = NR_CPUS; --i >= 0; ) {
@@ -57,6 +59,7 @@ static inline void init_apic_ldr(void)
57 if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) 59 if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
58 ++count; 60 ++count;
59 } 61 }
62#endif
60 /* We only have a 4 wide bitmap in cluster mode. If a deranged 63 /* We only have a 4 wide bitmap in cluster mode. If a deranged
61 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ 64 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
62 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); 65 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
@@ -91,9 +94,13 @@ static inline int apicid_to_node(int logical_apicid)
91/* Mapping from cpu number to logical apicid */ 94/* Mapping from cpu number to logical apicid */
92static inline int cpu_to_logical_apicid(int cpu) 95static inline int cpu_to_logical_apicid(int cpu)
93{ 96{
97#ifdef CONFIG_SMP
94 if (cpu >= NR_CPUS) 98 if (cpu >= NR_CPUS)
95 return BAD_APICID; 99 return BAD_APICID;
96 return (int)cpu_2_logical_apicid[cpu]; 100 return (int)cpu_2_logical_apicid[cpu];
101#else
102 return logical_smp_processor_id();
103#endif
97} 104}
98 105
99static inline int cpu_present_to_apicid(int mps_cpu) 106static inline int cpu_present_to_apicid(int mps_cpu)
diff --git a/include/asm-i386/mutex.h b/include/asm-i386/mutex.h
index 05a538531229..7a17d9e58ad6 100644
--- a/include/asm-i386/mutex.h
+++ b/include/asm-i386/mutex.h
@@ -30,14 +30,10 @@ do { \
30 \ 30 \
31 __asm__ __volatile__( \ 31 __asm__ __volatile__( \
32 LOCK_PREFIX " decl (%%eax) \n" \ 32 LOCK_PREFIX " decl (%%eax) \n" \
33 " js 2f \n" \ 33 " jns 1f \n" \
34 " call "#fail_fn" \n" \
34 "1: \n" \ 35 "1: \n" \
35 \ 36 \
36 LOCK_SECTION_START("") \
37 "2: call "#fail_fn" \n" \
38 " jmp 1b \n" \
39 LOCK_SECTION_END \
40 \
41 :"=a" (dummy) \ 37 :"=a" (dummy) \
42 : "a" (count) \ 38 : "a" (count) \
43 : "memory", "ecx", "edx"); \ 39 : "memory", "ecx", "edx"); \
@@ -86,14 +82,10 @@ do { \
86 \ 82 \
87 __asm__ __volatile__( \ 83 __asm__ __volatile__( \
88 LOCK_PREFIX " incl (%%eax) \n" \ 84 LOCK_PREFIX " incl (%%eax) \n" \
89 " jle 2f \n" \ 85 " jg 1f \n" \
86 " call "#fail_fn" \n" \
90 "1: \n" \ 87 "1: \n" \
91 \ 88 \
92 LOCK_SECTION_START("") \
93 "2: call "#fail_fn" \n" \
94 " jmp 1b \n" \
95 LOCK_SECTION_END \
96 \
97 :"=a" (dummy) \ 89 :"=a" (dummy) \
98 : "a" (count) \ 90 : "a" (count) \
99 : "memory", "ecx", "edx"); \ 91 : "memory", "ecx", "edx"); \
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h
index 67d994799999..303bcd4592bb 100644
--- a/include/asm-i386/nmi.h
+++ b/include/asm-i386/nmi.h
@@ -6,32 +6,29 @@
6 6
7#include <linux/pm.h> 7#include <linux/pm.h>
8 8
9struct pt_regs;
10
11typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
12
13/** 9/**
14 * set_nmi_callback 10 * do_nmi_callback
15 * 11 *
16 * Set a handler for an NMI. Only one handler may be 12 * Check to see if a callback exists and execute it. Return 1
17 * set. Return 1 if the NMI was handled. 13 * if the handler exists and was handled successfully.
18 */ 14 */
19void set_nmi_callback(nmi_callback_t callback); 15int do_nmi_callback(struct pt_regs *regs, int cpu);
20 16
21/**
22 * unset_nmi_callback
23 *
24 * Remove the handler previously set.
25 */
26void unset_nmi_callback(void);
27
28extern void setup_apic_nmi_watchdog (void);
29extern int reserve_lapic_nmi(void);
30extern void release_lapic_nmi(void);
17extern int nmi_watchdog_enabled;
18extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
19extern int avail_to_resrv_perfctr_nmi(unsigned int);
20extern int reserve_perfctr_nmi(unsigned int);
21extern void release_perfctr_nmi(unsigned int);
22extern int reserve_evntsel_nmi(unsigned int);
23extern void release_evntsel_nmi(unsigned int);
24
25extern void setup_apic_nmi_watchdog (void *);
26extern void stop_apic_nmi_watchdog (void *);
31extern void disable_timer_nmi_watchdog(void); 27extern void disable_timer_nmi_watchdog(void);
32extern void enable_timer_nmi_watchdog(void); 28extern void enable_timer_nmi_watchdog(void);
33extern void nmi_watchdog_tick (struct pt_regs * regs); 29extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
34 30
31extern atomic_t nmi_active;
35extern unsigned int nmi_watchdog; 32extern unsigned int nmi_watchdog;
36#define NMI_DEFAULT -1 33#define NMI_DEFAULT -1
37#define NMI_NONE 0 34#define NMI_NONE 0
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 0dc051a8078b..541b3e234335 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -411,8 +411,6 @@ extern pte_t *lookup_address(unsigned long address);
411 static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} 411 static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
412#endif 412#endif
413 413
414extern void noexec_setup(const char *str);
415
416#if defined(CONFIG_HIGHPTE) 414#if defined(CONFIG_HIGHPTE)
417#define pte_offset_map(dir, address) \ 415#define pte_offset_map(dir, address) \
418 ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) 416 ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index 1910880fcd40..a4a0e5207db5 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -27,6 +27,7 @@ struct pt_regs {
27#ifdef __KERNEL__ 27#ifdef __KERNEL__
28 28
29#include <asm/vm86.h> 29#include <asm/vm86.h>
30#include <asm/segment.h>
30 31
31struct task_struct; 32struct task_struct;
32extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); 33extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
@@ -40,18 +41,14 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int erro
40 */ 41 */
41static inline int user_mode(struct pt_regs *regs) 42static inline int user_mode(struct pt_regs *regs)
42{ 43{
43 return (regs->xcs & 3) != 0; 44 return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL;
44} 45}
45static inline int user_mode_vm(struct pt_regs *regs) 46static inline int user_mode_vm(struct pt_regs *regs)
46{ 47{
47 return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0; 48 return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
48} 49}
49#define instruction_pointer(regs) ((regs)->eip) 50#define instruction_pointer(regs) ((regs)->eip)
50#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
51extern unsigned long profile_pc(struct pt_regs *regs); 51extern unsigned long profile_pc(struct pt_regs *regs);
52#else
53#define profile_pc(regs) instruction_pointer(regs)
54#endif
55#endif /* __KERNEL__ */ 52#endif /* __KERNEL__ */
56 53
57#endif 54#endif
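user_mode() above now tests the selector's requested privilege level instead of "& 3 != 0". A standalone check of that RPL test; 0x60 and 0x73 are the usual i386 __KERNEL_CS/__USER_CS selector values of this era, cited here only for illustration:

#include <stdio.h>

#define SEGMENT_RPL_MASK 0x3   /* low two selector bits = privilege level */
#define USER_RPL         0x3   /* user mode runs at ring 3 */

static int demo_user_mode(unsigned xcs)
{
	return (xcs & SEGMENT_RPL_MASK) == USER_RPL;
}

int main(void)
{
	printf("kernel cs 0x60: %d\n", demo_user_mode(0x60));  /* 0 */
	printf("user   cs 0x73: %d\n", demo_user_mode(0x73));  /* 1 */
	return 0;
}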
diff --git a/include/asm-i386/rwlock.h b/include/asm-i386/rwlock.h
index 87c069ccba08..c3e5db32fa48 100644
--- a/include/asm-i386/rwlock.h
+++ b/include/asm-i386/rwlock.h
@@ -20,52 +20,6 @@
20#define RW_LOCK_BIAS 0x01000000 20#define RW_LOCK_BIAS 0x01000000
21#define RW_LOCK_BIAS_STR "0x01000000" 21#define RW_LOCK_BIAS_STR "0x01000000"
22 22
23/* Code is in asm-i386/spinlock.h */
23#define __build_read_lock_ptr(rw, helper) \
24 asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" \
25 "jns 1f\n" \
26 "call " helper "\n\t" \
27 "1:\n" \
28 ::"a" (rw) : "memory")
29
30#define __build_read_lock_const(rw, helper) \
31 asm volatile(LOCK_PREFIX " subl $1,%0\n\t" \
32 "jns 1f\n" \
33 "pushl %%eax\n\t" \
34 "leal %0,%%eax\n\t" \
35 "call " helper "\n\t" \
36 "popl %%eax\n\t" \
37 "1:\n" \
38 :"+m" (*(volatile int *)rw) : : "memory")
39
40#define __build_read_lock(rw, helper) do { \
41 if (__builtin_constant_p(rw)) \
42 __build_read_lock_const(rw, helper); \
43 else \
44 __build_read_lock_ptr(rw, helper); \
45 } while (0)
46
47#define __build_write_lock_ptr(rw, helper) \
48 asm volatile(LOCK_PREFIX " subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
49 "jz 1f\n" \
50 "call " helper "\n\t" \
51 "1:\n" \
52 ::"a" (rw) : "memory")
53
54#define __build_write_lock_const(rw, helper) \
55 asm volatile(LOCK_PREFIX " subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
56 "jz 1f\n" \
57 "pushl %%eax\n\t" \
58 "leal %0,%%eax\n\t" \
59 "call " helper "\n\t" \
60 "popl %%eax\n\t" \
61 "1:\n" \
62 :"+m" (*(volatile int *)rw) : : "memory")
63
64#define __build_write_lock(rw, helper) do { \
65 if (__builtin_constant_p(rw)) \
66 __build_write_lock_const(rw, helper); \
67 else \
68 __build_write_lock_ptr(rw, helper); \
69 } while (0)
70 24
71#endif 25#endif
diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index 43113f5608eb..bc598d6388e3 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -99,17 +99,9 @@ static inline void __down_read(struct rw_semaphore *sem)
99 __asm__ __volatile__( 99 __asm__ __volatile__(
100 "# beginning down_read\n\t" 100 "# beginning down_read\n\t"
101LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ 101LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
102 " js 2f\n\t" /* jump if we weren't granted the lock */ 102 " jns 1f\n"
103 " call call_rwsem_down_read_failed\n"
103 "1:\n\t" 104 "1:\n\t"
104 LOCK_SECTION_START("")
105 "2:\n\t"
106 " pushl %%ecx\n\t"
107 " pushl %%edx\n\t"
108 " call rwsem_down_read_failed\n\t"
109 " popl %%edx\n\t"
110 " popl %%ecx\n\t"
111 " jmp 1b\n"
112 LOCK_SECTION_END
113 "# ending down_read\n\t" 105 "# ending down_read\n\t"
114 : "+m" (sem->count) 106 : "+m" (sem->count)
115 : "a" (sem) 107 : "a" (sem)
@@ -151,15 +143,9 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
151 "# beginning down_write\n\t" 143 "# beginning down_write\n\t"
152LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ 144LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
153 " testl %%edx,%%edx\n\t" /* was the count 0 before? */ 145 " testl %%edx,%%edx\n\t" /* was the count 0 before? */
154 " jnz 2f\n\t" /* jump if we weren't granted the lock */ 146 " jz 1f\n"
155 "1:\n\t" 147 " call call_rwsem_down_write_failed\n"
156 LOCK_SECTION_START("") 148 "1:\n"
157 "2:\n\t"
158 " pushl %%ecx\n\t"
159 " call rwsem_down_write_failed\n\t"
160 " popl %%ecx\n\t"
161 " jmp 1b\n"
162 LOCK_SECTION_END
163 "# ending down_write" 149 "# ending down_write"
164 : "+m" (sem->count), "=d" (tmp) 150 : "+m" (sem->count), "=d" (tmp)
165 : "a" (sem), "1" (tmp) 151 : "a" (sem), "1" (tmp)
@@ -193,17 +179,9 @@ static inline void __up_read(struct rw_semaphore *sem)
193 __asm__ __volatile__( 179 __asm__ __volatile__(
194 "# beginning __up_read\n\t" 180 "# beginning __up_read\n\t"
195LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ 181LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
196 " js 2f\n\t" /* jump if the lock is being waited upon */ 182 " jns 1f\n\t"
197 "1:\n\t" 183 " call call_rwsem_wake\n"
198 LOCK_SECTION_START("") 184 "1:\n"
199 "2:\n\t"
200 " decw %%dx\n\t" /* do nothing if still outstanding active readers */
201 " jnz 1b\n\t"
202 " pushl %%ecx\n\t"
203 " call rwsem_wake\n\t"
204 " popl %%ecx\n\t"
205 " jmp 1b\n"
206 LOCK_SECTION_END
207 "# ending __up_read\n" 185 "# ending __up_read\n"
208 : "+m" (sem->count), "=d" (tmp) 186 : "+m" (sem->count), "=d" (tmp)
209 : "a" (sem), "1" (tmp) 187 : "a" (sem), "1" (tmp)
@@ -219,17 +197,9 @@ static inline void __up_write(struct rw_semaphore *sem)
219 "# beginning __up_write\n\t" 197 "# beginning __up_write\n\t"
220 " movl %2,%%edx\n\t" 198 " movl %2,%%edx\n\t"
221LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ 199LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
222 " jnz 2f\n\t" /* jump if the lock is being waited upon */ 200 " jz 1f\n"
201 " call call_rwsem_wake\n"
223 "1:\n\t" 202 "1:\n\t"
224 LOCK_SECTION_START("")
225 "2:\n\t"
226 " decw %%dx\n\t" /* did the active count reduce to 0? */
227 " jnz 1b\n\t" /* jump back if not */
228 " pushl %%ecx\n\t"
229 " call rwsem_wake\n\t"
230 " popl %%ecx\n\t"
231 " jmp 1b\n"
232 LOCK_SECTION_END
233 "# ending __up_write\n" 203 "# ending __up_write\n"
234 : "+m" (sem->count) 204 : "+m" (sem->count)
235 : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) 205 : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS)
@@ -244,17 +214,9 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
244 __asm__ __volatile__( 214 __asm__ __volatile__(
245 "# beginning __downgrade_write\n\t" 215 "# beginning __downgrade_write\n\t"
246LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ 216LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
247 " js 2f\n\t" /* jump if the lock is being waited upon */ 217 " jns 1f\n\t"
218 " call call_rwsem_downgrade_wake\n"
248 "1:\n\t" 219 "1:\n\t"
249 LOCK_SECTION_START("")
250 "2:\n\t"
251 " pushl %%ecx\n\t"
252 " pushl %%edx\n\t"
253 " call rwsem_downgrade_wake\n\t"
254 " popl %%edx\n\t"
255 " popl %%ecx\n\t"
256 " jmp 1b\n"
257 LOCK_SECTION_END
258 "# ending __downgrade_write\n" 220 "# ending __downgrade_write\n"
259 : "+m" (sem->count) 221 : "+m" (sem->count)
260 : "a" (sem), "i" (-RWSEM_WAITING_BIAS) 222 : "a" (sem), "i" (-RWSEM_WAITING_BIAS)
diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h
index faf995307b9e..b7ab59685ba7 100644
--- a/include/asm-i386/segment.h
+++ b/include/asm-i386/segment.h
@@ -83,6 +83,11 @@
83 83
84#define GDT_SIZE (GDT_ENTRIES * 8) 84#define GDT_SIZE (GDT_ENTRIES * 8)
85 85
86/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
87#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
88/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
89#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
90
86/* Simple and small GDT entries for booting only */ 91/* Simple and small GDT entries for booting only */
87 92
88#define GDT_ENTRY_BOOT_CS 2 93#define GDT_ENTRY_BOOT_CS 2
@@ -112,4 +117,16 @@
112 */ 117 */
113#define IDT_ENTRIES 256 118#define IDT_ENTRIES 256
114 119
120/* Bottom two bits of selector give the ring privilege level */
121#define SEGMENT_RPL_MASK 0x3
122/* Bit 2 is table indicator (LDT/GDT) */
123#define SEGMENT_TI_MASK 0x4
124
125/* User mode is privilege level 3 */
126#define USER_RPL 0x3
127/* LDT segment has TI set, GDT has it cleared */
128#define SEGMENT_LDT 0x4
129#define SEGMENT_GDT 0x0
130
131#define get_kernel_rpl() 0
115#endif 132#endif
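SEGMENT_IS_FLAT_CODE masks with 0xec so that one compare matches both flat code selectors: the mask discards the two RPL bits (0x3) and the single GDT-index bit (0x10) separating the kernel and user code entries. A standalone check; the selector values reflect this GDT layout and are used only for illustration:

#include <stdio.h>

#define GDT_ENTRY_KERNEL_CS 12
#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)

int main(void)
{
	printf("0x60 -> %d\n", SEGMENT_IS_FLAT_CODE(0x60)); /* kernel CS: 1 */
	printf("0x73 -> %d\n", SEGMENT_IS_FLAT_CODE(0x73)); /* user CS:   1 */
	printf("0x7b -> %d\n", SEGMENT_IS_FLAT_CODE(0x7b)); /* user DS:   0 */
	return 0;
}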
diff --git a/include/asm-i386/semaphore.h b/include/asm-i386/semaphore.h
index d51e800acf29..e63b6a68f04c 100644
--- a/include/asm-i386/semaphore.h
+++ b/include/asm-i386/semaphore.h
@@ -100,13 +100,10 @@ static inline void down(struct semaphore * sem)
100 __asm__ __volatile__( 100 __asm__ __volatile__(
101 "# atomic down operation\n\t" 101 "# atomic down operation\n\t"
102 LOCK_PREFIX "decl %0\n\t" /* --sem->count */ 102 LOCK_PREFIX "decl %0\n\t" /* --sem->count */
103 "js 2f\n" 103 "jns 2f\n"
104 "1:\n" 104 "\tlea %0,%%eax\n\t"
105 LOCK_SECTION_START("") 105 "call __down_failed\n"
106 "2:\tlea %0,%%eax\n\t" 106 "2:"
107 "call __down_failed\n\t"
108 "jmp 1b\n"
109 LOCK_SECTION_END
110 :"+m" (sem->count) 107 :"+m" (sem->count)
111 : 108 :
112 :"memory","ax"); 109 :"memory","ax");
@@ -123,15 +120,12 @@ static inline int down_interruptible(struct semaphore * sem)
123 might_sleep(); 120 might_sleep();
124 __asm__ __volatile__( 121 __asm__ __volatile__(
125 "# atomic interruptible down operation\n\t" 122 "# atomic interruptible down operation\n\t"
123 "xorl %0,%0\n\t"
126 LOCK_PREFIX "decl %1\n\t" /* --sem->count */ 124 LOCK_PREFIX "decl %1\n\t" /* --sem->count */
127 "js 2f\n\t" 125 "jns 2f\n\t"
128 "xorl %0,%0\n" 126 "lea %1,%%eax\n\t"
129 "1:\n" 127 "call __down_failed_interruptible\n"
130 LOCK_SECTION_START("") 128 "2:"
131 "2:\tlea %1,%%eax\n\t"
132 "call __down_failed_interruptible\n\t"
133 "jmp 1b\n"
134 LOCK_SECTION_END
135 :"=a" (result), "+m" (sem->count) 129 :"=a" (result), "+m" (sem->count)
136 : 130 :
137 :"memory"); 131 :"memory");
@@ -148,15 +142,12 @@ static inline int down_trylock(struct semaphore * sem)
148 142
149 __asm__ __volatile__( 143 __asm__ __volatile__(
150 "# atomic interruptible down operation\n\t" 144 "# atomic interruptible down operation\n\t"
145 "xorl %0,%0\n\t"
151 LOCK_PREFIX "decl %1\n\t" /* --sem->count */ 146 LOCK_PREFIX "decl %1\n\t" /* --sem->count */
152 "js 2f\n\t" 147 "jns 2f\n\t"
153 "xorl %0,%0\n" 148 "lea %1,%%eax\n\t"
154 "1:\n"
155 LOCK_SECTION_START("")
156 "2:\tlea %1,%%eax\n\t"
157 "call __down_failed_trylock\n\t" 149 "call __down_failed_trylock\n\t"
158 "jmp 1b\n" 150 "2:\n"
159 LOCK_SECTION_END
160 :"=a" (result), "+m" (sem->count) 151 :"=a" (result), "+m" (sem->count)
161 : 152 :
162 :"memory"); 153 :"memory");
@@ -166,22 +157,16 @@ static inline int down_trylock(struct semaphore * sem)
166/* 157/*
167 * Note! This is subtle. We jump to wake people up only if 158 * Note! This is subtle. We jump to wake people up only if
168 * the semaphore was negative (== somebody was waiting on it). 159 * the semaphore was negative (== somebody was waiting on it).
169 * The default case (no contention) will result in NO
170 * jumps for both down() and up().
171 */ 160 */
172static inline void up(struct semaphore * sem) 161static inline void up(struct semaphore * sem)
173{ 162{
174 __asm__ __volatile__( 163 __asm__ __volatile__(
175 "# atomic up operation\n\t" 164 "# atomic up operation\n\t"
176 LOCK_PREFIX "incl %0\n\t" /* ++sem->count */ 165 LOCK_PREFIX "incl %0\n\t" /* ++sem->count */
177 "jle 2f\n" 166 "jg 1f\n\t"
178 "1:\n" 167 "lea %0,%%eax\n\t"
179 LOCK_SECTION_START("") 168 "call __up_wakeup\n"
180 "2:\tlea %0,%%eax\n\t" 169 "1:"
181 "call __up_wakeup\n\t"
182 "jmp 1b\n"
183 LOCK_SECTION_END
184 ".subsection 0\n"
185 :"+m" (sem->count) 170 :"+m" (sem->count)
186 : 171 :
187 :"memory","ax"); 172 :"memory","ax");
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index 142d10e34ade..32ac8c91d5c5 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -80,17 +80,12 @@ static inline int hard_smp_processor_id(void)
80 return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); 80 return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
81} 81}
82#endif 82#endif
83
84static __inline int logical_smp_processor_id(void)
85{
86 /* we don't want to mark this access volatile - bad code generation */
87 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
88}
89
90#endif 83#endif
91 84
92extern int __cpu_disable(void); 85extern int __cpu_disable(void);
93extern void __cpu_die(unsigned int cpu); 86extern void __cpu_die(unsigned int cpu);
87extern unsigned int num_processors;
88
94#endif /* !__ASSEMBLY__ */ 89#endif /* !__ASSEMBLY__ */
95 90
96#else /* CONFIG_SMP */ 91#else /* CONFIG_SMP */
@@ -100,4 +95,15 @@ extern void __cpu_die(unsigned int cpu);
100#define NO_PROC_ID 0xFF /* No processor magic marker */ 95#define NO_PROC_ID 0xFF /* No processor magic marker */
101 96
102#endif 97#endif
98
99#ifndef __ASSEMBLY__
100#ifdef CONFIG_X86_LOCAL_APIC
101static __inline int logical_smp_processor_id(void)
102{
103 /* we don't want to mark this access volatile - bad code generation */
104 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
105}
106#endif
107#endif
108
103#endif 109#endif
diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index d1020363c41a..b0b3043f05e1 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -4,8 +4,12 @@
4#include <asm/atomic.h> 4#include <asm/atomic.h>
5#include <asm/rwlock.h> 5#include <asm/rwlock.h>
6#include <asm/page.h> 6#include <asm/page.h>
7#include <asm/processor.h>
7#include <linux/compiler.h> 8#include <linux/compiler.h>
8 9
10#define CLI_STRING "cli"
11#define STI_STRING "sti"
12
9/* 13/*
10 * Your basic SMP spinlocks, allowing only a single CPU anywhere 14 * Your basic SMP spinlocks, allowing only a single CPU anywhere
11 * 15 *
@@ -17,67 +21,64 @@
17 * (the type definitions are in asm/spinlock_types.h) 21 * (the type definitions are in asm/spinlock_types.h)
18 */ 22 */
19 23
20#define __raw_spin_is_locked(x) \
21 (*(volatile signed char *)(&(x)->slock) <= 0)
24static inline int __raw_spin_is_locked(raw_spinlock_t *x)
25{
26 return *(volatile signed char *)(&(x)->slock) <= 0;
27}
22
23#define __raw_spin_lock_string \
24 "\n1:\t" \
25 LOCK_PREFIX " ; decb %0\n\t" \
26 "jns 3f\n" \
27 "2:\t" \
28 "rep;nop\n\t" \
29 "cmpb $0,%0\n\t" \
30 "jle 2b\n\t" \
31 "jmp 1b\n" \
32 "3:\n\t"
33
34/*
35 * NOTE: there's an irqs-on section here, which normally would have to be
36 * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use
37 * __raw_spin_lock_string_flags().
38 */
39#define __raw_spin_lock_string_flags \
40 "\n1:\t" \
41 LOCK_PREFIX " ; decb %0\n\t" \
42 "jns 5f\n" \
43 "2:\t" \
44 "testl $0x200, %1\n\t" \
45 "jz 4f\n\t" \
46 "sti\n" \
47 "3:\t" \
48 "rep;nop\n\t" \
49 "cmpb $0, %0\n\t" \
50 "jle 3b\n\t" \
51 "cli\n\t" \
52 "jmp 1b\n" \
53 "4:\t" \
54 "rep;nop\n\t" \
55 "cmpb $0, %0\n\t" \
56 "jg 1b\n\t" \
57 "jmp 4b\n" \
58 "5:\n\t"
59 28
60static inline void __raw_spin_lock(raw_spinlock_t *lock) 29static inline void __raw_spin_lock(raw_spinlock_t *lock)
61{ 30{
62 asm(__raw_spin_lock_string : "+m" (lock->slock) : : "memory");
31 asm volatile("\n1:\t"
32 LOCK_PREFIX " ; decb %0\n\t"
33 "jns 3f\n"
34 "2:\t"
35 "rep;nop\n\t"
36 "cmpb $0,%0\n\t"
37 "jle 2b\n\t"
38 "jmp 1b\n"
39 "3:\n\t"
40 : "+m" (lock->slock) : : "memory");
63} 41}
64 42
65/* 43/*
66 * It is easier for the lock validator if interrupts are not re-enabled 44 * It is easier for the lock validator if interrupts are not re-enabled
67 * in the middle of a lock-acquire. This is a performance feature anyway 45 * in the middle of a lock-acquire. This is a performance feature anyway
68 * so we turn it off: 46 * so we turn it off:
47 *
48 * NOTE: there's an irqs-on section here, which normally would have to be
49 * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
69 */ 50 */
70#ifndef CONFIG_PROVE_LOCKING 51#ifndef CONFIG_PROVE_LOCKING
71static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) 52static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
72{ 53{
73 asm(__raw_spin_lock_string_flags : "+m" (lock->slock) : "r" (flags) : "memory");
54 asm volatile(
55 "\n1:\t"
56 LOCK_PREFIX " ; decb %0\n\t"
57 "jns 5f\n"
58 "2:\t"
59 "testl $0x200, %1\n\t"
60 "jz 4f\n\t"
61 STI_STRING "\n"
62 "3:\t"
63 "rep;nop\n\t"
64 "cmpb $0, %0\n\t"
65 "jle 3b\n\t"
66 CLI_STRING "\n\t"
67 "jmp 1b\n"
68 "4:\t"
69 "rep;nop\n\t"
70 "cmpb $0, %0\n\t"
71 "jg 1b\n\t"
72 "jmp 4b\n"
73 "5:\n\t"
74 : "+m" (lock->slock) : "r" (flags) : "memory");
74} 75}
75#endif 76#endif
76 77
77static inline int __raw_spin_trylock(raw_spinlock_t *lock) 78static inline int __raw_spin_trylock(raw_spinlock_t *lock)
78{ 79{
79 char oldval; 80 char oldval;
80 __asm__ __volatile__( 81 asm volatile(
81 "xchgb %b0,%1" 82 "xchgb %b0,%1"
82 :"=q" (oldval), "+m" (lock->slock) 83 :"=q" (oldval), "+m" (lock->slock)
83 :"0" (0) : "memory"); 84 :"0" (0) : "memory");
@@ -93,38 +94,29 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
93 94
94#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) 95#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
95 96
96#define __raw_spin_unlock_string \
97 "movb $1,%0" \
98 :"+m" (lock->slock) : : "memory"
99
100
101static inline void __raw_spin_unlock(raw_spinlock_t *lock) 97static inline void __raw_spin_unlock(raw_spinlock_t *lock)
102{ 98{
103 __asm__ __volatile__(
104 __raw_spin_unlock_string
105 );
99 asm volatile("movb $1,%0" : "+m" (lock->slock) :: "memory");
106} 100}
107 101
108#else 102#else
109 103
110#define __raw_spin_unlock_string \
111 "xchgb %b0, %1" \
112 :"=q" (oldval), "+m" (lock->slock) \
113 :"0" (oldval) : "memory"
114
115static inline void __raw_spin_unlock(raw_spinlock_t *lock) 104static inline void __raw_spin_unlock(raw_spinlock_t *lock)
116{ 105{
117 char oldval = 1; 106 char oldval = 1;
118 107
119 __asm__ __volatile__(
120 __raw_spin_unlock_string
121 );
108 asm volatile("xchgb %b0, %1"
109 : "=q" (oldval), "+m" (lock->slock)
110 : "0" (oldval) : "memory");
122} 111}
123 112
124#endif 113#endif
125 114
126#define __raw_spin_unlock_wait(lock) \
127 do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
115static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
116{
117 while (__raw_spin_is_locked(lock))
118 cpu_relax();
119}
128 120
129/* 121/*
130 * Read-write spinlocks, allowing multiple readers 122 * Read-write spinlocks, allowing multiple readers
@@ -151,22 +143,36 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
151 * read_can_lock - would read_trylock() succeed? 143 * read_can_lock - would read_trylock() succeed?
152 * @lock: the rwlock in question. 144 * @lock: the rwlock in question.
153 */ 145 */
154#define __raw_read_can_lock(x) ((int)(x)->lock > 0)
146static inline int __raw_read_can_lock(raw_rwlock_t *x)
147{
148 return (int)(x)->lock > 0;
149}
155 150
156/** 151/**
157 * write_can_lock - would write_trylock() succeed? 152 * write_can_lock - would write_trylock() succeed?
158 * @lock: the rwlock in question. 153 * @lock: the rwlock in question.
159 */ 154 */
160#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
155static inline int __raw_write_can_lock(raw_rwlock_t *x)
156{
157 return (x)->lock == RW_LOCK_BIAS;
158}
161 159
162static inline void __raw_read_lock(raw_rwlock_t *rw) 160static inline void __raw_read_lock(raw_rwlock_t *rw)
163{ 161{
164 __build_read_lock(rw, "__read_lock_failed");
162 asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
163 "jns 1f\n"
164 "call __read_lock_failed\n\t"
165 "1:\n"
166 ::"a" (rw) : "memory");
165} 167}
166 168
167static inline void __raw_write_lock(raw_rwlock_t *rw) 169static inline void __raw_write_lock(raw_rwlock_t *rw)
168{ 170{
169 __build_write_lock(rw, "__write_lock_failed");
171 asm volatile(LOCK_PREFIX " subl $" RW_LOCK_BIAS_STR ",(%0)\n\t"
172 "jz 1f\n"
173 "call __write_lock_failed\n\t"
174 "1:\n"
175 ::"a" (rw) : "memory");
170} 176}
171 177
172static inline int __raw_read_trylock(raw_rwlock_t *lock) 178static inline int __raw_read_trylock(raw_rwlock_t *lock)
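The spinlock rewrite above folds the old string macros into the inline asm, but the protocol is unchanged: a lock byte that is 1 when free, taken by an atomic decrement that leaves it at 0, and spun on until it turns positive otherwise. A rough C rendering, with a GCC builtin standing in for the lock-prefixed decb:

#include <stdio.h>

static volatile signed char slock = 1;     /* 1 = unlocked */

static void demo_spin_lock(void)
{
	for (;;) {
		if (__sync_fetch_and_sub(&slock, 1) > 0)   /* "lock decb; jns" */
			return;                            /* we took it */
		while (slock <= 0)                         /* "rep;nop; cmpb" */
			;                                  /* spin, then retry */
	}
}

static void demo_spin_unlock(void) { slock = 1; }  /* "movb $1,%0" */

int main(void)
{
	demo_spin_lock();
	printf("locked: slock=%d\n", slock);   /* 0 */
	demo_spin_unlock();
	return 0;
}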
diff --git a/include/asm-i386/stacktrace.h b/include/asm-i386/stacktrace.h
new file mode 100644
index 000000000000..7d1f6a5cbfca
--- /dev/null
+++ b/include/asm-i386/stacktrace.h
@@ -0,0 +1 @@
1#include <asm-x86_64/stacktrace.h>
diff --git a/include/asm-i386/therm_throt.h b/include/asm-i386/therm_throt.h
new file mode 100644
index 000000000000..399bf6026b16
--- /dev/null
+++ b/include/asm-i386/therm_throt.h
@@ -0,0 +1,9 @@
1#ifndef __ASM_I386_THERM_THROT_H__
2#define __ASM_I386_THERM_THROT_H__ 1
3
4#include <asm/atomic.h>
5
6extern atomic_t therm_throt_en;
7int therm_throt_process(int curr);
8
9#endif /* __ASM_I386_THERM_THROT_H__ */
diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h
index d57ca5c540b6..360648b0f2b3 100644
--- a/include/asm-i386/tlbflush.h
+++ b/include/asm-i386/tlbflush.h
@@ -36,8 +36,6 @@
36 : "memory"); \ 36 : "memory"); \
37 } while (0) 37 } while (0)
38 38
39extern unsigned long pgkern_mask;
40
41# define __flush_tlb_all() \ 39# define __flush_tlb_all() \
42 do { \ 40 do { \
43 if (cpu_has_pge) \ 41 if (cpu_has_pge) \
@@ -49,7 +47,7 @@ extern unsigned long pgkern_mask;
49#define cpu_has_invlpg (boot_cpu_data.x86 > 3) 47#define cpu_has_invlpg (boot_cpu_data.x86 > 3)
50 48
51#define __flush_tlb_single(addr) \ 49#define __flush_tlb_single(addr) \
52 __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr)) 50 __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
53 51
54#ifdef CONFIG_X86_INVLPG 52#ifdef CONFIG_X86_INVLPG
55# define __flush_tlb_one(addr) __flush_tlb_single(addr) 53# define __flush_tlb_one(addr) __flush_tlb_single(addr)
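The __flush_tlb_single() fix above passes the address in a register and writes the memory operand as (%0) explicitly, with a "memory" clobber to keep the flush ordered against surrounding accesses; the old "m" form let the compiler pick an addressing mode for *(char *)addr. The corrected shape, as a compile-only sketch (invlpg is privileged, so this can only execute in ring 0):

/* Flush the TLB entry covering one linear address. */
void demo_flush_tlb_single(unsigned long addr)
{
	__asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory");
}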
diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h
index 97b828ce31e0..c13933185c1c 100644
--- a/include/asm-i386/tsc.h
+++ b/include/asm-i386/tsc.h
@@ -6,7 +6,6 @@
6#ifndef _ASM_i386_TSC_H 6#ifndef _ASM_i386_TSC_H
7#define _ASM_i386_TSC_H 7#define _ASM_i386_TSC_H
8 8
9#include <linux/config.h>
10#include <asm/processor.h> 9#include <asm/processor.h>
11 10
12/* 11/*
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index fc1c8ddae149..565d0897b205 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -323,10 +323,11 @@
323#define __NR_tee 315 323#define __NR_tee 315
324#define __NR_vmsplice 316 324#define __NR_vmsplice 316
325#define __NR_move_pages 317 325#define __NR_move_pages 317
326#define __NR_getcpu 318
326 327
327#ifdef __KERNEL__ 328#ifdef __KERNEL__
328 329
329#define NR_syscalls 318 330#define NR_syscalls 319
330 331
331/* 332/*
332 * user-visible error numbers are in the range -1 - -128: see 333 * user-visible error numbers are in the range -1 - -128: see
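
The new syscall slot can be exercised directly, assuming an i386 kernel with this patch applied; glibc of the day has no getcpu() wrapper, so syscall(2) it is:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_getcpu
#define __NR_getcpu 318		/* i386 number added in this hunk */
#endif

int main(void)
{
	unsigned cpu = 0, node = 0;

	/* third argument is the (optional) getcpu_cache pointer */
	if (syscall(__NR_getcpu, &cpu, &node, NULL) == 0)
		printf("running on cpu %u, node %u\n", cpu, node);
	else
		perror("getcpu");
	return 0;
}
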
diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h
index 4c1a0b968569..5031d693b89d 100644
--- a/include/asm-i386/unwind.h
+++ b/include/asm-i386/unwind.h
@@ -18,6 +18,7 @@ struct unwind_frame_info
18{ 18{
19 struct pt_regs regs; 19 struct pt_regs regs;
20 struct task_struct *task; 20 struct task_struct *task;
21 unsigned call_frame:1;
21}; 22};
22 23
23#define UNW_PC(frame) (frame)->regs.eip 24#define UNW_PC(frame) (frame)->regs.eip
@@ -28,6 +29,8 @@ struct unwind_frame_info
28#define FRAME_LINK_OFFSET 0 29#define FRAME_LINK_OFFSET 0
29#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.esp0) 30#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.esp0)
30#define STACK_TOP(tsk) ((tsk)->thread.esp0) 31#define STACK_TOP(tsk) ((tsk)->thread.esp0)
32#else
33#define UNW_FP(frame) ((void)(frame), 0)
31#endif 34#endif
32#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) 35#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1))
33 36
@@ -42,6 +45,10 @@ struct unwind_frame_info
42 PTREGS_INFO(edi), \ 45 PTREGS_INFO(edi), \
43 PTREGS_INFO(eip) 46 PTREGS_INFO(eip)
44 47
48#define UNW_DEFAULT_RA(raItem, dataAlign) \
49 ((raItem).where == Memory && \
50 !((raItem).value * (dataAlign) + 4))
51
45static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, 52static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
46 /*const*/ struct pt_regs *regs) 53 /*const*/ struct pt_regs *regs)
47{ 54{
@@ -88,6 +95,7 @@ static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
88 95
89#define UNW_PC(frame) ((void)(frame), 0) 96#define UNW_PC(frame) ((void)(frame), 0)
90#define UNW_SP(frame) ((void)(frame), 0) 97#define UNW_SP(frame) ((void)(frame), 0)
98#define UNW_FP(frame) ((void)(frame), 0)
91 99
92static inline int arch_unw_user_mode(const void *info) 100static inline int arch_unw_user_mode(const void *info)
93{ 101{
diff --git a/include/asm-ia64/module.h b/include/asm-ia64/module.h
index 85c82bd819f2..d2da61e4c49b 100644
--- a/include/asm-ia64/module.h
+++ b/include/asm-ia64/module.h
@@ -28,7 +28,8 @@ struct mod_arch_specific {
28#define Elf_Ehdr Elf64_Ehdr 28#define Elf_Ehdr Elf64_Ehdr
29 29
30#define MODULE_PROC_FAMILY "ia64" 30#define MODULE_PROC_FAMILY "ia64"
31#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY 31#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY \
32 "gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)
32 33
33#define ARCH_SHF_SMALL SHF_IA_64_SHORT 34#define ARCH_SHF_SMALL SHF_IA_64_SHORT
34 35
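
The vermagic string now bakes the compiler's major.minor version into every ia64 module, so a module built with a different gcc is rejected at load time rather than misbehaving later. What the macro expands to can be checked in isolation (__stringify re-created here; the output comment assumes gcc 4.1):

#include <stdio.h>

#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)

#define MODULE_PROC_FAMILY "ia64"
#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY \
	"gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)

int main(void)
{
	/* e.g. "ia64gcc-4.1" when built with gcc 4.1 */
	printf("%s\n", MODULE_ARCH_VERMAGIC);
	return 0;
}
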
diff --git a/include/asm-um/alternative-asm.i b/include/asm-um/alternative-asm.i
new file mode 100644
index 000000000000..cae9faca132f
--- /dev/null
+++ b/include/asm-um/alternative-asm.i
@@ -0,0 +1,6 @@
1#ifndef __UM_ALTERNATIVE_ASM_I
2#define __UM_ALTERNATIVE_ASM_I
3
4#include "asm/arch/alternative-asm.i"
5
6#endif
diff --git a/include/asm-um/frame.i b/include/asm-um/frame.i
new file mode 100644
index 000000000000..09d5dca5d928
--- /dev/null
+++ b/include/asm-um/frame.i
@@ -0,0 +1,6 @@
1#ifndef __UM_FRAME_I
2#define __UM_FRAME_I
3
4#include "asm/arch/frame.i"
5
6#endif
diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h
index 2c95a319c056..ed59aa4c6ff9 100644
--- a/include/asm-x86_64/acpi.h
+++ b/include/asm-x86_64/acpi.h
@@ -155,8 +155,6 @@ extern void acpi_reserve_bootmem(void);
155 155
156#endif /*CONFIG_ACPI_SLEEP*/ 156#endif /*CONFIG_ACPI_SLEEP*/
157 157
158#define boot_cpu_physical_apicid boot_cpu_id
159
160extern int acpi_disabled; 158extern int acpi_disabled;
161extern int acpi_pci_disabled; 159extern int acpi_pci_disabled;
162 160
diff --git a/include/asm-x86_64/alternative-asm.i b/include/asm-x86_64/alternative-asm.i
new file mode 100644
index 000000000000..e4041f4fa4dc
--- /dev/null
+++ b/include/asm-x86_64/alternative-asm.i
@@ -0,0 +1,14 @@
1#include <linux/config.h>
2
3#ifdef CONFIG_SMP
4 .macro LOCK_PREFIX
51: lock
6 .section .smp_locks,"a"
7 .align 8
8 .quad 1b
9 .previous
10 .endm
11#else
12 .macro LOCK_PREFIX
13 .endm
14#endif
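
This assembler macro is the .S-file counterpart of the C-side LOCK_PREFIX: every lock prefix emitted through it also records its own address in the .smp_locks section. A hedged sketch of what that bookkeeping buys at boot on a uniprocessor machine (names here are illustrative, not the kernel's): walk the recorded addresses and turn each lock prefix into a one-byte NOP.

static void smp_unlock_model(unsigned long *start, unsigned long *end)
{
	unsigned long *p;

	for (p = start; p < end; p++) {
		unsigned char *insn = (unsigned char *)*p;

		if (*insn == 0xf0)	/* lock prefix */
			*insn = 0x90;	/* nop */
	}
}
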
diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h
index 9c96a0a8d1bd..9e66d32330c9 100644
--- a/include/asm-x86_64/apic.h
+++ b/include/asm-x86_64/apic.h
@@ -17,6 +17,8 @@
17 17
18extern int apic_verbosity; 18extern int apic_verbosity;
19extern int apic_runs_main_timer; 19extern int apic_runs_main_timer;
20extern int ioapic_force;
21extern int apic_mapped;
20 22
21/* 23/*
22 * Define the default level of output to be very little 24 * Define the default level of output to be very little
@@ -29,8 +31,6 @@ extern int apic_runs_main_timer;
29 printk(s, ##a); \ 31 printk(s, ##a); \
30 } while (0) 32 } while (0)
31 33
32#ifdef CONFIG_X86_LOCAL_APIC
33
34struct pt_regs; 34struct pt_regs;
35 35
36/* 36/*
@@ -95,17 +95,12 @@ extern void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
95#define K8_APIC_EXT_INT_MSG_EXT 0x7 95#define K8_APIC_EXT_INT_MSG_EXT 0x7
96#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD 0 96#define K8_APIC_EXT_LVT_ENTRY_THRESHOLD 0
97 97
98extern int disable_timer_pin_1;
99
100
101void smp_send_timer_broadcast_ipi(void); 98void smp_send_timer_broadcast_ipi(void);
102void switch_APIC_timer_to_ipi(void *cpumask); 99void switch_APIC_timer_to_ipi(void *cpumask);
103void switch_ipi_to_APIC_timer(void *cpumask); 100void switch_ipi_to_APIC_timer(void *cpumask);
104 101
105#define ARCH_APICTIMER_STOPS_ON_C3 1 102#define ARCH_APICTIMER_STOPS_ON_C3 1
106 103
107#endif /* CONFIG_X86_LOCAL_APIC */
108
109extern unsigned boot_cpu_id; 104extern unsigned boot_cpu_id;
110 105
111#endif /* __ASM_APIC_H */ 106#endif /* __ASM_APIC_H */
diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h
index f7ba57b1cc08..5b535eaf5309 100644
--- a/include/asm-x86_64/bitops.h
+++ b/include/asm-x86_64/bitops.h
@@ -399,6 +399,8 @@ static __inline__ int fls(int x)
399 return r+1; 399 return r+1;
400} 400}
401 401
402#define ARCH_HAS_FAST_MULTIPLIER 1
403
402#include <asm-generic/bitops/hweight.h> 404#include <asm-generic/bitops/hweight.h>
403 405
404#endif /* __KERNEL__ */ 406#endif /* __KERNEL__ */
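
ARCH_HAS_FAST_MULTIPLIER tells the asm-generic hweight code that a multiply is cheap on this CPU, so the final byte-summing step can be a single multiply instead of a shift/add ladder. The multiply variant, as a standalone check:

#include <stdio.h>

static unsigned int hweight32_mul(unsigned int w)
{
	w = w - ((w >> 1) & 0x55555555);		/* 2-bit sums  */
	w = (w & 0x33333333) + ((w >> 2) & 0x33333333);	/* 4-bit sums  */
	w = (w + (w >> 4)) & 0x0f0f0f0f;		/* byte sums   */
	return (w * 0x01010101) >> 24;			/* add bytes   */
}

int main(void)
{
	printf("%u\n", hweight32_mul(0xdeadbeef));	/* 24 set bits */
	return 0;
}
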
diff --git a/include/asm-x86_64/calgary.h b/include/asm-x86_64/calgary.h
index 4e3919524240..6b93f5a3a5c8 100644
--- a/include/asm-x86_64/calgary.h
+++ b/include/asm-x86_64/calgary.h
@@ -24,7 +24,6 @@
24#ifndef _ASM_X86_64_CALGARY_H 24#ifndef _ASM_X86_64_CALGARY_H
25#define _ASM_X86_64_CALGARY_H 25#define _ASM_X86_64_CALGARY_H
26 26
27#include <linux/config.h>
28#include <linux/spinlock.h> 27#include <linux/spinlock.h>
29#include <linux/device.h> 28#include <linux/device.h>
30#include <linux/dma-mapping.h> 29#include <linux/dma-mapping.h>
@@ -34,12 +33,12 @@ struct iommu_table {
34 unsigned long it_base; /* mapped address of tce table */ 33 unsigned long it_base; /* mapped address of tce table */
35 unsigned long it_hint; /* Hint for next alloc */ 34 unsigned long it_hint; /* Hint for next alloc */
36 unsigned long *it_map; /* A simple allocation bitmap for now */ 35 unsigned long *it_map; /* A simple allocation bitmap for now */
36 void __iomem *bbar; /* Bridge BAR */
37 u64 tar_val; /* Table Address Register */
38 struct timer_list watchdog_timer;
37 spinlock_t it_lock; /* Protects it_map */ 39 spinlock_t it_lock; /* Protects it_map */
38 unsigned int it_size; /* Size of iommu table in entries */ 40 unsigned int it_size; /* Size of iommu table in entries */
39 unsigned char it_busno; /* Bus number this table belongs to */ 41 unsigned char it_busno; /* Bus number this table belongs to */
40 void __iomem *bbar;
41 u64 tar_val;
42 struct timer_list watchdog_timer;
43}; 42};
44 43
45#define TCE_TABLE_SIZE_UNSPECIFIED ~0 44#define TCE_TABLE_SIZE_UNSPECIFIED ~0
diff --git a/include/asm-x86_64/dwarf2.h b/include/asm-x86_64/dwarf2.h
index 0744db777676..eedc08526b0b 100644
--- a/include/asm-x86_64/dwarf2.h
+++ b/include/asm-x86_64/dwarf2.h
@@ -13,7 +13,7 @@
13 away for older version. 13 away for older version.
14 */ 14 */
15 15
16#ifdef CONFIG_UNWIND_INFO 16#ifdef CONFIG_AS_CFI
17 17
18#define CFI_STARTPROC .cfi_startproc 18#define CFI_STARTPROC .cfi_startproc
19#define CFI_ENDPROC .cfi_endproc 19#define CFI_ENDPROC .cfi_endproc
@@ -28,6 +28,11 @@
28#define CFI_REMEMBER_STATE .cfi_remember_state 28#define CFI_REMEMBER_STATE .cfi_remember_state
29#define CFI_RESTORE_STATE .cfi_restore_state 29#define CFI_RESTORE_STATE .cfi_restore_state
30#define CFI_UNDEFINED .cfi_undefined 30#define CFI_UNDEFINED .cfi_undefined
31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
32#define CFI_SIGNAL_FRAME .cfi_signal_frame
33#else
34#define CFI_SIGNAL_FRAME
35#endif
31 36
32#else 37#else
33 38
@@ -45,6 +50,7 @@
45#define CFI_REMEMBER_STATE # 50#define CFI_REMEMBER_STATE #
46#define CFI_RESTORE_STATE # 51#define CFI_RESTORE_STATE #
47#define CFI_UNDEFINED # 52#define CFI_UNDEFINED #
53#define CFI_SIGNAL_FRAME #
48 54
49#endif 55#endif
50 56
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h
index f65674832318..e15d3c8628f3 100644
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -19,13 +19,9 @@
19 19
20#define E820_RAM 1 20#define E820_RAM 1
21#define E820_RESERVED 2 21#define E820_RESERVED 2
22#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ 22#define E820_ACPI 3
23#define E820_NVS 4 23#define E820_NVS 4
24 24
25#define HIGH_MEMORY (1024*1024)
26
27#define LOWMEMSIZE() (0x9f000)
28
29#ifndef __ASSEMBLY__ 25#ifndef __ASSEMBLY__
30struct e820entry { 26struct e820entry {
31 u64 addr; /* start of memory segment */ 27 u64 addr; /* start of memory segment */
@@ -56,8 +52,7 @@ extern void e820_setup_gap(void);
56extern unsigned long e820_hole_size(unsigned long start_pfn, 52extern unsigned long e820_hole_size(unsigned long start_pfn,
57 unsigned long end_pfn); 53 unsigned long end_pfn);
58 54
59extern void __init parse_memopt(char *p, char **end); 55extern void finish_e820_parsing(void);
60extern void __init parse_memmapopt(char *p, char **end);
61 56
62extern struct e820map e820; 57extern struct e820map e820;
63 58
diff --git a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h
index 0b4ffbd1a125..1b620db5b9e3 100644
--- a/include/asm-x86_64/fixmap.h
+++ b/include/asm-x86_64/fixmap.h
@@ -37,13 +37,9 @@ enum fixed_addresses {
37 VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, 37 VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
38 VSYSCALL_HPET, 38 VSYSCALL_HPET,
39 FIX_HPET_BASE, 39 FIX_HPET_BASE,
40#ifdef CONFIG_X86_LOCAL_APIC
41 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ 40 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
42#endif
43#ifdef CONFIG_X86_IO_APIC
44 FIX_IO_APIC_BASE_0, 41 FIX_IO_APIC_BASE_0,
45 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, 42 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
46#endif
47 __end_of_fixed_addresses 43 __end_of_fixed_addresses
48}; 44};
49 45
diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h
index 50b38e7c58e4..81e714665344 100644
--- a/include/asm-x86_64/genapic.h
+++ b/include/asm-x86_64/genapic.h
@@ -16,7 +16,6 @@ struct genapic {
16 char *name; 16 char *name;
17 u32 int_delivery_mode; 17 u32 int_delivery_mode;
18 u32 int_dest_mode; 18 u32 int_dest_mode;
19 u32 int_delivery_dest; /* for quick IPIs */
20 int (*apic_id_registered)(void); 19 int (*apic_id_registered)(void);
21 cpumask_t (*target_cpus)(void); 20 cpumask_t (*target_cpus)(void);
22 void (*init_apic_ldr)(void); 21 void (*init_apic_ldr)(void);
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h
index cba8a3b0cded..0217b74cc9fc 100644
--- a/include/asm-x86_64/i387.h
+++ b/include/asm-x86_64/i387.h
@@ -24,6 +24,7 @@ extern unsigned int mxcsr_feature_mask;
24extern void mxcsr_feature_mask_init(void); 24extern void mxcsr_feature_mask_init(void);
25extern void init_fpu(struct task_struct *child); 25extern void init_fpu(struct task_struct *child);
26extern int save_i387(struct _fpstate __user *buf); 26extern int save_i387(struct _fpstate __user *buf);
27extern asmlinkage void math_state_restore(void);
27 28
28/* 29/*
29 * FPU lazy state save handling... 30 * FPU lazy state save handling...
@@ -31,7 +32,9 @@ extern int save_i387(struct _fpstate __user *buf);
31 32
32#define unlazy_fpu(tsk) do { \ 33#define unlazy_fpu(tsk) do { \
33 if (task_thread_info(tsk)->status & TS_USEDFPU) \ 34 if (task_thread_info(tsk)->status & TS_USEDFPU) \
34 save_init_fpu(tsk); \ 35 save_init_fpu(tsk); \
36 else \
37 tsk->fpu_counter = 0; \
35} while (0) 38} while (0)
36 39
37/* Ignore delayed exceptions from user space */ 40/* Ignore delayed exceptions from user space */
@@ -134,8 +137,8 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
134#else 137#else
135 : [fx] "cdaSDb" (fx), "0" (0)); 138 : [fx] "cdaSDb" (fx), "0" (0));
136#endif 139#endif
137 if (unlikely(err)) 140 if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct)))
138 __clear_user(fx, sizeof(struct i387_fxsave_struct)); 141 err = -EFAULT;
139 /* No need to clear here because the caller clears USED_MATH */ 142 /* No need to clear here because the caller clears USED_MATH */
140 return err; 143 return err;
141} 144}
diff --git a/include/asm-x86_64/intel_arch_perfmon.h b/include/asm-x86_64/intel_arch_perfmon.h
index 59c396431569..8633331420ec 100644
--- a/include/asm-x86_64/intel_arch_perfmon.h
+++ b/include/asm-x86_64/intel_arch_perfmon.h
@@ -14,6 +14,18 @@
14 14
15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) 15#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c)
16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 16#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0) 17#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0)
18#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
19 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
20
21union cpuid10_eax {
22 struct {
23 unsigned int version_id:8;
24 unsigned int num_counters:8;
25 unsigned int bit_width:8;
26 unsigned int mask_length:8;
27 } split;
28 unsigned int full;
29};
18 30
19#endif /* X86_64_INTEL_ARCH_PERFMON_H */ 31#endif /* X86_64_INTEL_ARCH_PERFMON_H */
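
The new union maps EAX of CPUID leaf 0xA, the architectural performance-monitoring enumeration. A user-space probe, assuming a 64-bit build and a CPU that implements the leaf (Intel Core and later):

#include <stdio.h>

union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};

int main(void)
{
	union cpuid10_eax eax;
	unsigned int ebx, ecx, edx;

	asm("cpuid"
	    : "=a" (eax.full), "=b" (ebx), "=c" (ecx), "=d" (edx)
	    : "a" (0xa));

	printf("perfmon version %u, %u counters of %u bits\n",
	       eax.split.version_id, eax.split.num_counters,
	       eax.split.bit_width);
	return 0;
}
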
diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h
index fb7a0909a174..5d1b5c68e36e 100644
--- a/include/asm-x86_64/io_apic.h
+++ b/include/asm-x86_64/io_apic.h
@@ -10,8 +10,6 @@
10 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar 10 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
11 */ 11 */
12 12
13#ifdef CONFIG_X86_IO_APIC
14
15#ifdef CONFIG_PCI_MSI 13#ifdef CONFIG_PCI_MSI
16static inline int use_pci_vector(void) {return 1;} 14static inline int use_pci_vector(void) {return 1;}
17static inline void disable_edge_ioapic_vector(unsigned int vector) { } 15static inline void disable_edge_ioapic_vector(unsigned int vector) { }
@@ -209,10 +207,6 @@ extern int timer_uses_ioapic_pin_0;
209 207
210extern int sis_apic_bug; /* dummy */ 208extern int sis_apic_bug; /* dummy */
211 209
212#else /* !CONFIG_X86_IO_APIC */
213#define io_apic_assign_pci_irqs 0
214#endif
215
216extern int assign_irq_vector(int irq); 210extern int assign_irq_vector(int irq);
217 211
218void enable_NMI_through_LVT0 (void * dummy); 212void enable_NMI_through_LVT0 (void * dummy);
diff --git a/include/asm-x86_64/irq.h b/include/asm-x86_64/irq.h
index 9db5a1b4f7b1..43469d8ab71a 100644
--- a/include/asm-x86_64/irq.h
+++ b/include/asm-x86_64/irq.h
@@ -44,9 +44,7 @@ static __inline__ int irq_canonicalize(int irq)
44 return ((irq == 2) ? 9 : irq); 44 return ((irq == 2) ? 9 : irq);
45} 45}
46 46
47#ifdef CONFIG_X86_LOCAL_APIC
48#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */ 47#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
49#endif
50 48
51#ifdef CONFIG_HOTPLUG_CPU 49#ifdef CONFIG_HOTPLUG_CPU
52#include <linux/cpumask.h> 50#include <linux/cpumask.h>
diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h
index c564bae03433..5fab957e1091 100644
--- a/include/asm-x86_64/kexec.h
+++ b/include/asm-x86_64/kexec.h
@@ -1,6 +1,27 @@
1#ifndef _X86_64_KEXEC_H 1#ifndef _X86_64_KEXEC_H
2#define _X86_64_KEXEC_H 2#define _X86_64_KEXEC_H
3 3
4#define PA_CONTROL_PAGE 0
5#define VA_CONTROL_PAGE 1
6#define PA_PGD 2
7#define VA_PGD 3
8#define PA_PUD_0 4
9#define VA_PUD_0 5
10#define PA_PMD_0 6
11#define VA_PMD_0 7
12#define PA_PTE_0 8
13#define VA_PTE_0 9
14#define PA_PUD_1 10
15#define VA_PUD_1 11
16#define PA_PMD_1 12
17#define VA_PMD_1 13
18#define PA_PTE_1 14
19#define VA_PTE_1 15
20#define PA_TABLE_PAGE 16
21#define PAGES_NR 17
22
23#ifndef __ASSEMBLY__
24
4#include <linux/string.h> 25#include <linux/string.h>
5 26
6#include <asm/page.h> 27#include <asm/page.h>
@@ -64,4 +85,12 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
64 newregs->rip = (unsigned long)current_text_addr(); 85 newregs->rip = (unsigned long)current_text_addr();
65 } 86 }
66} 87}
88
89NORET_TYPE void
90relocate_kernel(unsigned long indirection_page,
91 unsigned long page_list,
92 unsigned long start_address) ATTRIB_NORET;
93
94#endif /* __ASSEMBLY__ */
95
67#endif /* _X86_64_KEXEC_H */ 96#endif /* _X86_64_KEXEC_H */
diff --git a/include/asm-x86_64/linkage.h b/include/asm-x86_64/linkage.h
index 291c2d01c44f..b5f39d0189ce 100644
--- a/include/asm-x86_64/linkage.h
+++ b/include/asm-x86_64/linkage.h
@@ -1,6 +1,6 @@
1#ifndef __ASM_LINKAGE_H 1#ifndef __ASM_LINKAGE_H
2#define __ASM_LINKAGE_H 2#define __ASM_LINKAGE_H
3 3
4/* Nothing to see here... */ 4#define __ALIGN .p2align 4,,15
5 5
6#endif 6#endif
diff --git a/include/asm-x86_64/mach_apic.h b/include/asm-x86_64/mach_apic.h
index 0acea44c9377..d33422450c00 100644
--- a/include/asm-x86_64/mach_apic.h
+++ b/include/asm-x86_64/mach_apic.h
@@ -16,7 +16,6 @@
16 16
17#define INT_DELIVERY_MODE (genapic->int_delivery_mode) 17#define INT_DELIVERY_MODE (genapic->int_delivery_mode)
18#define INT_DEST_MODE (genapic->int_dest_mode) 18#define INT_DEST_MODE (genapic->int_dest_mode)
19#define INT_DELIVERY_DEST (genapic->int_delivery_dest)
20#define TARGET_CPUS (genapic->target_cpus()) 19#define TARGET_CPUS (genapic->target_cpus())
21#define apic_id_registered (genapic->apic_id_registered) 20#define apic_id_registered (genapic->apic_id_registered)
22#define init_apic_ldr (genapic->init_apic_ldr) 21#define init_apic_ldr (genapic->init_apic_ldr)
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h
index d13687dfd691..5a11146d6d9c 100644
--- a/include/asm-x86_64/mce.h
+++ b/include/asm-x86_64/mce.h
@@ -99,6 +99,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)
99} 99}
100#endif 100#endif
101 101
102void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
103
102extern atomic_t mce_entry; 104extern atomic_t mce_entry;
103 105
104#endif 106#endif
diff --git a/include/asm-x86_64/mmx.h b/include/asm-x86_64/mmx.h
deleted file mode 100644
index 46b71da99869..000000000000
--- a/include/asm-x86_64/mmx.h
+++ /dev/null
@@ -1,14 +0,0 @@
1#ifndef _ASM_MMX_H
2#define _ASM_MMX_H
3
4/*
5 * MMX 3Dnow! helper operations
6 */
7
8#include <linux/types.h>
9
10extern void *_mmx_memcpy(void *to, const void *from, size_t size);
11extern void mmx_clear_page(void *page);
12extern void mmx_copy_page(void *to, void *from);
13
14#endif
diff --git a/include/asm-x86_64/mpspec.h b/include/asm-x86_64/mpspec.h
index 14fc3ddd9031..017fddb61dc5 100644
--- a/include/asm-x86_64/mpspec.h
+++ b/include/asm-x86_64/mpspec.h
@@ -159,13 +159,7 @@ struct mpc_config_lintsrc
159#define MAX_MP_BUSSES 256 159#define MAX_MP_BUSSES 256
160/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ 160/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */
161#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) 161#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
162enum mp_bustype { 162extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
163 MP_BUS_ISA = 1,
164 MP_BUS_EISA,
165 MP_BUS_PCI,
166 MP_BUS_MCA
167};
168extern unsigned char mp_bus_id_to_type [MAX_MP_BUSSES];
169extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; 163extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
170 164
171extern unsigned int boot_cpu_physical_apicid; 165extern unsigned int boot_cpu_physical_apicid;
@@ -178,18 +172,15 @@ extern int mp_irq_entries;
178extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; 172extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
179extern int mpc_default_type; 173extern int mpc_default_type;
180extern unsigned long mp_lapic_addr; 174extern unsigned long mp_lapic_addr;
181extern int pic_mode;
182 175
183#ifdef CONFIG_ACPI 176#ifdef CONFIG_ACPI
184extern void mp_register_lapic (u8 id, u8 enabled); 177extern void mp_register_lapic (u8 id, u8 enabled);
185extern void mp_register_lapic_address (u64 address); 178extern void mp_register_lapic_address (u64 address);
186 179
187#ifdef CONFIG_X86_IO_APIC
188extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base); 180extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
189extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); 181extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi);
190extern void mp_config_acpi_legacy_irqs (void); 182extern void mp_config_acpi_legacy_irqs (void);
191extern int mp_register_gsi (u32 gsi, int triggering, int polarity); 183extern int mp_register_gsi (u32 gsi, int triggering, int polarity);
192#endif /*CONFIG_X86_IO_APIC*/
193#endif 184#endif
194 185
195extern int using_apic_timer; 186extern int using_apic_timer;
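
Replacing the four-way bus-type array with mp_bus_not_pci reflects the question callers actually ask: is this bus PCI or not? One bit per bus answers it in a few words of memory instead of a 256-byte array. A toy model of the declaration and test (DECLARE_BITMAP/test_bit re-created in plain C):

#include <stdio.h>

#define MAX_MP_BUSSES	256
#define BITS_PER_LONG	(8 * sizeof(long))
#define DECLARE_BITMAP(name, bits) \
	unsigned long name[((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG]

static DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);

static void set_bit_model(int nr, unsigned long *map)
{
	map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static int test_bit_model(int nr, const unsigned long *map)
{
	return (map[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

int main(void)
{
	set_bit_model(3, mp_bus_not_pci);	/* say bus 3 is ISA */
	printf("bus 3 pci? %s\n",
	       test_bit_model(3, mp_bus_not_pci) ? "no" : "yes");
	return 0;
}
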
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
index 10f8b51cec8b..37e194169fac 100644
--- a/include/asm-x86_64/msr.h
+++ b/include/asm-x86_64/msr.h
@@ -66,14 +66,25 @@
66#define rdtscl(low) \ 66#define rdtscl(low) \
67 __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx") 67 __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
68 68
69#define rdtscp(low,high,aux) \
70 asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux))
71
69#define rdtscll(val) do { \ 72#define rdtscll(val) do { \
70 unsigned int __a,__d; \ 73 unsigned int __a,__d; \
71 asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ 74 asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
72 (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ 75 (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
73} while(0) 76} while(0)
74 77
78#define rdtscpll(val, aux) do { \
79 unsigned long __a, __d; \
80 asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \
81 (val) = (__d << 32) | __a; \
82} while (0)
83
75#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) 84#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
76 85
86#define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0)
87
77#define rdpmc(counter,low,high) \ 88#define rdpmc(counter,low,high) \
78 __asm__ __volatile__("rdpmc" \ 89 __asm__ __volatile__("rdpmc" \
79 : "=a" (low), "=d" (high) \ 90 : "=a" (low), "=d" (high) \
diff --git a/include/asm-x86_64/mutex.h b/include/asm-x86_64/mutex.h
index 06fab6de2a88..16396b1de3e4 100644
--- a/include/asm-x86_64/mutex.h
+++ b/include/asm-x86_64/mutex.h
@@ -25,13 +25,9 @@ do { \
25 \ 25 \
26 __asm__ __volatile__( \ 26 __asm__ __volatile__( \
27 LOCK_PREFIX " decl (%%rdi) \n" \ 27 LOCK_PREFIX " decl (%%rdi) \n" \
28 " js 2f \n" \ 28 " jns 1f \n" \
29 "1: \n" \ 29 " call "#fail_fn" \n" \
30 \ 30 "1:" \
31 LOCK_SECTION_START("") \
32 "2: call "#fail_fn" \n" \
33 " jmp 1b \n" \
34 LOCK_SECTION_END \
35 \ 31 \
36 :"=D" (dummy) \ 32 :"=D" (dummy) \
37 : "D" (v) \ 33 : "D" (v) \
@@ -75,13 +71,9 @@ do { \
75 \ 71 \
76 __asm__ __volatile__( \ 72 __asm__ __volatile__( \
77 LOCK_PREFIX " incl (%%rdi) \n" \ 73 LOCK_PREFIX " incl (%%rdi) \n" \
78 " jle 2f \n" \ 74 " jg 1f \n" \
79 "1: \n" \ 75 " call "#fail_fn" \n" \
80 \ 76 "1: " \
81 LOCK_SECTION_START("") \
82 "2: call "#fail_fn" \n" \
83 " jmp 1b \n" \
84 LOCK_SECTION_END \
85 \ 77 \
86 :"=D" (dummy) \ 78 :"=D" (dummy) \
87 : "D" (v) \ 79 : "D" (v) \
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h
index efb45c894d76..cbf2669bca71 100644
--- a/include/asm-x86_64/nmi.h
+++ b/include/asm-x86_64/nmi.h
@@ -7,24 +7,13 @@
7#include <linux/pm.h> 7#include <linux/pm.h>
8#include <asm/io.h> 8#include <asm/io.h>
9 9
10struct pt_regs;
11
12typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
13
14/**
15 * set_nmi_callback
16 *
17 * Set a handler for an NMI. Only one handler may be
18 * set. Return 1 if the NMI was handled.
19 */
20void set_nmi_callback(nmi_callback_t callback);
21
22/** 10/**
23 * unset_nmi_callback 11 * do_nmi_callback
24 * 12 *
25 * Remove the handler previously set. 13 * Check to see if a callback exists and execute it. Return 1
14 * if the handler exists and was handled successfully.
26 */ 15 */
27void unset_nmi_callback(void); 16int do_nmi_callback(struct pt_regs *regs, int cpu);
28 17
29#ifdef CONFIG_PM 18#ifdef CONFIG_PM
30 19
@@ -48,25 +37,32 @@ static inline void unset_nmi_pm_callback(struct pm_dev * dev)
48#endif /* CONFIG_PM */ 37#endif /* CONFIG_PM */
49 38
50extern void default_do_nmi(struct pt_regs *); 39extern void default_do_nmi(struct pt_regs *);
51extern void die_nmi(char *str, struct pt_regs *regs); 40extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
52 41
53#define get_nmi_reason() inb(0x61) 42#define get_nmi_reason() inb(0x61)
54 43
55extern int panic_on_timeout; 44extern int panic_on_timeout;
56extern int unknown_nmi_panic; 45extern int unknown_nmi_panic;
46extern int nmi_watchdog_enabled;
57 47
58extern int check_nmi_watchdog(void); 48extern int check_nmi_watchdog(void);
59 49extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
60extern void setup_apic_nmi_watchdog (void); 50extern int avail_to_resrv_perfctr_nmi(unsigned int);
61extern int reserve_lapic_nmi(void); 51extern int reserve_perfctr_nmi(unsigned int);
62extern void release_lapic_nmi(void); 52extern void release_perfctr_nmi(unsigned int);
53extern int reserve_evntsel_nmi(unsigned int);
54extern void release_evntsel_nmi(unsigned int);
55
56extern void setup_apic_nmi_watchdog (void *);
57extern void stop_apic_nmi_watchdog (void *);
63extern void disable_timer_nmi_watchdog(void); 58extern void disable_timer_nmi_watchdog(void);
64extern void enable_timer_nmi_watchdog(void); 59extern void enable_timer_nmi_watchdog(void);
65extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); 60extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
66 61
67extern void nmi_watchdog_default(void); 62extern void nmi_watchdog_default(void);
68extern int setup_nmi_watchdog(char *); 63extern int setup_nmi_watchdog(char *);
69 64
65extern atomic_t nmi_active;
70extern unsigned int nmi_watchdog; 66extern unsigned int nmi_watchdog;
71#define NMI_DEFAULT -1 67#define NMI_DEFAULT -1
72#define NMI_NONE 0 68#define NMI_NONE 0
diff --git a/include/asm-x86_64/pci-direct.h b/include/asm-x86_64/pci-direct.h
index 036b6ca5b53b..eba9cb471df3 100644
--- a/include/asm-x86_64/pci-direct.h
+++ b/include/asm-x86_64/pci-direct.h
@@ -2,47 +2,15 @@
2#define ASM_PCI_DIRECT_H 1 2#define ASM_PCI_DIRECT_H 1
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <asm/io.h>
6 5
7/* Direct PCI access. This is used for PCI accesses in early boot before 6/* Direct PCI access. This is used for PCI accesses in early boot before
8 the PCI subsystem works. */ 7 the PCI subsystem works. */
9 8
10#define PDprintk(x...) 9extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset);
10extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset);
11extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset);
12extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val);
11 13
12static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) 14extern int early_pci_allowed(void);
13{
14 u32 v;
15 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
16 v = inl(0xcfc);
17 if (v != 0xffffffff)
18 PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
19 return v;
20}
21
22static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
23{
24 u8 v;
25 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
26 v = inb(0xcfc + (offset&3));
27 PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
28 return v;
29}
30
31static inline u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
32{
33 u16 v;
34 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
35 v = inw(0xcfc + (offset&2));
36 PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
37 return v;
38}
39
40static inline void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
41 u32 val)
42{
43 PDprintk("%x writing to %x: %x\n", slot, offset, val);
44 outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
45 outl(val, 0xcfc);
46}
47 15
48#endif 16#endif
diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h
index b47c3df9ed1d..14996d962bac 100644
--- a/include/asm-x86_64/pda.h
+++ b/include/asm-x86_64/pda.h
@@ -9,20 +9,24 @@
9 9
10/* Per processor datastructure. %gs points to it while the kernel runs */ 10/* Per processor datastructure. %gs points to it while the kernel runs */
11struct x8664_pda { 11struct x8664_pda {
12 struct task_struct *pcurrent; /* Current process */ 12 struct task_struct *pcurrent; /* 0 Current process */
13 unsigned long data_offset; /* Per cpu data offset from linker address */ 13 unsigned long data_offset; /* 8 Per cpu data offset from linker
14 unsigned long kernelstack; /* top of kernel stack for current */ 14 address */
15 unsigned long oldrsp; /* user rsp for system call */ 15 unsigned long kernelstack; /* 16 top of kernel stack for current */
16#if DEBUG_STKSZ > EXCEPTION_STKSZ 16 unsigned long oldrsp; /* 24 user rsp for system call */
17 unsigned long debugstack; /* #DB/#BP stack. */ 17 int irqcount; /* 32 Irq nesting counter. Starts with -1 */
18 int cpunumber; /* 36 Logical CPU number */
19#ifdef CONFIG_CC_STACKPROTECTOR
20 unsigned long stack_canary; /* 40 stack canary value */
21 /* gcc-ABI: this canary MUST be at
22 offset 40!!! */
18#endif 23#endif
19 int irqcount; /* Irq nesting counter. Starts with -1 */ 24 char *irqstackptr;
20 int cpunumber; /* Logical CPU number */
21 char *irqstackptr; /* top of irqstack */
22 int nodenumber; /* number of current node */ 25 int nodenumber; /* number of current node */
23 unsigned int __softirq_pending; 26 unsigned int __softirq_pending;
24 unsigned int __nmi_count; /* number of NMI on this CPUs */ 27 unsigned int __nmi_count; /* number of NMI on this CPUs */
25 int mmu_state; 28 short mmu_state;
29 short isidle;
26 struct mm_struct *active_mm; 30 struct mm_struct *active_mm;
27 unsigned apic_timer_irqs; 31 unsigned apic_timer_irqs;
28} ____cacheline_aligned_in_smp; 32} ____cacheline_aligned_in_smp;
@@ -36,44 +40,69 @@ extern struct x8664_pda boot_cpu_pda[];
36 * There is no fast way to get the base address of the PDA, all the accesses 40 * There is no fast way to get the base address of the PDA, all the accesses
37 * have to mention %fs/%gs. So it needs to be done this Torvaldian way. 41 * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
38 */ 42 */
39#define sizeof_field(type,field) (sizeof(((type *)0)->field)) 43extern void __bad_pda_field(void) __attribute__((noreturn));
40#define typeof_field(type,field) typeof(((type *)0)->field)
41 44
42extern void __bad_pda_field(void); 45/*
46 * proxy_pda doesn't actually exist, but tell gcc it is accessed for
47 * all PDA accesses so it gets read/write dependencies right.
48 */
49extern struct x8664_pda _proxy_pda;
43 50
44#define pda_offset(field) offsetof(struct x8664_pda, field) 51#define pda_offset(field) offsetof(struct x8664_pda, field)
45 52
46#define pda_to_op(op,field,val) do { \ 53#define pda_to_op(op,field,val) do { \
47 typedef typeof_field(struct x8664_pda, field) T__; \ 54 typedef typeof(_proxy_pda.field) T__; \
48 switch (sizeof_field(struct x8664_pda, field)) { \ 55 if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
49case 2: \ 56 switch (sizeof(_proxy_pda.field)) { \
50asm volatile(op "w %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ 57 case 2: \
51case 4: \ 58 asm(op "w %1,%%gs:%c2" : \
52asm volatile(op "l %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ 59 "+m" (_proxy_pda.field) : \
53case 8: \ 60 "ri" ((T__)val), \
54asm volatile(op "q %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ 61 "i"(pda_offset(field))); \
55 default: __bad_pda_field(); \ 62 break; \
56 } \ 63 case 4: \
64 asm(op "l %1,%%gs:%c2" : \
65 "+m" (_proxy_pda.field) : \
66 "ri" ((T__)val), \
67 "i" (pda_offset(field))); \
68 break; \
69 case 8: \
70 asm(op "q %1,%%gs:%c2": \
71 "+m" (_proxy_pda.field) : \
72 "ri" ((T__)val), \
73 "i"(pda_offset(field))); \
74 break; \
75 default: \
76 __bad_pda_field(); \
77 } \
57 } while (0) 78 } while (0)
58 79
59/* 80#define pda_from_op(op,field) ({ \
60 * AK: PDA read accesses should be neither volatile nor have an memory clobber. 81 typeof(_proxy_pda.field) ret__; \
61 * Unfortunately removing them causes all hell to break lose currently. 82 switch (sizeof(_proxy_pda.field)) { \
62 */ 83 case 2: \
63#define pda_from_op(op,field) ({ \ 84 asm(op "w %%gs:%c1,%0" : \
64 typeof_field(struct x8664_pda, field) ret__; \ 85 "=r" (ret__) : \
65 switch (sizeof_field(struct x8664_pda, field)) { \ 86 "i" (pda_offset(field)), \
66case 2: \ 87 "m" (_proxy_pda.field)); \
67asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ 88 break; \
68case 4: \ 89 case 4: \
69asm volatile(op "l %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ 90 asm(op "l %%gs:%c1,%0": \
70case 8: \ 91 "=r" (ret__): \
71asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ 92 "i" (pda_offset(field)), \
72 default: __bad_pda_field(); \ 93 "m" (_proxy_pda.field)); \
73 } \ 94 break; \
95 case 8: \
96 asm(op "q %%gs:%c1,%0": \
97 "=r" (ret__) : \
98 "i" (pda_offset(field)), \
99 "m" (_proxy_pda.field)); \
100 break; \
101 default: \
102 __bad_pda_field(); \
103 } \
74 ret__; }) 104 ret__; })
75 105
76
77#define read_pda(field) pda_from_op("mov",field) 106#define read_pda(field) pda_from_op("mov",field)
78#define write_pda(field,val) pda_to_op("mov",field,val) 107#define write_pda(field,val) pda_to_op("mov",field,val)
79#define add_pda(field,val) pda_to_op("add",field,val) 108#define add_pda(field,val) pda_to_op("add",field,val)
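
_proxy_pda is the interesting trick in this hunk: the real PDA lives at %gs:offset, an address the compiler cannot name, so every access instead mentions the matching field of a dummy object in its asm constraints. gcc then tracks per-field read/write dependencies and orders the accesses correctly without the old blanket "memory" clobbers and volatiles. A toy user-space rendition, with a hidden pointer standing in for the %gs base (all names here are illustrative, and unlike the kernel's extern declaration the proxy is defined so the demo links):

#include <stdio.h>
#include <stddef.h>

struct pda { long kernelstack; long oldrsp; };

static struct pda boot_pda;			/* the real storage     */
static struct pda _proxy_pda;			/* never touched at run */
static struct pda *hidden_base = &boot_pda;	/* stands in for %gs    */

#define read_pda_model(field) ({					\
	typeof(_proxy_pda.field) ret__;					\
	asm("movq (%1), %0"						\
	    : "=r" (ret__)						\
	    : "r" ((char *)hidden_base +				\
		   offsetof(struct pda, field)),			\
	      "m" (_proxy_pda.field));	/* read dependency */		\
	ret__; })

#define write_pda_model(field, val)					\
	asm("movq %1, (%2)"						\
	    : "+m" (_proxy_pda.field)	/* write dependency */		\
	    : "r" ((long)(val)),					\
	      "r" ((char *)hidden_base + offsetof(struct pda, field)))

int main(void)
{
	write_pda_model(kernelstack, 0x1234);
	printf("kernelstack = %#lx\n", read_pda_model(kernelstack));
	return 0;
}
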
diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index bffb2f886a51..285756010c51 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -11,6 +11,16 @@
11 11
12#include <asm/pda.h> 12#include <asm/pda.h>
13 13
14#ifdef CONFIG_MODULES
15# define PERCPU_MODULE_RESERVE 8192
16#else
17# define PERCPU_MODULE_RESERVE 0
18#endif
19
20#define PERCPU_ENOUGH_ROOM \
21 (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
22 PERCPU_MODULE_RESERVE)
23
14#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) 24#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
15#define __my_cpu_offset() read_pda(data_offset) 25#define __my_cpu_offset() read_pda(data_offset)
16 26
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 51eba2395171..6899e770b173 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -21,12 +21,9 @@ extern unsigned long __supported_pte_mask;
21 21
22#define swapper_pg_dir init_level4_pgt 22#define swapper_pg_dir init_level4_pgt
23 23
24extern int nonx_setup(char *str);
25extern void paging_init(void); 24extern void paging_init(void);
26extern void clear_kernel_mapping(unsigned long addr, unsigned long size); 25extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
27 26
28extern unsigned long pgkern_mask;
29
30/* 27/*
31 * ZERO_PAGE is a global shared page that is always zero: used 28 * ZERO_PAGE is a global shared page that is always zero: used
32 * for zero-mapped memory areas etc.. 29 * for zero-mapped memory areas etc..
@@ -265,7 +262,7 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
265#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) 262#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
266static inline int pte_user(pte_t pte) { return pte_val(pte) & _PAGE_USER; } 263static inline int pte_user(pte_t pte) { return pte_val(pte) & _PAGE_USER; }
267static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER; } 264static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER; }
268static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_USER; } 265static inline int pte_exec(pte_t pte) { return !(pte_val(pte) & _PAGE_NX); }
269static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } 266static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
270static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } 267static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
271static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } 268static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
@@ -278,11 +275,12 @@ static inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) &
278static inline pte_t pte_mkold(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); return pte; } 275static inline pte_t pte_mkold(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); return pte; }
279static inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); return pte; } 276static inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); return pte; }
280static inline pte_t pte_mkread(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; } 277static inline pte_t pte_mkread(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; }
281static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; } 278static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_NX)); return pte; }
282static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; } 279static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
283static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } 280static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
284static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } 281static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
285static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; } 282static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; }
283static inline pte_t pte_clrhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_PSE)); return pte; }
286 284
287struct vm_area_struct; 285struct vm_area_struct;
288 286
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 038fe1f47e6f..b73d0c76613c 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -51,10 +51,8 @@ extern unsigned long long monotonic_base;
51extern int sysctl_vsyscall; 51extern int sysctl_vsyscall;
52extern int nohpet; 52extern int nohpet;
53extern unsigned long vxtime_hz; 53extern unsigned long vxtime_hz;
54extern void time_init_gtod(void);
54 55
55extern int numa_setup(char *opt);
56
57extern int setup_early_printk(char *);
58extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); 56extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
59 57
60extern void early_identify_cpu(struct cpuinfo_x86 *c); 58extern void early_identify_cpu(struct cpuinfo_x86 *c);
@@ -91,7 +89,7 @@ extern void syscall32_cpu_init(void);
91 89
92extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); 90extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
93 91
94extern void check_ioapic(void); 92extern void early_quirks(void);
95extern void check_efer(void); 93extern void check_efer(void);
96 94
97extern int unhandled_signal(struct task_struct *tsk, int sig); 95extern int unhandled_signal(struct task_struct *tsk, int sig);
@@ -103,13 +101,7 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c);
103extern unsigned long table_start, table_end; 101extern unsigned long table_start, table_end;
104 102
105extern int exception_trace; 103extern int exception_trace;
106extern int using_apic_timer;
107extern int disable_apic;
108extern unsigned cpu_khz; 104extern unsigned cpu_khz;
109extern int ioapic_force;
110extern int skip_ioapic_setup;
111extern int acpi_ht;
112extern int acpi_disabled;
113 105
114extern void no_iommu_init(void); 106extern void no_iommu_init(void);
115extern int force_iommu, no_iommu; 107extern int force_iommu, no_iommu;
@@ -131,7 +123,8 @@ extern int fix_aperture;
131 123
132extern int reboot_force; 124extern int reboot_force;
133extern int notsc_setup(char *); 125extern int notsc_setup(char *);
134extern int setup_additional_cpus(char *); 126
127extern int gsi_irq_sharing(int gsi);
135 128
136extern void smp_local_timer_interrupt(struct pt_regs * regs); 129extern void smp_local_timer_interrupt(struct pt_regs * regs);
137 130
diff --git a/include/asm-x86_64/rwlock.h b/include/asm-x86_64/rwlock.h
index dea0e9459264..72aeebed920b 100644
--- a/include/asm-x86_64/rwlock.h
+++ b/include/asm-x86_64/rwlock.h
@@ -18,69 +18,9 @@
18#ifndef _ASM_X86_64_RWLOCK_H 18#ifndef _ASM_X86_64_RWLOCK_H
19#define _ASM_X86_64_RWLOCK_H 19#define _ASM_X86_64_RWLOCK_H
20 20
21#include <linux/stringify.h>
22
23#define RW_LOCK_BIAS 0x01000000 21#define RW_LOCK_BIAS 0x01000000
24#define RW_LOCK_BIAS_STR "0x01000000" 22#define RW_LOCK_BIAS_STR "0x01000000"
25
26#define __build_read_lock_ptr(rw, helper) \
27 asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" \
28 "js 2f\n" \
29 "1:\n" \
30 LOCK_SECTION_START("") \
31 "2:\tcall " helper "\n\t" \
32 "jmp 1b\n" \
33 LOCK_SECTION_END \
34 ::"a" (rw) : "memory")
35
36#define __build_read_lock_const(rw, helper) \
37 asm volatile(LOCK_PREFIX "subl $1,%0\n\t" \
38 "js 2f\n" \
39 "1:\n" \
40 LOCK_SECTION_START("") \
41 "2:\tpushq %%rax\n\t" \
42 "leaq %0,%%rax\n\t" \
43 "call " helper "\n\t" \
44 "popq %%rax\n\t" \
45 "jmp 1b\n" \
46 LOCK_SECTION_END \
47 :"=m" (*((volatile int *)rw))::"memory")
48
49#define __build_read_lock(rw, helper) do { \
50 if (__builtin_constant_p(rw)) \
51 __build_read_lock_const(rw, helper); \
52 else \
53 __build_read_lock_ptr(rw, helper); \
54 } while (0)
55
56#define __build_write_lock_ptr(rw, helper) \
57 asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
58 "jnz 2f\n" \
59 "1:\n" \
60 LOCK_SECTION_START("") \
61 "2:\tcall " helper "\n\t" \
62 "jmp 1b\n" \
63 LOCK_SECTION_END \
64 ::"a" (rw) : "memory")
65
66#define __build_write_lock_const(rw, helper) \
67 asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
68 "jnz 2f\n" \
69 "1:\n" \
70 LOCK_SECTION_START("") \
71 "2:\tpushq %%rax\n\t" \
72 "leaq %0,%%rax\n\t" \
73 "call " helper "\n\t" \
74 "popq %%rax\n\t" \
75 "jmp 1b\n" \
76 LOCK_SECTION_END \
77 :"=m" (*((volatile long *)rw))::"memory")
78 23
79#define __build_write_lock(rw, helper) do { \ 24/* Actual code is in asm/spinlock.h or in arch/x86_64/lib/rwlock.S */
80 if (__builtin_constant_p(rw)) \
81 __build_write_lock_const(rw, helper); \
82 else \
83 __build_write_lock_ptr(rw, helper); \
84 } while (0)
85 25
86#endif 26#endif
diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h
index d4bed33fb32c..334ddcdd8f92 100644
--- a/include/asm-x86_64/segment.h
+++ b/include/asm-x86_64/segment.h
@@ -20,15 +20,16 @@
20#define __USER_CS 0x33 /* 6*8+3 */ 20#define __USER_CS 0x33 /* 6*8+3 */
21#define __USER32_DS __USER_DS 21#define __USER32_DS __USER_DS
22 22
23#define GDT_ENTRY_TLS 1
24#define GDT_ENTRY_TSS 8 /* needs two entries */ 23#define GDT_ENTRY_TSS 8 /* needs two entries */
25#define GDT_ENTRY_LDT 10 /* needs two entries */ 24#define GDT_ENTRY_LDT 10 /* needs two entries */
26#define GDT_ENTRY_TLS_MIN 12 25#define GDT_ENTRY_TLS_MIN 12
27#define GDT_ENTRY_TLS_MAX 14 26#define GDT_ENTRY_TLS_MAX 14
28/* 15 free */
29 27
30#define GDT_ENTRY_TLS_ENTRIES 3 28#define GDT_ENTRY_TLS_ENTRIES 3
31 29
30#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
31#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
32
32/* TLS indexes for 64bit - hardcoded in arch_prctl */ 33/* TLS indexes for 64bit - hardcoded in arch_prctl */
33#define FS_TLS 0 34#define FS_TLS 0
34#define GS_TLS 1 35#define GS_TLS 1
diff --git a/include/asm-x86_64/semaphore.h b/include/asm-x86_64/semaphore.h
index 064df08b9a0f..107bd90429e8 100644
--- a/include/asm-x86_64/semaphore.h
+++ b/include/asm-x86_64/semaphore.h
@@ -107,12 +107,9 @@ static inline void down(struct semaphore * sem)
107 __asm__ __volatile__( 107 __asm__ __volatile__(
108 "# atomic down operation\n\t" 108 "# atomic down operation\n\t"
109 LOCK_PREFIX "decl %0\n\t" /* --sem->count */ 109 LOCK_PREFIX "decl %0\n\t" /* --sem->count */
110 "js 2f\n" 110 "jns 1f\n\t"
111 "1:\n" 111 "call __down_failed\n"
112 LOCK_SECTION_START("") 112 "1:"
113 "2:\tcall __down_failed\n\t"
114 "jmp 1b\n"
115 LOCK_SECTION_END
116 :"=m" (sem->count) 113 :"=m" (sem->count)
117 :"D" (sem) 114 :"D" (sem)
118 :"memory"); 115 :"memory");
@@ -130,14 +127,11 @@ static inline int down_interruptible(struct semaphore * sem)
130 127
131 __asm__ __volatile__( 128 __asm__ __volatile__(
132 "# atomic interruptible down operation\n\t" 129 "# atomic interruptible down operation\n\t"
130 "xorl %0,%0\n\t"
133 LOCK_PREFIX "decl %1\n\t" /* --sem->count */ 131 LOCK_PREFIX "decl %1\n\t" /* --sem->count */
134 "js 2f\n\t" 132 "jns 2f\n\t"
135 "xorl %0,%0\n" 133 "call __down_failed_interruptible\n"
136 "1:\n" 134 "2:\n"
137 LOCK_SECTION_START("")
138 "2:\tcall __down_failed_interruptible\n\t"
139 "jmp 1b\n"
140 LOCK_SECTION_END
141 :"=a" (result), "=m" (sem->count) 135 :"=a" (result), "=m" (sem->count)
142 :"D" (sem) 136 :"D" (sem)
143 :"memory"); 137 :"memory");
@@ -154,14 +148,11 @@ static inline int down_trylock(struct semaphore * sem)
154 148
155 __asm__ __volatile__( 149 __asm__ __volatile__(
156 "# atomic interruptible down operation\n\t" 150 "# atomic interruptible down operation\n\t"
151 "xorl %0,%0\n\t"
157 LOCK_PREFIX "decl %1\n\t" /* --sem->count */ 152 LOCK_PREFIX "decl %1\n\t" /* --sem->count */
158 "js 2f\n\t" 153 "jns 2f\n\t"
159 "xorl %0,%0\n" 154 "call __down_failed_trylock\n\t"
160 "1:\n" 155 "2:\n"
161 LOCK_SECTION_START("")
162 "2:\tcall __down_failed_trylock\n\t"
163 "jmp 1b\n"
164 LOCK_SECTION_END
165 :"=a" (result), "=m" (sem->count) 156 :"=a" (result), "=m" (sem->count)
166 :"D" (sem) 157 :"D" (sem)
167 :"memory","cc"); 158 :"memory","cc");
@@ -179,12 +170,9 @@ static inline void up(struct semaphore * sem)
179 __asm__ __volatile__( 170 __asm__ __volatile__(
180 "# atomic up operation\n\t" 171 "# atomic up operation\n\t"
181 LOCK_PREFIX "incl %0\n\t" /* ++sem->count */ 172 LOCK_PREFIX "incl %0\n\t" /* ++sem->count */
182 "jle 2f\n" 173 "jg 1f\n\t"
183 "1:\n" 174 "call __up_wakeup\n"
184 LOCK_SECTION_START("") 175 "1:"
185 "2:\tcall __up_wakeup\n\t"
186 "jmp 1b\n"
187 LOCK_SECTION_END
188 :"=m" (sem->count) 176 :"=m" (sem->count)
189 :"D" (sem) 177 :"D" (sem)
190 :"memory"); 178 :"memory");
diff --git a/include/asm-x86_64/signal.h b/include/asm-x86_64/signal.h
index 3ede2a61973a..4581f978b299 100644
--- a/include/asm-x86_64/signal.h
+++ b/include/asm-x86_64/signal.h
@@ -24,10 +24,6 @@ typedef struct {
24} sigset_t; 24} sigset_t;
25 25
26 26
27struct pt_regs;
28asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
29
30
31#else 27#else
32/* Here we must cater to libcs that poke about in kernel headers. */ 28/* Here we must cater to libcs that poke about in kernel headers. */
33 29
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index ce97f65e1d10..d6b7c057edba 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -4,27 +4,18 @@
4/* 4/*
5 * We need the APIC definitions automatically as part of 'smp.h' 5 * We need the APIC definitions automatically as part of 'smp.h'
6 */ 6 */
7#ifndef __ASSEMBLY__
8#include <linux/threads.h> 7#include <linux/threads.h>
9#include <linux/cpumask.h> 8#include <linux/cpumask.h>
10#include <linux/bitops.h> 9#include <linux/bitops.h>
11extern int disable_apic; 10extern int disable_apic;
12#endif
13 11
14#ifdef CONFIG_X86_LOCAL_APIC
15#ifndef __ASSEMBLY__
16#include <asm/fixmap.h> 12#include <asm/fixmap.h>
17#include <asm/mpspec.h> 13#include <asm/mpspec.h>
18#ifdef CONFIG_X86_IO_APIC
19#include <asm/io_apic.h> 14#include <asm/io_apic.h>
20#endif
21#include <asm/apic.h> 15#include <asm/apic.h>
22#include <asm/thread_info.h> 16#include <asm/thread_info.h>
23#endif
24#endif
25 17
26#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
27#ifndef ASSEMBLY
28 19
29#include <asm/pda.h> 20#include <asm/pda.h>
30 21
@@ -42,7 +33,6 @@ extern cpumask_t cpu_initialized;
42 33
43extern void smp_alloc_memory(void); 34extern void smp_alloc_memory(void);
44extern volatile unsigned long smp_invalidate_needed; 35extern volatile unsigned long smp_invalidate_needed;
45extern int pic_mode;
46extern void lock_ipi_call_lock(void); 36extern void lock_ipi_call_lock(void);
47extern void unlock_ipi_call_lock(void); 37extern void unlock_ipi_call_lock(void);
48extern int smp_num_siblings; 38extern int smp_num_siblings;
@@ -74,20 +64,16 @@ static inline int hard_smp_processor_id(void)
74 return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); 64 return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
75} 65}
76 66
77extern int safe_smp_processor_id(void);
78extern int __cpu_disable(void); 67extern int __cpu_disable(void);
79extern void __cpu_die(unsigned int cpu); 68extern void __cpu_die(unsigned int cpu);
80extern void prefill_possible_map(void); 69extern void prefill_possible_map(void);
81extern unsigned num_processors; 70extern unsigned num_processors;
82extern unsigned disabled_cpus; 71extern unsigned disabled_cpus;
83 72
84#endif /* !ASSEMBLY */
85
86#define NO_PROC_ID 0xFF /* No processor magic marker */ 73#define NO_PROC_ID 0xFF /* No processor magic marker */
87 74
88#endif 75#endif
89 76
90#ifndef ASSEMBLY
91/* 77/*
92 * Some lowlevel functions might want to know about 78 * Some lowlevel functions might want to know about
93 * the real APIC ID <-> CPU # mapping. 79 * the real APIC ID <-> CPU # mapping.
@@ -109,11 +95,8 @@ static inline int cpu_present_to_apicid(int mps_cpu)
109 return BAD_APICID; 95 return BAD_APICID;
110} 96}
111 97
112#endif /* !ASSEMBLY */
113
114#ifndef CONFIG_SMP 98#ifndef CONFIG_SMP
115#define stack_smp_processor_id() 0 99#define stack_smp_processor_id() 0
116#define safe_smp_processor_id() 0
117#define cpu_logical_map(x) (x) 100#define cpu_logical_map(x) (x)
118#else 101#else
119#include <asm/thread_info.h> 102#include <asm/thread_info.h>
@@ -125,19 +108,23 @@ static inline int cpu_present_to_apicid(int mps_cpu)
125}) 108})
126#endif 109#endif
127 110
128#ifndef __ASSEMBLY__
129static __inline int logical_smp_processor_id(void) 111static __inline int logical_smp_processor_id(void)
130{ 112{
131 /* we don't want to mark this access volatile - bad code generation */ 113 /* we don't want to mark this access volatile - bad code generation */
132 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); 114 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
133} 115}
134#endif
135 116
136#ifdef CONFIG_SMP 117#ifdef CONFIG_SMP
137#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] 118#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
138#else 119#else
139#define cpu_physical_id(cpu) boot_cpu_id 120#define cpu_physical_id(cpu) boot_cpu_id
140#endif 121static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
141 122 void *info, int retry, int wait)
123{
124 /* Disable interrupts here? */
125 func(info);
126 return 0;
127}
128#endif /* !CONFIG_SMP */
142#endif 129#endif
143 130
diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
index 248a79f0eaff..be7a9e629fb2 100644
--- a/include/asm-x86_64/spinlock.h
+++ b/include/asm-x86_64/spinlock.h
@@ -16,31 +16,23 @@
16 * (the type definitions are in asm/spinlock_types.h) 16 * (the type definitions are in asm/spinlock_types.h)
17 */ 17 */
18 18
19#define __raw_spin_is_locked(x) \ 19static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
20 (*(volatile signed int *)(&(x)->slock) <= 0) 20{
21 21 return *(volatile signed int *)(&(lock)->slock) <= 0;
22#define __raw_spin_lock_string \ 22}
23 "\n1:\t" \
24 LOCK_PREFIX " ; decl %0\n\t" \
25 "js 2f\n" \
26 LOCK_SECTION_START("") \
27 "2:\t" \
28 "rep;nop\n\t" \
29 "cmpl $0,%0\n\t" \
30 "jle 2b\n\t" \
31 "jmp 1b\n" \
32 LOCK_SECTION_END
33
34#define __raw_spin_lock_string_up \
35 "\n\tdecl %0"
36
37#define __raw_spin_unlock_string \
38 "movl $1,%0" \
39 :"=m" (lock->slock) : : "memory"
40 23
41static inline void __raw_spin_lock(raw_spinlock_t *lock) 24static inline void __raw_spin_lock(raw_spinlock_t *lock)
42{ 25{
43 asm volatile(__raw_spin_lock_string : "=m" (lock->slock) : : "memory"); 26 asm volatile(
27 "\n1:\t"
28 LOCK_PREFIX " ; decl %0\n\t"
29 "jns 2f\n"
30 "3:\n"
31 "rep;nop\n\t"
32 "cmpl $0,%0\n\t"
33 "jle 3b\n\t"
34 "jmp 1b\n"
35 "2:\t" : "=m" (lock->slock) : : "memory");
44} 36}
45 37
46#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) 38#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
@@ -49,7 +41,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
49{ 41{
50 int oldval; 42 int oldval;
51 43
52 __asm__ __volatile__( 44 asm volatile(
53 "xchgl %0,%1" 45 "xchgl %0,%1"
54 :"=q" (oldval), "=m" (lock->slock) 46 :"=q" (oldval), "=m" (lock->slock)
55 :"0" (0) : "memory"); 47 :"0" (0) : "memory");
@@ -59,13 +51,14 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
59 51
60static inline void __raw_spin_unlock(raw_spinlock_t *lock) 52static inline void __raw_spin_unlock(raw_spinlock_t *lock)
61{ 53{
62 __asm__ __volatile__( 54 asm volatile("movl $1,%0" :"=m" (lock->slock) :: "memory");
63 __raw_spin_unlock_string
64 );
65} 55}
66 56
67#define __raw_spin_unlock_wait(lock) \ 57static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
68 do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) 58{
59 while (__raw_spin_is_locked(lock))
60 cpu_relax();
61}
69 62
70/* 63/*
71 * Read-write spinlocks, allowing multiple readers 64 * Read-write spinlocks, allowing multiple readers
@@ -79,26 +72,34 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
79 * 72 *
80 * On x86, we implement read-write locks as a 32-bit counter 73 * On x86, we implement read-write locks as a 32-bit counter
81 * with the high bit (sign) being the "contended" bit. 74 * with the high bit (sign) being the "contended" bit.
82 *
83 * The inline assembly is non-obvious. Think about it.
84 *
85 * Changed to use the same technique as rw semaphores. See
86 * semaphore.h for details. -ben
87 *
88 * the helpers are in arch/i386/kernel/semaphore.c
89 */ 75 */
90 76
91#define __raw_read_can_lock(x) ((int)(x)->lock > 0) 77static inline int __raw_read_can_lock(raw_rwlock_t *lock)
92#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) 78{
79 return (int)(lock)->lock > 0;
80}
81
82static inline int __raw_write_can_lock(raw_rwlock_t *lock)
83{
84 return (lock)->lock == RW_LOCK_BIAS;
85}
93 86
94static inline void __raw_read_lock(raw_rwlock_t *rw) 87static inline void __raw_read_lock(raw_rwlock_t *rw)
95{ 88{
96 __build_read_lock(rw, "__read_lock_failed"); 89 asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t"
90 "jns 1f\n"
91 "call __read_lock_failed\n"
92 "1:\n"
93 ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory");
97} 94}
98 95
99static inline void __raw_write_lock(raw_rwlock_t *rw) 96static inline void __raw_write_lock(raw_rwlock_t *rw)
100{ 97{
101 __build_write_lock(rw, "__write_lock_failed"); 98 asm volatile(LOCK_PREFIX "subl %1,(%0)\n\t"
99 "jz 1f\n"
100 "\tcall __write_lock_failed\n\t"
101 "1:\n"
102 ::"D" (rw), "i" (RW_LOCK_BIAS) : "memory");
102} 103}
103 104
104static inline int __raw_read_trylock(raw_rwlock_t *lock) 105static inline int __raw_read_trylock(raw_rwlock_t *lock)
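The string-macro lock loop above becomes ordinary inline functions with the assembly written in place. For readers who do not speak AT&T asm, a minimal C sketch of the same acquire/release protocol, using GCC atomic builtins instead of the patch's assembly (the names and builtins here are illustrative, not part of the patch):

	/* slock starts at 1; "LOCK decl" takes it to 0 on acquire */
	static inline void raw_spin_lock_sketch(volatile int *slock)
	{
		for (;;) {
			if (__sync_sub_and_fetch(slock, 1) >= 0)	/* decl + jns */
				return;			/* was 1, now 0: ours */
			while (*slock <= 0)		/* read-only spin */
				asm volatile("rep; nop" ::: "memory");
		}
	}

	static inline void raw_spin_unlock_sketch(volatile int *slock)
	{
		*slock = 1;				/* "movl $1,%0" */
	}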
diff --git a/include/asm-x86_64/stacktrace.h b/include/asm-x86_64/stacktrace.h
new file mode 100644
index 000000000000..5eb9799bef76
--- /dev/null
+++ b/include/asm-x86_64/stacktrace.h
@@ -0,0 +1,18 @@
1#ifndef _ASM_STACKTRACE_H
2#define _ASM_STACKTRACE_H 1
3
4/* Generic stack tracer with callbacks */
5
6struct stacktrace_ops {
7 void (*warning)(void *data, char *msg);
8 /* msg must contain %s for the symbol */
9 void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
10 void (*address)(void *data, unsigned long address);
11 /* On negative return stop dumping */
12 int (*stack)(void *data, char *name);
13};
14
15void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
16 struct stacktrace_ops *ops, void *data);
17
18#endif
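The new header defines a callback-driven tracer. A sketch of a consumer that collects raw return addresses into a fixed buffer (only struct stacktrace_ops and dump_trace() come from the header; everything else below is hypothetical):

	struct addr_buf {
		unsigned long addr[16];
		unsigned int n;
	};

	static void buf_warning(void *data, char *msg) { }
	static void buf_warning_symbol(void *data, char *msg, unsigned long sym) { }

	static int buf_stack(void *data, char *name)
	{
		return 0;	/* non-negative: keep dumping */
	}

	static void buf_address(void *data, unsigned long address)
	{
		struct addr_buf *b = data;

		if (b->n < 16)
			b->addr[b->n++] = address;
	}

	static struct stacktrace_ops buf_ops = {
		.warning	= buf_warning,
		.warning_symbol	= buf_warning_symbol,
		.stack		= buf_stack,
		.address	= buf_address,
	};

	/* dump_trace(current, NULL, NULL, &buf_ops, &b); */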
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 6bf170bceae1..bd376bc8c4ab 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -14,12 +14,13 @@
14#define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" 14#define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
15 15
16/* frame pointer must be last for get_wchan */ 16/* frame pointer must be last for get_wchan */
17#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" 17#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
18#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\n\t" 18#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
19 19
20#define __EXTRA_CLOBBER \ 20#define __EXTRA_CLOBBER \
21 ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" 21 ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
22 22
23/* Save restore flags to clear handle leaking NT */
23#define switch_to(prev,next,last) \ 24#define switch_to(prev,next,last) \
24 asm volatile(SAVE_CONTEXT \ 25 asm volatile(SAVE_CONTEXT \
25 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 26 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
diff --git a/include/asm-x86_64/tce.h b/include/asm-x86_64/tce.h
index 53e9a68b3336..dbb047febc5e 100644
--- a/include/asm-x86_64/tce.h
+++ b/include/asm-x86_64/tce.h
@@ -24,7 +24,6 @@
24#ifndef _ASM_X86_64_TCE_H 24#ifndef _ASM_X86_64_TCE_H
25#define _ASM_X86_64_TCE_H 25#define _ASM_X86_64_TCE_H
26 26
27extern void* tce_table_kva[];
28extern unsigned int specified_table_size; 27extern unsigned int specified_table_size;
29struct iommu_table; 28struct iommu_table;
30 29
diff --git a/include/asm-x86_64/therm_throt.h b/include/asm-x86_64/therm_throt.h
new file mode 100644
index 000000000000..5aac059007ba
--- /dev/null
+++ b/include/asm-x86_64/therm_throt.h
@@ -0,0 +1 @@
#include <asm-i386/therm_throt.h>
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index 2029b00351f3..787a08114b48 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -114,11 +114,14 @@ static inline struct thread_info *stack_thread_info(void)
114#define TIF_IRET 5 /* force IRET */ 114#define TIF_IRET 5 /* force IRET */
115#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ 115#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
116#define TIF_SECCOMP 8 /* secure computing */ 116#define TIF_SECCOMP 8 /* secure computing */
117#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
117/* 16 free */ 118/* 16 free */
118#define TIF_IA32 17 /* 32bit process */ 119#define TIF_IA32 17 /* 32bit process */
119#define TIF_FORK 18 /* ret_from_fork */ 120#define TIF_FORK 18 /* ret_from_fork */
120#define TIF_ABI_PENDING 19 121#define TIF_ABI_PENDING 19
121#define TIF_MEMDIE 20 122#define TIF_MEMDIE 20
123#define TIF_DEBUG 21 /* uses debug registers */
124#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
122 125
123#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) 126#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
124#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) 127#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -128,9 +131,12 @@ static inline struct thread_info *stack_thread_info(void)
128#define _TIF_IRET (1<<TIF_IRET) 131#define _TIF_IRET (1<<TIF_IRET)
129#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) 132#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
130#define _TIF_SECCOMP (1<<TIF_SECCOMP) 133#define _TIF_SECCOMP (1<<TIF_SECCOMP)
134#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
131#define _TIF_IA32 (1<<TIF_IA32) 135#define _TIF_IA32 (1<<TIF_IA32)
132#define _TIF_FORK (1<<TIF_FORK) 136#define _TIF_FORK (1<<TIF_FORK)
133#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) 137#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
138#define _TIF_DEBUG (1<<TIF_DEBUG)
139#define _TIF_IO_BITMAP (1<<TIF_IO_BITMAP)
134 140
135/* work to do on interrupt/exception return */ 141/* work to do on interrupt/exception return */
136#define _TIF_WORK_MASK \ 142#define _TIF_WORK_MASK \
@@ -138,6 +144,9 @@ static inline struct thread_info *stack_thread_info(void)
138/* work to do on any return to user space */ 144/* work to do on any return to user space */
139#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) 145#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
140 146
147/* flags to check in __switch_to() */
148#define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP)
149
141#define PREEMPT_ACTIVE 0x10000000 150#define PREEMPT_ACTIVE 0x10000000
142 151
143/* 152/*
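Grouping TIF_DEBUG and TIF_IO_BITMAP into _TIF_WORK_CTXSW lets the context switch test both rarely-set flags with a single branch. A sketch of the intended use (the real check lives in the arch's __switch_to(); the slow-path helper named here is hypothetical):

	if (unlikely((task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW) ||
		     (task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)))
		handle_ctxsw_work(prev_p, next_p);	/* debug regs, I/O bitmap */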
diff --git a/include/asm-x86_64/tlbflush.h b/include/asm-x86_64/tlbflush.h
index d16d5b60f419..983bd296c81a 100644
--- a/include/asm-x86_64/tlbflush.h
+++ b/include/asm-x86_64/tlbflush.h
@@ -4,44 +4,44 @@
4#include <linux/mm.h> 4#include <linux/mm.h>
5#include <asm/processor.h> 5#include <asm/processor.h>
6 6
7#define __flush_tlb() \ 7static inline unsigned long get_cr3(void)
8 do { \ 8{
9 unsigned long tmpreg; \ 9 unsigned long cr3;
10 \ 10 asm volatile("mov %%cr3,%0" : "=r" (cr3));
11 __asm__ __volatile__( \ 11 return cr3;
12 "movq %%cr3, %0; # flush TLB \n" \ 12}
13 "movq %0, %%cr3; \n" \
14 : "=r" (tmpreg) \
15 :: "memory"); \
16 } while (0)
17 13
18/* 14static inline void set_cr3(unsigned long cr3)
19 * Global pages have to be flushed a bit differently. Not a real 15{
20 * performance problem because this does not happen often. 16 asm volatile("mov %0,%%cr3" :: "r" (cr3) : "memory");
21 */ 17}
22#define __flush_tlb_global() \ 18
23 do { \ 19static inline void __flush_tlb(void)
24 unsigned long tmpreg, cr4, cr4_orig; \ 20{
25 \ 21 set_cr3(get_cr3());
26 __asm__ __volatile__( \ 22}
27 "movq %%cr4, %2; # turn off PGE \n" \ 23
28 "movq %2, %1; \n" \ 24static inline unsigned long get_cr4(void)
29 "andq %3, %1; \n" \ 25{
30 "movq %1, %%cr4; \n" \ 26 unsigned long cr4;
31 "movq %%cr3, %0; # flush TLB \n" \ 27 asm volatile("mov %%cr4,%0" : "=r" (cr4));
32 "movq %0, %%cr3; \n" \ 28 return cr4;
33 "movq %2, %%cr4; # turn PGE back on \n" \ 29}
34 : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \ 30
35 : "i" (~X86_CR4_PGE) \ 31static inline void set_cr4(unsigned long cr4)
36 : "memory"); \ 32{
37 } while (0) 33 asm volatile("mov %0,%%cr4" :: "r" (cr4) : "memory");
38 34}
39extern unsigned long pgkern_mask; 35
40 36static inline void __flush_tlb_all(void)
41#define __flush_tlb_all() __flush_tlb_global() 37{
38 unsigned long cr4 = get_cr4();
39 set_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */
40 set_cr4(cr4); /* write old PGE again and flush TLBs */
41}
42 42
43#define __flush_tlb_one(addr) \ 43#define __flush_tlb_one(addr) \
44 __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr)) 44 __asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory")
45 45
46 46
47/* 47/*
diff --git a/include/asm-x86_64/uaccess.h b/include/asm-x86_64/uaccess.h
index 1e1fa003daa3..e856570c0689 100644
--- a/include/asm-x86_64/uaccess.h
+++ b/include/asm-x86_64/uaccess.h
@@ -84,7 +84,7 @@ struct exception_table_entry
84 */ 84 */
85 85
86#define __get_user_x(size,ret,x,ptr) \ 86#define __get_user_x(size,ret,x,ptr) \
87 __asm__ __volatile__("call __get_user_" #size \ 87 asm volatile("call __get_user_" #size \
88 :"=a" (ret),"=d" (x) \ 88 :"=a" (ret),"=d" (x) \
89 :"c" (ptr) \ 89 :"c" (ptr) \
90 :"r8") 90 :"r8")
@@ -101,7 +101,7 @@ struct exception_table_entry
101 case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break; \ 101 case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break; \
102 default: __get_user_bad(); break; \ 102 default: __get_user_bad(); break; \
103 } \ 103 } \
104 (x) = (__typeof__(*(ptr)))__val_gu; \ 104 (x) = (typeof(*(ptr)))__val_gu; \
105 __ret_gu; \ 105 __ret_gu; \
106}) 106})
107 107
@@ -112,7 +112,7 @@ extern void __put_user_8(void);
112extern void __put_user_bad(void); 112extern void __put_user_bad(void);
113 113
114#define __put_user_x(size,ret,x,ptr) \ 114#define __put_user_x(size,ret,x,ptr) \
115 __asm__ __volatile__("call __put_user_" #size \ 115 asm volatile("call __put_user_" #size \
116 :"=a" (ret) \ 116 :"=a" (ret) \
117 :"c" (ptr),"d" (x) \ 117 :"c" (ptr),"d" (x) \
118 :"r8") 118 :"r8")
@@ -139,7 +139,7 @@ extern void __put_user_bad(void);
139#define __put_user_check(x,ptr,size) \ 139#define __put_user_check(x,ptr,size) \
140({ \ 140({ \
141 int __pu_err; \ 141 int __pu_err; \
142 __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ 142 typeof(*(ptr)) __user *__pu_addr = (ptr); \
143 switch (size) { \ 143 switch (size) { \
144 case 1: __put_user_x(1,__pu_err,x,__pu_addr); break; \ 144 case 1: __put_user_x(1,__pu_err,x,__pu_addr); break; \
145 case 2: __put_user_x(2,__pu_err,x,__pu_addr); break; \ 145 case 2: __put_user_x(2,__pu_err,x,__pu_addr); break; \
@@ -173,7 +173,7 @@ struct __large_struct { unsigned long buf[100]; };
173 * aliasing issues. 173 * aliasing issues.
174 */ 174 */
175#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \ 175#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \
176 __asm__ __volatile__( \ 176 asm volatile( \
177 "1: mov"itype" %"rtype"1,%2\n" \ 177 "1: mov"itype" %"rtype"1,%2\n" \
178 "2:\n" \ 178 "2:\n" \
179 ".section .fixup,\"ax\"\n" \ 179 ".section .fixup,\"ax\"\n" \
@@ -193,7 +193,7 @@ struct __large_struct { unsigned long buf[100]; };
193 int __gu_err; \ 193 int __gu_err; \
194 unsigned long __gu_val; \ 194 unsigned long __gu_val; \
195 __get_user_size(__gu_val,(ptr),(size),__gu_err); \ 195 __get_user_size(__gu_val,(ptr),(size),__gu_err); \
196 (x) = (__typeof__(*(ptr)))__gu_val; \ 196 (x) = (typeof(*(ptr)))__gu_val; \
197 __gu_err; \ 197 __gu_err; \
198}) 198})
199 199
@@ -217,7 +217,7 @@ do { \
217} while (0) 217} while (0)
218 218
219#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \ 219#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \
220 __asm__ __volatile__( \ 220 asm volatile( \
221 "1: mov"itype" %2,%"rtype"1\n" \ 221 "1: mov"itype" %2,%"rtype"1\n" \
222 "2:\n" \ 222 "2:\n" \
223 ".section .fixup,\"ax\"\n" \ 223 ".section .fixup,\"ax\"\n" \
@@ -237,15 +237,20 @@ do { \
237 */ 237 */
238 238
239/* Handles exceptions in both to and from, but doesn't do access_ok */ 239/* Handles exceptions in both to and from, but doesn't do access_ok */
240extern unsigned long copy_user_generic(void *to, const void *from, unsigned len); 240__must_check unsigned long
241 241copy_user_generic(void *to, const void *from, unsigned len);
242extern unsigned long copy_to_user(void __user *to, const void *from, unsigned len); 242
243extern unsigned long copy_from_user(void *to, const void __user *from, unsigned len); 243__must_check unsigned long
244extern unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); 244copy_to_user(void __user *to, const void *from, unsigned len);
245 245__must_check unsigned long
246static __always_inline int __copy_from_user(void *dst, const void __user *src, unsigned size) 246copy_from_user(void *to, const void __user *from, unsigned len);
247__must_check unsigned long
248copy_in_user(void __user *to, const void __user *from, unsigned len);
249
250static __always_inline __must_check
251int __copy_from_user(void *dst, const void __user *src, unsigned size)
247{ 252{
248 int ret = 0; 253 int ret = 0;
249 if (!__builtin_constant_p(size)) 254 if (!__builtin_constant_p(size))
250 return copy_user_generic(dst,(__force void *)src,size); 255 return copy_user_generic(dst,(__force void *)src,size);
251 switch (size) { 256 switch (size) {
@@ -272,9 +277,10 @@ static __always_inline int __copy_from_user(void *dst, const void __user *src, u
272 } 277 }
273} 278}
274 279
275static __always_inline int __copy_to_user(void __user *dst, const void *src, unsigned size) 280static __always_inline __must_check
281int __copy_to_user(void __user *dst, const void *src, unsigned size)
276{ 282{
277 int ret = 0; 283 int ret = 0;
278 if (!__builtin_constant_p(size)) 284 if (!__builtin_constant_p(size))
279 return copy_user_generic((__force void *)dst,src,size); 285 return copy_user_generic((__force void *)dst,src,size);
280 switch (size) { 286 switch (size) {
@@ -303,10 +309,10 @@ static __always_inline int __copy_to_user(void __user *dst, const void *src, uns
303 } 309 }
304} 310}
305 311
306 312static __always_inline __must_check
307static __always_inline int __copy_in_user(void __user *dst, const void __user *src, unsigned size) 313int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
308{ 314{
309 int ret = 0; 315 int ret = 0;
310 if (!__builtin_constant_p(size)) 316 if (!__builtin_constant_p(size))
311 return copy_user_generic((__force void *)dst,(__force void *)src,size); 317 return copy_user_generic((__force void *)dst,(__force void *)src,size);
312 switch (size) { 318 switch (size) {
@@ -344,15 +350,17 @@ static __always_inline int __copy_in_user(void __user *dst, const void __user *s
344 } 350 }
345} 351}
346 352
347long strncpy_from_user(char *dst, const char __user *src, long count); 353__must_check long
348long __strncpy_from_user(char *dst, const char __user *src, long count); 354strncpy_from_user(char *dst, const char __user *src, long count);
349long strnlen_user(const char __user *str, long n); 355__must_check long
350long __strnlen_user(const char __user *str, long n); 356__strncpy_from_user(char *dst, const char __user *src, long count);
351long strlen_user(const char __user *str); 357__must_check long strnlen_user(const char __user *str, long n);
352unsigned long clear_user(void __user *mem, unsigned long len); 358__must_check long __strnlen_user(const char __user *str, long n);
353unsigned long __clear_user(void __user *mem, unsigned long len); 359__must_check long strlen_user(const char __user *str);
354 360__must_check unsigned long clear_user(void __user *mem, unsigned long len);
355#define __copy_to_user_inatomic __copy_to_user 361__must_check unsigned long __clear_user(void __user *mem, unsigned long len);
356#define __copy_from_user_inatomic __copy_from_user 362
363__must_check long __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size);
364#define __copy_to_user_inatomic copy_user_generic
357 365
358#endif /* __X86_64_UACCESS_H */ 366#endif /* __X86_64_UACCESS_H */
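With the uaccess entry points now marked __must_check, a caller that drops the "bytes not copied" return value draws a compiler warning. The expected pattern, sketched with a hypothetical kernel/user buffer pair:

	if (copy_from_user(&kbuf, ubuf, sizeof(kbuf)))
		return -EFAULT;		/* nonzero: some bytes were not copied */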
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 80fd48e84bbb..eeb98c168e98 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -600,9 +600,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat)
600#define __NR_faccessat 269 600#define __NR_faccessat 269
601__SYSCALL(__NR_faccessat, sys_faccessat) 601__SYSCALL(__NR_faccessat, sys_faccessat)
602#define __NR_pselect6 270 602#define __NR_pselect6 270
603__SYSCALL(__NR_pselect6, sys_ni_syscall) /* for now */ 603__SYSCALL(__NR_pselect6, sys_pselect6)
604#define __NR_ppoll 271 604#define __NR_ppoll 271
605__SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */ 605__SYSCALL(__NR_ppoll, sys_ppoll)
606#define __NR_unshare 272 606#define __NR_unshare 272
607__SYSCALL(__NR_unshare, sys_unshare) 607__SYSCALL(__NR_unshare, sys_unshare)
608#define __NR_set_robust_list 273 608#define __NR_set_robust_list 273
@@ -658,6 +658,7 @@ do { \
658#define __ARCH_WANT_SYS_SIGPENDING 658#define __ARCH_WANT_SYS_SIGPENDING
659#define __ARCH_WANT_SYS_SIGPROCMASK 659#define __ARCH_WANT_SYS_SIGPROCMASK
660#define __ARCH_WANT_SYS_RT_SIGACTION 660#define __ARCH_WANT_SYS_RT_SIGACTION
661#define __ARCH_WANT_SYS_RT_SIGSUSPEND
661#define __ARCH_WANT_SYS_TIME 662#define __ARCH_WANT_SYS_TIME
662#define __ARCH_WANT_COMPAT_SYS_TIME 663#define __ARCH_WANT_COMPAT_SYS_TIME
663 664
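Wiring up sys_pselect6 and sys_ppoll (together with __ARCH_WANT_SYS_RT_SIGSUSPEND and the TIF_RESTORE_SIGMASK flag added to thread_info.h above) gives x86-64 the atomic wait-with-signal-mask calls. A userspace sketch of the race they close, assuming a libc that exposes ppoll():

	#define _GNU_SOURCE
	#include <poll.h>
	#include <signal.h>

	int wait_for_io(struct pollfd *fds, nfds_t nfds)
	{
		sigset_t blocked, orig;

		sigemptyset(&blocked);
		sigaddset(&blocked, SIGINT);
		sigprocmask(SIG_BLOCK, &blocked, &orig);  /* SIGINT held off... */

		return ppoll(fds, nfds, NULL, &orig);	/* ...except, atomically,
							   while sleeping here */
	}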
diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h
index 1f6e9bfb569e..2e7ff10fd775 100644
--- a/include/asm-x86_64/unwind.h
+++ b/include/asm-x86_64/unwind.h
@@ -18,6 +18,7 @@ struct unwind_frame_info
18{ 18{
19 struct pt_regs regs; 19 struct pt_regs regs;
20 struct task_struct *task; 20 struct task_struct *task;
21 unsigned call_frame:1;
21}; 22};
22 23
23#define UNW_PC(frame) (frame)->regs.rip 24#define UNW_PC(frame) (frame)->regs.rip
@@ -57,6 +58,10 @@ struct unwind_frame_info
57 PTREGS_INFO(r15), \ 58 PTREGS_INFO(r15), \
58 PTREGS_INFO(rip) 59 PTREGS_INFO(rip)
59 60
61#define UNW_DEFAULT_RA(raItem, dataAlign) \
62 ((raItem).where == Memory && \
63 !((raItem).value * (dataAlign) + 8))
64
60static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, 65static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
61 /*const*/ struct pt_regs *regs) 66 /*const*/ struct pt_regs *regs)
62{ 67{
@@ -94,8 +99,8 @@ static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
94 99
95#else 100#else
96 101
97#define UNW_PC(frame) ((void)(frame), 0) 102#define UNW_PC(frame) ((void)(frame), 0UL)
98#define UNW_SP(frame) ((void)(frame), 0) 103#define UNW_SP(frame) ((void)(frame), 0UL)
99 104
100static inline int arch_unw_user_mode(const void *info) 105static inline int arch_unw_user_mode(const void *info)
101{ 106{
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
index 146b24402a5f..2281e9399b96 100644
--- a/include/asm-x86_64/vsyscall.h
+++ b/include/asm-x86_64/vsyscall.h
@@ -4,6 +4,7 @@
4enum vsyscall_num { 4enum vsyscall_num {
5 __NR_vgettimeofday, 5 __NR_vgettimeofday,
6 __NR_vtime, 6 __NR_vtime,
7 __NR_vgetcpu,
7}; 8};
8 9
9#define VSYSCALL_START (-10UL << 20) 10#define VSYSCALL_START (-10UL << 20)
@@ -15,6 +16,7 @@ enum vsyscall_num {
15#include <linux/seqlock.h> 16#include <linux/seqlock.h>
16 17
17#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) 18#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
19#define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
18#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16))) 20#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
19#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) 21#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
20#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) 22#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
@@ -26,6 +28,9 @@ enum vsyscall_num {
26#define VXTIME_HPET 2 28#define VXTIME_HPET 2
27#define VXTIME_PMTMR 3 29#define VXTIME_PMTMR 3
28 30
31#define VGETCPU_RDTSCP 1
32#define VGETCPU_LSL 2
33
29struct vxtime_data { 34struct vxtime_data {
30 long hpet_address; /* HPET base address */ 35 long hpet_address; /* HPET base address */
31 int last; 36 int last;
@@ -40,6 +45,7 @@ struct vxtime_data {
40 45
41/* vsyscall space (readonly) */ 46/* vsyscall space (readonly) */
42extern struct vxtime_data __vxtime; 47extern struct vxtime_data __vxtime;
48extern int __vgetcpu_mode;
43extern struct timespec __xtime; 49extern struct timespec __xtime;
44extern volatile unsigned long __jiffies; 50extern volatile unsigned long __jiffies;
45extern unsigned long __wall_jiffies; 51extern unsigned long __wall_jiffies;
@@ -48,6 +54,7 @@ extern seqlock_t __xtime_lock;
48 54
49/* kernel space (writeable) */ 55/* kernel space (writeable) */
50extern struct vxtime_data vxtime; 56extern struct vxtime_data vxtime;
57extern int vgetcpu_mode;
51extern unsigned long wall_jiffies; 58extern unsigned long wall_jiffies;
52extern struct timezone sys_tz; 59extern struct timezone sys_tz;
53extern int sysctl_vsyscall; 60extern int sysctl_vsyscall;
@@ -55,6 +62,8 @@ extern seqlock_t xtime_lock;
55 62
56extern int sysctl_vsyscall; 63extern int sysctl_vsyscall;
57 64
65extern void vsyscall_set_cpu(int cpu);
66
58#define ARCH_HAVE_XTIME_LOCK 1 67#define ARCH_HAVE_XTIME_LOCK 1
59 68
60#endif /* __KERNEL__ */ 69#endif /* __KERNEL__ */
diff --git a/include/linux/edd.h b/include/linux/edd.h
index 162512b886f7..b2b3e68aa512 100644
--- a/include/linux/edd.h
+++ b/include/linux/edd.h
@@ -52,6 +52,7 @@
52#define EDD_CL_EQUALS 0x3d646465 /* "edd=" */ 52#define EDD_CL_EQUALS 0x3d646465 /* "edd=" */
53#define EDD_CL_OFF 0x666f /* "of" for off */ 53#define EDD_CL_OFF 0x666f /* "of" for off */
54#define EDD_CL_SKIP 0x6b73 /* "sk" for skipmbr */ 54#define EDD_CL_SKIP 0x6b73 /* "sk" for skipmbr */
55#define EDD_CL_ON 0x6e6f /* "on" for on */
55 56
56#ifndef __ASSEMBLY__ 57#ifndef __ASSEMBLY__
57 58
diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h
new file mode 100644
index 000000000000..031ed3780e45
--- /dev/null
+++ b/include/linux/getcpu.h
@@ -0,0 +1,16 @@
1#ifndef _LINUX_GETCPU_H
2#define _LINUX_GETCPU_H 1
3
4/* Cache for getcpu() to speed it up. Results might be up to a jiffie
5 out of date, but will be faster.
6 User programs should not refer to the contents of this structure.
7 It is only a cache for vgetcpu(). It might change in future kernels.
8 The user program must store this information per thread (__thread).
9 If you want 100% accurate information pass NULL instead. */
10struct getcpu_cache {
11 unsigned long t0;
12 unsigned long t1;
13 unsigned long res[4];
14};
15
16#endif
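A userspace sketch of the intended calling convention; no libc wrapper existed for this call at the time, so it goes through syscall(2), and __NR_getcpu comes from the matching unistd change rather than from any header shown in this hunk:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/getcpu.h>

	static __thread struct getcpu_cache cache;  /* one per thread, per the comment */

	void report_cpu(void)
	{
		unsigned int cpu, node;

		if (syscall(__NR_getcpu, &cpu, &node, &cache) == 0)
			printf("on cpu %u, node %u\n", cpu, node);
	}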
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 329ebcffa106..c8d5f207c3d4 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -115,6 +115,21 @@ static inline u64 get_jiffies_64(void)
115 ((long)(a) - (long)(b) >= 0)) 115 ((long)(a) - (long)(b) >= 0))
116#define time_before_eq(a,b) time_after_eq(b,a) 116#define time_before_eq(a,b) time_after_eq(b,a)
117 117
118/* Same as above, but does so with platform independent 64bit types.
119 * These must be used when utilizing jiffies_64 (i.e. return value of
120 * get_jiffies_64()). */
121#define time_after64(a,b) \
122 (typecheck(__u64, a) && \
123 typecheck(__u64, b) && \
124 ((__s64)(b) - (__s64)(a) < 0))
125#define time_before64(a,b) time_after64(b,a)
126
127#define time_after_eq64(a,b) \
128 (typecheck(__u64, a) && \
129 typecheck(__u64, b) && \
130 ((__s64)(a) - (__s64)(b) >= 0))
131#define time_before_eq64(a,b) time_after_eq64(b,a)
132
118/* 133/*
119 * Have the 32 bit jiffies value wrap 5 minutes after boot 134 * Have the 32 bit jiffies value wrap 5 minutes after boot
120 * so jiffies wrap bugs show up earlier. 135 * so jiffies wrap bugs show up earlier.
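Usage sketch for the new 64-bit comparators; both arguments must be __u64, which in practice means values derived from get_jiffies_64() (the deadline variable and helper below are illustrative):

	u64 deadline = get_jiffies_64() + 10 * HZ;	/* hypothetical 10s budget */

	if (time_after64(get_jiffies_64(), deadline))
		handle_timeout();	/* wrap-safe, unlike a plain '>' */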
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e44a37e2c71c..4fa373bb18ac 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -187,6 +187,7 @@ extern void bust_spinlocks(int yes);
187extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ 187extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
188extern int panic_timeout; 188extern int panic_timeout;
189extern int panic_on_oops; 189extern int panic_on_oops;
190extern int panic_on_unrecovered_nmi;
190extern int tainted; 191extern int tainted;
191extern const char *print_tainted(void); 192extern const char *print_tainted(void);
192extern void add_taint(unsigned); 193extern void add_taint(unsigned);
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 932021f872d5..6c9873f88287 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -35,9 +35,13 @@
35#endif 35#endif
36 36
37#define KPROBE_ENTRY(name) \ 37#define KPROBE_ENTRY(name) \
38 .section .kprobes.text, "ax"; \ 38 .pushsection .kprobes.text, "ax"; \
39 ENTRY(name) 39 ENTRY(name)
40 40
41#define KPROBE_END(name) \
42 END(name); \
43 .popsection
44
41#ifndef END 45#ifndef END
42#define END(name) \ 46#define END(name) \
43 .size name, .-name 47 .size name, .-name
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 34ed0d99b1bd..9d4aa7f95bc8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -819,6 +819,11 @@ struct task_struct {
819 unsigned did_exec:1; 819 unsigned did_exec:1;
820 pid_t pid; 820 pid_t pid;
821 pid_t tgid; 821 pid_t tgid;
822
823#ifdef CONFIG_CC_STACKPROTECTOR
824 /* Canary value for the -fstack-protector gcc feature */
825 unsigned long stack_canary;
826#endif
822 /* 827 /*
823 * pointers to (original) parent process, youngest child, younger sibling, 828 * pointers to (original) parent process, youngest child, younger sibling,
824 * older sibling, respectively. (p->father can be replaced with 829 * older sibling, respectively. (p->father can be replaced with
@@ -865,6 +870,15 @@ struct task_struct {
865 struct key *thread_keyring; /* keyring private to this thread */ 870 struct key *thread_keyring; /* keyring private to this thread */
866 unsigned char jit_keyring; /* default keyring to attach requested keys to */ 871 unsigned char jit_keyring; /* default keyring to attach requested keys to */
867#endif 872#endif
873 /*
874 * fpu_counter contains the number of consecutive context switches
 875 * in which the FPU is used. If this is over a threshold, the lazy fpu
 876 * saving becomes unlazy to save the trap. This is an unsigned char
 877 * so that after 256 times the counter wraps and the behavior turns
 878 * lazy again; this is to deal with bursty apps that only use the FPU
 879 * for a short time.
880 */
881 unsigned char fpu_counter;
868 int oomkilladj; /* OOM kill score adjustment (bit shift). */ 882 int oomkilladj; /* OOM kill score adjustment (bit shift). */
869 char comm[TASK_COMM_LEN]; /* executable name excluding path 883 char comm[TASK_COMM_LEN]; /* executable name excluding path
870 - access with [gs]et_task_comm (which lock 884 - access with [gs]et_task_comm (which lock
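A sketch of the switching policy the fpu_counter comment describes (the real logic sits in the arch context-switch path; both helpers named here are hypothetical):

	if (next->fpu_counter > 5)		/* FPU hot: restore eagerly, */
		restore_fpu_eagerly(next);	/* skipping the DNA trap */
	else
		set_fpu_lazy(next);		/* cold: fault state in on first use */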
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 9cc81e572224..50e2b01e517c 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -5,15 +5,16 @@
5struct stack_trace { 5struct stack_trace {
6 unsigned int nr_entries, max_entries; 6 unsigned int nr_entries, max_entries;
7 unsigned long *entries; 7 unsigned long *entries;
8 int skip; /* input argument: How many entries to skip */
 9 int all_contexts; /* input argument: if true dump more than one stack */
8}; 10};
9 11
10extern void save_stack_trace(struct stack_trace *trace, 12extern void save_stack_trace(struct stack_trace *trace,
11 struct task_struct *task, int all_contexts, 13 struct task_struct *task);
12 unsigned int skip);
13 14
14extern void print_stack_trace(struct stack_trace *trace, int spaces); 15extern void print_stack_trace(struct stack_trace *trace, int spaces);
15#else 16#else
16# define save_stack_trace(trace, task, all, skip) do { } while (0) 17# define save_stack_trace(trace, task) do { } while (0)
17# define print_stack_trace(trace) do { } while (0) 18# define print_stack_trace(trace) do { } while (0)
18#endif 19#endif
19 20
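The former skip/all_contexts arguments move into struct stack_trace itself, as the kernel/lockdep.c hunk below shows. Usage sketch:

	static unsigned long entries[16];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= 16,
		.skip		= 2,	/* drop the capture helpers themselves */
		.all_contexts	= 0,	/* current context only */
	};

	save_stack_trace(&trace, NULL);	/* NULL: trace the current task */
	print_stack_trace(&trace, 0);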
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 008f04c56737..3f0f716225ec 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -53,6 +53,7 @@ struct mq_attr;
53struct compat_stat; 53struct compat_stat;
54struct compat_timeval; 54struct compat_timeval;
55struct robust_list_head; 55struct robust_list_head;
56struct getcpu_cache;
56 57
57#include <linux/types.h> 58#include <linux/types.h>
58#include <linux/aio_abi.h> 59#include <linux/aio_abi.h>
@@ -596,5 +597,6 @@ asmlinkage long sys_get_robust_list(int pid,
596 size_t __user *len_ptr); 597 size_t __user *len_ptr);
597asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, 598asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
598 size_t len); 599 size_t len);
600asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache);
599 601
600#endif 602#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index eca555781d05..1b24bd45e080 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -150,6 +150,8 @@ enum
150 KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ 150 KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
151 KERN_COMPAT_LOG=73, /* int: print compat layer messages */ 151 KERN_COMPAT_LOG=73, /* int: print compat layer messages */
152 KERN_MAX_LOCK_DEPTH=74, 152 KERN_MAX_LOCK_DEPTH=74,
153 KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
154 KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
153}; 155};
154 156
155 157
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 46919f9f5eb3..4d0909e53595 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -24,5 +24,5 @@
24#define VERMAGIC_STRING \ 24#define VERMAGIC_STRING \
25 UTS_RELEASE " " \ 25 UTS_RELEASE " " \
26 MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ 26 MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
27 MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC \ 27 MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC
28 "gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__) 28
diff --git a/init/main.c b/init/main.c
index 8651a720a092..913e48d658ee 100644
--- a/init/main.c
+++ b/init/main.c
@@ -162,16 +162,19 @@ extern struct obs_kernel_param __setup_start[], __setup_end[];
162static int __init obsolete_checksetup(char *line) 162static int __init obsolete_checksetup(char *line)
163{ 163{
164 struct obs_kernel_param *p; 164 struct obs_kernel_param *p;
165 int had_early_param = 0;
165 166
166 p = __setup_start; 167 p = __setup_start;
167 do { 168 do {
168 int n = strlen(p->str); 169 int n = strlen(p->str);
169 if (!strncmp(line, p->str, n)) { 170 if (!strncmp(line, p->str, n)) {
170 if (p->early) { 171 if (p->early) {
171 /* Already done in parse_early_param? (Needs 172 /* Already done in parse_early_param?
172 * exact match on param part) */ 173 * (Needs exact match on param part).
174 * Keep iterating, as we can have early
175 * params and __setups of same names 8( */
173 if (line[n] == '\0' || line[n] == '=') 176 if (line[n] == '\0' || line[n] == '=')
174 return 1; 177 had_early_param = 1;
175 } else if (!p->setup_func) { 178 } else if (!p->setup_func) {
176 printk(KERN_WARNING "Parameter %s is obsolete," 179 printk(KERN_WARNING "Parameter %s is obsolete,"
177 " ignored\n", p->str); 180 " ignored\n", p->str);
@@ -181,7 +184,8 @@ static int __init obsolete_checksetup(char *line)
181 } 184 }
182 p++; 185 p++;
183 } while (p < __setup_end); 186 } while (p < __setup_end);
184 return 0; 187
188 return had_early_param;
185} 189}
186 190
187/* 191/*
@@ -464,6 +468,7 @@ asmlinkage void __init start_kernel(void)
464 * Need to run as early as possible, to initialize the 468 * Need to run as early as possible, to initialize the
465 * lockdep hash: 469 * lockdep hash:
466 */ 470 */
471 unwind_init();
467 lockdep_init(); 472 lockdep_init();
468 473
469 local_irq_disable(); 474 local_irq_disable();
@@ -502,7 +507,6 @@ asmlinkage void __init start_kernel(void)
502 __stop___param - __start___param, 507 __stop___param - __start___param,
503 &unknown_bootoption); 508 &unknown_bootoption);
504 sort_main_extable(); 509 sort_main_extable();
505 unwind_init();
506 trap_init(); 510 trap_init();
507 rcu_init(); 511 rcu_init();
508 init_IRQ(); 512 init_IRQ();
diff --git a/kernel/fork.c b/kernel/fork.c
index f9b014e3e700..a0dad84567c9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -45,6 +45,7 @@
45#include <linux/cn_proc.h> 45#include <linux/cn_proc.h>
46#include <linux/delayacct.h> 46#include <linux/delayacct.h>
47#include <linux/taskstats_kern.h> 47#include <linux/taskstats_kern.h>
48#include <linux/random.h>
48 49
49#include <asm/pgtable.h> 50#include <asm/pgtable.h>
50#include <asm/pgalloc.h> 51#include <asm/pgalloc.h>
@@ -175,6 +176,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
175 tsk->thread_info = ti; 176 tsk->thread_info = ti;
176 setup_thread_stack(tsk, orig); 177 setup_thread_stack(tsk, orig);
177 178
179#ifdef CONFIG_CC_STACKPROTECTOR
180 tsk->stack_canary = get_random_int();
181#endif
182
178 /* One for us, one for whoever does the "release_task()" (usually parent) */ 183 /* One for us, one for whoever does the "release_task()" (usually parent) */
179 atomic_set(&tsk->usage,2); 184 atomic_set(&tsk->usage,2);
180 atomic_set(&tsk->fs_excl, 0); 185 atomic_set(&tsk->fs_excl, 0);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 9bad17884513..c088e5542e84 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -224,7 +224,14 @@ static int save_trace(struct stack_trace *trace)
224 trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; 224 trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
225 trace->entries = stack_trace + nr_stack_trace_entries; 225 trace->entries = stack_trace + nr_stack_trace_entries;
226 226
227 save_stack_trace(trace, NULL, 0, 3); 227 trace->skip = 3;
228 trace->all_contexts = 0;
229
 230 /* Make sure to not recurse in case the unwinder
 231 * needs to take locks. */
232 lockdep_off();
233 save_stack_trace(trace, NULL);
234 lockdep_on();
228 235
229 trace->max_entries = trace->nr_entries; 236 trace->max_entries = trace->nr_entries;
230 237
diff --git a/kernel/panic.c b/kernel/panic.c
index 8010b9b17aca..6ceb664fb52a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -21,6 +21,7 @@
21#include <linux/debug_locks.h> 21#include <linux/debug_locks.h>
22 22
23int panic_on_oops; 23int panic_on_oops;
24int panic_on_unrecovered_nmi;
24int tainted; 25int tainted;
25static int pause_on_oops; 26static int pause_on_oops;
26static int pause_on_oops_flag; 27static int pause_on_oops_flag;
@@ -270,3 +271,15 @@ void oops_exit(void)
270{ 271{
271 do_oops_enter_exit(); 272 do_oops_enter_exit();
272} 273}
274
275#ifdef CONFIG_CC_STACKPROTECTOR
276/*
277 * Called when gcc's -fstack-protector feature is used, and
278 * gcc detects corruption of the on-stack canary value
279 */
280void __stack_chk_fail(void)
281{
282 panic("stack-protector: Kernel stack is corrupted");
283}
284EXPORT_SYMBOL(__stack_chk_fail);
285#endif
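__stack_chk_fail() is the landing pad for gcc's -fstack-protector instrumentation. Roughly, in pseudo-C, the compiler rewrites a function with a vulnerable-looking buffer as below; the reference canary is the per-task stack_canary field added to task_struct and seeded in kernel/fork.c above (on x86-64 it is actually fetched through %gs rather than via current):

	void copy_name(const char *src)
	{
		unsigned long canary = current->stack_canary;	/* prologue */
		char buf[64];

		strcpy(buf, src);				/* may overrun */

		if (canary != current->stack_canary)		/* epilogue check */
			__stack_chk_fail();			/* -> panic() above */
	}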
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index fb524b009eef..9644a41e0bef 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -7,6 +7,11 @@
7 * 7 *
8 * This file contains the spinlock/rwlock implementations for the 8 * This file contains the spinlock/rwlock implementations for the
9 * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them) 9 * SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them)
10 *
11 * Note that some architectures have special knowledge about the
12 * stack frames of these functions in their profile_pc. If you
13 * change anything significant here that could change the stack
 14 * frame, contact the architecture maintainers.
10 */ 15 */
11 16
12#include <linux/linkage.h> 17#include <linux/linkage.h>
diff --git a/kernel/sys.c b/kernel/sys.c
index e236f98f7ec5..3f894775488d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -28,6 +28,7 @@
28#include <linux/tty.h> 28#include <linux/tty.h>
29#include <linux/signal.h> 29#include <linux/signal.h>
30#include <linux/cn_proc.h> 30#include <linux/cn_proc.h>
31#include <linux/getcpu.h>
31 32
32#include <linux/compat.h> 33#include <linux/compat.h>
33#include <linux/syscalls.h> 34#include <linux/syscalls.h>
@@ -2062,3 +2063,33 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
2062 } 2063 }
2063 return error; 2064 return error;
2064} 2065}
2066
2067asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
2068 struct getcpu_cache __user *cache)
2069{
2070 int err = 0;
2071 int cpu = raw_smp_processor_id();
2072 if (cpup)
2073 err |= put_user(cpu, cpup);
2074 if (nodep)
2075 err |= put_user(cpu_to_node(cpu), nodep);
2076 if (cache) {
2077 /*
2078 * The cache is not needed for this implementation,
2079 * but make sure user programs pass something
2080 * valid. vsyscall implementations can instead make
2081 * good use of the cache. Only use t0 and t1 because
2082 * these are available in both 32bit and 64bit ABI (no
2083 * need for a compat_getcpu). 32bit has enough
 2084 * padding.
2085 */
2086 unsigned long t0, t1;
2087 get_user(t0, &cache->t0);
2088 get_user(t1, &cache->t1);
2089 t0++;
2090 t1++;
2091 put_user(t0, &cache->t0);
2092 put_user(t1, &cache->t1);
2093 }
2094 return err ? -EFAULT : 0;
2095}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index fd43c3e6786b..bcb3a181dbb2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -76,8 +76,9 @@ extern int compat_log;
76 76
77#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 77#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
78int unknown_nmi_panic; 78int unknown_nmi_panic;
79extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *, 79int nmi_watchdog_enabled;
80 void __user *, size_t *, loff_t *); 80extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
81 void __user *, size_t *, loff_t *);
81#endif 82#endif
82 83
83/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 84/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -628,11 +629,27 @@ static ctl_table kern_table[] = {
628 .data = &unknown_nmi_panic, 629 .data = &unknown_nmi_panic,
629 .maxlen = sizeof (int), 630 .maxlen = sizeof (int),
630 .mode = 0644, 631 .mode = 0644,
631 .proc_handler = &proc_unknown_nmi_panic, 632 .proc_handler = &proc_dointvec,
633 },
634 {
635 .ctl_name = KERN_NMI_WATCHDOG,
636 .procname = "nmi_watchdog",
637 .data = &nmi_watchdog_enabled,
638 .maxlen = sizeof (int),
639 .mode = 0644,
640 .proc_handler = &proc_nmi_enabled,
632 }, 641 },
633#endif 642#endif
634#if defined(CONFIG_X86) 643#if defined(CONFIG_X86)
635 { 644 {
645 .ctl_name = KERN_PANIC_ON_NMI,
646 .procname = "panic_on_unrecovered_nmi",
647 .data = &panic_on_unrecovered_nmi,
648 .maxlen = sizeof(int),
649 .mode = 0644,
650 .proc_handler = &proc_dointvec,
651 },
652 {
636 .ctl_name = KERN_BOOTLOADER_TYPE, 653 .ctl_name = KERN_BOOTLOADER_TYPE,
637 .procname = "bootloader_type", 654 .procname = "bootloader_type",
638 .data = &bootloader_type, 655 .data = &bootloader_type,
diff --git a/kernel/unwind.c b/kernel/unwind.c
index f69c804c8e62..3430475fcd88 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -603,6 +603,7 @@ int unwind(struct unwind_frame_info *frame)
603#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) 603#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
604 const u32 *fde = NULL, *cie = NULL; 604 const u32 *fde = NULL, *cie = NULL;
605 const u8 *ptr = NULL, *end = NULL; 605 const u8 *ptr = NULL, *end = NULL;
606 unsigned long pc = UNW_PC(frame) - frame->call_frame;
606 unsigned long startLoc = 0, endLoc = 0, cfa; 607 unsigned long startLoc = 0, endLoc = 0, cfa;
607 unsigned i; 608 unsigned i;
608 signed ptrType = -1; 609 signed ptrType = -1;
@@ -612,7 +613,7 @@ int unwind(struct unwind_frame_info *frame)
612 613
613 if (UNW_PC(frame) == 0) 614 if (UNW_PC(frame) == 0)
614 return -EINVAL; 615 return -EINVAL;
615 if ((table = find_table(UNW_PC(frame))) != NULL 616 if ((table = find_table(pc)) != NULL
616 && !(table->size & (sizeof(*fde) - 1))) { 617 && !(table->size & (sizeof(*fde) - 1))) {
617 unsigned long tableSize = table->size; 618 unsigned long tableSize = table->size;
618 619
@@ -647,7 +648,7 @@ int unwind(struct unwind_frame_info *frame)
647 ptrType & DW_EH_PE_indirect 648 ptrType & DW_EH_PE_indirect
648 ? ptrType 649 ? ptrType
649 : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed)); 650 : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
650 if (UNW_PC(frame) >= startLoc && UNW_PC(frame) < endLoc) 651 if (pc >= startLoc && pc < endLoc)
651 break; 652 break;
652 cie = NULL; 653 cie = NULL;
653 } 654 }
@@ -657,16 +658,28 @@ int unwind(struct unwind_frame_info *frame)
657 state.cieEnd = ptr; /* keep here temporarily */ 658 state.cieEnd = ptr; /* keep here temporarily */
658 ptr = (const u8 *)(cie + 2); 659 ptr = (const u8 *)(cie + 2);
659 end = (const u8 *)(cie + 1) + *cie; 660 end = (const u8 *)(cie + 1) + *cie;
661 frame->call_frame = 1;
660 if ((state.version = *ptr) != 1) 662 if ((state.version = *ptr) != 1)
661 cie = NULL; /* unsupported version */ 663 cie = NULL; /* unsupported version */
662 else if (*++ptr) { 664 else if (*++ptr) {
663 /* check if augmentation size is first (and thus present) */ 665 /* check if augmentation size is first (and thus present) */
664 if (*ptr == 'z') { 666 if (*ptr == 'z') {
665 /* check for ignorable (or already handled) 667 while (++ptr < end && *ptr) {
666 * nul-terminated augmentation string */ 668 switch(*ptr) {
667 while (++ptr < end && *ptr) 669 /* check for ignorable (or already handled)
668 if (strchr("LPR", *ptr) == NULL) 670 * nul-terminated augmentation string */
671 case 'L':
672 case 'P':
673 case 'R':
674 continue;
675 case 'S':
676 frame->call_frame = 0;
677 continue;
678 default:
669 break; 679 break;
680 }
681 break;
682 }
670 } 683 }
671 if (ptr >= end || *ptr) 684 if (ptr >= end || *ptr)
672 cie = NULL; 685 cie = NULL;
@@ -755,7 +768,7 @@ int unwind(struct unwind_frame_info *frame)
755 state.org = startLoc; 768 state.org = startLoc;
756 memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); 769 memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
757 /* process instructions */ 770 /* process instructions */
758 if (!processCFI(ptr, end, UNW_PC(frame), ptrType, &state) 771 if (!processCFI(ptr, end, pc, ptrType, &state)
759 || state.loc > endLoc 772 || state.loc > endLoc
760 || state.regs[retAddrReg].where == Nowhere 773 || state.regs[retAddrReg].where == Nowhere
761 || state.cfa.reg >= ARRAY_SIZE(reg_info) 774 || state.cfa.reg >= ARRAY_SIZE(reg_info)
@@ -763,6 +776,11 @@ int unwind(struct unwind_frame_info *frame)
763 || state.cfa.offs % sizeof(unsigned long)) 776 || state.cfa.offs % sizeof(unsigned long))
764 return -EIO; 777 return -EIO;
765 /* update frame */ 778 /* update frame */
779#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
780 if(frame->call_frame
781 && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
782 frame->call_frame = 0;
783#endif
766 cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; 784 cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
767 startLoc = min((unsigned long)UNW_SP(frame), cfa); 785 startLoc = min((unsigned long)UNW_SP(frame), cfa);
768 endLoc = max((unsigned long)UNW_SP(frame), cfa); 786 endLoc = max((unsigned long)UNW_SP(frame), cfa);
@@ -866,6 +884,7 @@ int unwind_init_frame_info(struct unwind_frame_info *info,
866 /*const*/ struct pt_regs *regs) 884 /*const*/ struct pt_regs *regs)
867{ 885{
868 info->task = tsk; 886 info->task = tsk;
887 info->call_frame = 0;
869 arch_unw_init_frame_info(info, regs); 888 arch_unw_init_frame_info(info, regs);
870 889
871 return 0; 890 return 0;
@@ -879,6 +898,7 @@ int unwind_init_blocked(struct unwind_frame_info *info,
879 struct task_struct *tsk) 898 struct task_struct *tsk)
880{ 899{
881 info->task = tsk; 900 info->task = tsk;
901 info->call_frame = 0;
882 arch_unw_init_blocked(info); 902 arch_unw_init_blocked(info);
883 903
884 return 0; 904 return 0;
@@ -894,6 +914,7 @@ int unwind_init_running(struct unwind_frame_info *info,
894 void *arg) 914 void *arg)
895{ 915{
896 info->task = current; 916 info->task = current;
917 info->call_frame = 0;
897 918
898 return arch_unwind_init_running(info, callback, arg); 919 return arch_unwind_init_running(info, callback, arg);
899} 920}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2869307ca3e4..f1ac3184dc08 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -225,7 +225,7 @@ config LOCKDEP
225 bool 225 bool
226 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 226 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
227 select STACKTRACE 227 select STACKTRACE
228 select FRAME_POINTER 228 select FRAME_POINTER if !X86
229 select KALLSYMS 229 select KALLSYMS
230 select KALLSYMS_ALL 230 select KALLSYMS_ALL
231 231
diff --git a/lib/hweight.c b/lib/hweight.c
index 438257671708..360556a7803d 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -1,5 +1,6 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <asm/types.h> 2#include <asm/types.h>
3#include <asm/bitops.h>
3 4
4/** 5/**
5 * hweightN - returns the hamming weight of a N-bit word 6 * hweightN - returns the hamming weight of a N-bit word
@@ -40,14 +41,19 @@ unsigned long hweight64(__u64 w)
40#if BITS_PER_LONG == 32 41#if BITS_PER_LONG == 32
41 return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); 42 return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
42#elif BITS_PER_LONG == 64 43#elif BITS_PER_LONG == 64
44#ifdef ARCH_HAS_FAST_MULTIPLIER
45 w -= (w >> 1) & 0x5555555555555555ul;
46 w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
47 w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
48 return (w * 0x0101010101010101ul) >> 56;
49#else
43 __u64 res = w - ((w >> 1) & 0x5555555555555555ul); 50 __u64 res = w - ((w >> 1) & 0x5555555555555555ul);
44 res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); 51 res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
45 res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; 52 res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
46 res = res + (res >> 8); 53 res = res + (res >> 8);
47 res = res + (res >> 16); 54 res = res + (res >> 16);
48 return (res + (res >> 32)) & 0x00000000000000FFul; 55 return (res + (res >> 32)) & 0x00000000000000FFul;
49#else 56#endif
50#error BITS_PER_LONG not defined
51#endif 57#endif
52} 58}
53EXPORT_SYMBOL(hweight64); 59EXPORT_SYMBOL(hweight64);
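The ARCH_HAS_FAST_MULTIPLIER path trades the shift/add reduction tail for one multiply: after the first three steps each byte of w holds its own population count (0..8), and multiplying by 0x0101010101010101 accumulates every byte into the top byte. A worked example (values checked by hand):

	__u64 w = 0xff00ff00ff00ff00ul;			/* 32 bits set */

	w -= (w >> 1) & 0x5555555555555555ul;		/* per-2-bit counts */
	w  = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
	w  = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;	/* w == 0x0800080008000800 */

	/* (w * 0x0101010101010101ul) >> 56 == 8+8+8+8 == 32 */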
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 3d523899fdc0..4f5ff19b992b 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -63,6 +63,13 @@ as-option = $(shell if $(CC) $(CFLAGS) $(1) -Wa,-Z -c -o /dev/null \
63 -xassembler /dev/null > /dev/null 2>&1; then echo "$(1)"; \ 63 -xassembler /dev/null > /dev/null 2>&1; then echo "$(1)"; \
64 else echo "$(2)"; fi ;) 64 else echo "$(2)"; fi ;)
65 65
66# as-instr
67# Usage: cflags-y += $(call as-instr, instr, option1, option2)
68
69as-instr = $(shell if echo -e "$(1)" | $(AS) >/dev/null 2>&1 -W -Z -o astest$$$$.out ; \
70 then echo "$(2)"; else echo "$(3)"; fi; \
71 rm -f astest$$$$.out)
72
66# cc-option 73# cc-option
67# Usage: cflags-y += $(call cc-option, -march=winchip-c6, -march=i586) 74# Usage: cflags-y += $(call cc-option, -march=winchip-c6, -march=i586)
68 75
diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
new file mode 100644
index 000000000000..325c0a1b03b6
--- /dev/null
+++ b/scripts/gcc-x86_64-has-stack-protector.sh
@@ -0,0 +1,6 @@
1#!/bin/sh
2
3echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
4if [ "$?" -eq "0" ] ; then
5 echo $2
6fi